利用python爬蟲實現:抖音短視訊無水印視訊下載
阿新 • • 發佈:2018-12-31
各位小夥伴,之前一段時間迷上了抖音小姐姐視訊,但是下載的視訊都有水印,於是自己用Python 寫了個爬取小姐姐視訊的工具,大家可以直接拷貝到自己編譯器上執行。經過半天的琢磨,自己用python搞出來一個根據抖音分享視訊連結去除水印並下載的功能。此外,我還利用pyqt5寫了個操作介面,這樣可以方便不懂程式碼的人使用。後面程式碼我都會一一貼上來。備註:這個只是個人興趣,參考程式碼的小夥伴切莫用於非法商業途徑;
環境:Python 3.0,PyCharm / Eric
URL 獲取方式:點選頁面右邊的【分享】按鈕 → 複製連結,再從複製的內容中提取出 URL 就好了。
直接上程式碼:
# -*- coding:utf-8 -*-
"""Resolve a Douyin share link to its video address and download the video."""
from contextlib import closing
import json
import os
import re
import sys
import time

import requests
from splinter.browser import Browser
from splinter.driver.webdriver.chrome import Options, Chrome

# Seconds to wait on connect / on each read before giving up, so a stalled
# server cannot hang the script forever.
_REQUEST_TIMEOUT = 30

# File name used when the caller supplies none or the page yields no usable one.
_DEFAULT_VIDEO_NAME = 'douyinsss.mp4'

# Headers sent with the video download request.
_DOWNLOAD_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.3.2.1000 Chrome/30.0.1599.101 Safari/537.36",
}

# Headers sent when fetching the share page. No fixed 'Host' entry: requests
# derives it from each URL, so it stays correct when a redirect changes host.
_PAGE_HEADERS = {
    'Proxy-Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
}


def _extract_between(text, start_marker, end_marker):
    """Return the text between the first start_marker and the next end_marker.

    Returns '' when either marker is missing, instead of slicing from a bogus
    offset derived from ``str.find`` returning -1.
    """
    start = text.find(start_marker)
    if start == -1:
        return ''
    start += len(start_marker)
    end = text.find(end_marker, start)
    if end == -1:
        return ''
    return text[start:end]


def _safe_file_name(name, default=_DEFAULT_VIDEO_NAME):
    """Turn a scraped title into a usable ``.mp4`` file name.

    Characters that are illegal in file names on common platforms are dropped;
    an empty result falls back to ``default``.
    """
    cleaned = re.sub(r'[\\/:*?"<>|\r\n\t]', '', name).strip()
    if not cleaned:
        return default
    if not cleaned.lower().endswith('.mp4'):
        cleaned += '.mp4'
    return cleaned


class douyin():
    """Resolves Douyin share links and downloads the video they point to."""

    def __init__(self):
        pass

    def video_downloader(self, video_url, video_name=r'douyinsss.mp4'):
        """Download a video to a local file.

        The response is streamed to disk in 1 KiB chunks. Failures are
        reported on stdout rather than raised, so the method never throws.

        Parameters:
            video_url: address of the video to download
            video_name: path of the file to write (overwritten if present)
        Returns:
            None
        """
        if not video_url:
            print('下載出錯啦.....')
            return

        chunk_size = 1024
        try:
            # NOTE(review): verify=False disables TLS certificate checks. It is
            # kept from the original; confirm it is still required.
            with closing(requests.get(video_url, headers=_DOWNLOAD_HEADERS,
                                      stream=True, verify=False,
                                      timeout=_REQUEST_TIMEOUT)) as response:
                # Check the status before touching headers or the body.
                if response.status_code != 200:
                    print('HTTP status: %d' % response.status_code)
                    print('下載出錯啦.....')
                    return

                # Content-Length is optional (e.g. chunked transfer); only
                # report the size when the server actually sent it.
                content_length = response.headers.get('content-length')
                if content_length is not None and content_length.isdigit():
                    sys.stdout.write(' [檔案大小]:%0.2f MB\n'
                                     % (int(content_length) / chunk_size / 1024))

                with open(video_name, "wb") as file:
                    for data in response.iter_content(chunk_size=chunk_size):
                        file.write(data)
            print('視訊下載完了...')
        except Exception as e:
            # Deliberate best-effort boundary: report the failure and return.
            print(e)
            print('下載出錯啦.....')

    def downloadUrlGet(self, video_url):
        """Resolve a share link to the video download address and a name.

        The share link is requested first so that redirects are followed; the
        final page is then fetched and scanned for the ``playAddr: "..."``
        value and the text following ``"name nowrap">``.

        Parameters:
            video_url: share link of the video
        Returns:
            (downloadUrl, name) tuple of strings; an element is '' when the
            corresponding marker is not present in the page
        Raises:
            requests.RequestException: if either HTTP request fails
        """
        # NOTE(review): verify=False disables TLS certificate checks. It is
        # kept from the original; confirm it is still required.
        req = requests.get(url=video_url, headers=_PAGE_HEADERS, verify=False,
                           timeout=_REQUEST_TIMEOUT)
        newUrl = req.url
        print('newUrl:%s' % newUrl)
        print(req.history)

        # Request the page the share link redirected to.
        req = requests.get(url=newUrl, headers=_PAGE_HEADERS, verify=False,
                           timeout=_REQUEST_TIMEOUT)
        reply = req.text

        downloadUrl = _extract_between(reply, 'playAddr: "', '"')
        print('downloadUrl:%s' % downloadUrl)
        name = _extract_between(reply, '"name nowrap">', '<')
        print(name)
        return downloadUrl, name


def main():
    """Resolve the sample share link and download the video it points to."""
    url = 'http://v.douyin.com/dU2Dsn/'
    handel = douyin()
    downloadUrl, name = handel.downloadUrlGet(url)
    if not downloadUrl:
        # The page layout did not contain a play address; nothing to fetch.
        print('下載出錯啦.....')
        return
    # Download the resolved video address, not the share page itself.
    handel.video_downloader(downloadUrl, _safe_file_name(name))


if __name__ == '__main__':
    main()