爬取網路視訊-未完待續
阿新 • • 發佈:2020-12-14
技術標籤:爬蟲
"""Download a movie from https://www.ai66.cc/zhanzhengpian/14490.html.

Workflow: fetch the HLS (.m3u8) playlist, download every ``.ts`` segment
into ``<RESPATH>/<name>/`` and concatenate the segments into one file.
"""
import os
import shutil
import time
from urllib import parse

import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# Output directory: a ``res`` folder next to this script's parent directory.
RESPATH = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'res')


def _ts_sort_key(file_name):
    """Sort key placing numerically named segments (0.ts, 1.ts, ...) in order.

    Names whose stem is not a decimal number sort after the numeric ones,
    alphabetically, so they never break the numeric ordering.
    """
    stem = os.path.splitext(file_name)[0]
    if stem.isdecimal():
        return (0, int(stem), file_name)
    return (1, 0, file_name)


class Movie:
    """Downloads the segments of an HLS stream and merges them."""

    def download_m3u8_files(self, url):
        """Fetch the playlist at *url* and return its segment URLs.

        Args:
            url: Address of the ``.m3u8`` playlist.

        Returns:
            A list of absolute URLs, one per playlist line ending in
            ``.ts``, in playlist order.
        """
        r = requests.get(url)
        tss = []
        # splitlines() + strip() copes with CRLF playlists, where a trailing
        # '\r' would otherwise defeat the '.ts' suffix check.
        for raw_line in r.text.splitlines():
            line = raw_line.strip()
            if line.endswith('.ts'):
                # Resolve against the playlist URL itself so that relative
                # and host-absolute segment paths both work.
                tss.append(parse.urljoin(url, line))
        return tss

    def heBingTsVideo(self, download_path, hebing_path):
        """Concatenate the ``.ts`` files in *download_path* into *hebing_path*.

        Args:
            download_path: Directory holding the downloaded segments.
            hebing_path: Path of the merged output file (overwritten).
        """
        # os.listdir() order is arbitrary; segments are saved as
        # '<index>.ts', so they must be ordered numerically (2 before 10).
        segments = sorted(
            (n for n in os.listdir(download_path) if n.endswith('.ts')),
            key=_ts_sort_key)
        with open(hebing_path, 'wb') as merged:
            for segment in segments:
                segment_path = os.path.join(download_path, segment)
                # Stream each segment so its file handle is closed promptly
                # and the whole segment is never held in memory.
                with open(segment_path, 'rb') as src:
                    shutil.copyfileobj(src, merged)
        print("合併完成!!")

    def down_tss(self, tss, name):
        """Download every segment URL in *tss* into ``<RESPATH>/<name>/``.

        Segments are stored as ``0.ts``, ``1.ts``, ... following the order
        of *tss*. If the target directory already exists, nothing is
        downloaded.

        Args:
            tss: Iterable of segment URLs (must support ``len``).
            name: Name of the sub-directory created under ``RESPATH``.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'
        }
        file_name = os.path.join(RESPATH, name)
        if os.path.exists(file_name):
            return
        os.makedirs(file_name)

        # Requests below use verify=False; silence the resulting warning
        # once instead of on every iteration.
        requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

        total = len(tss)
        for idx, url in enumerate(tss):
            print('{}/{}'.format(idx + 1, total))
            segment_path = os.path.join(file_name, '{}.ts'.format(idx))
            with requests.get(url, headers=headers, stream=True,
                              verify=False) as req, \
                    open(segment_path, 'wb') as f:
                for chunk in req.iter_content(chunk_size=10000):
                    if chunk:
                        f.write(chunk)

    def run(self):
        """Merge the already-downloaded segments of the hard-coded movie.

        The download steps are currently disabled (kept below for
        reference); only the merge step runs.
        """
        name = '金剛川'
        # m3u8_url = 'https://www.nmgxwhz.com:65/20201209/vq2N4ajW/1200kb/hls/index.m3u8'
        # tss = self.download_m3u8_files(m3u8_url)
        # self.down_tss(tss, name)
        self.heBingTsVideo(os.path.join(RESPATH, name),
                           os.path.join(RESPATH, name + '.mp4'))


if __name__ == '__main__':
    Movie().run()