1. 程式人生 > 其它 >爬取網路視訊-未完待續

爬取網路視訊-未完待續

技術標籤:爬蟲

'''
下載電影
https://www.ai66.cc/zhanzhengpian/14490.html
'''
import os
import time
# Resource directory: the 'res' folder inside the parent of the directory
# that contains this file. Downloaded segments and merged videos live here.
RESPATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),'res')
import requests
from urllib import parse
from requests.packages.urllib3.exceptions import InsecureRequestWarning
class Movie:
    """Download the .ts segments listed in an m3u8 playlist and merge them."""

    def download_m3u8_files(self, url):
        """Fetch an m3u8 playlist and return the URLs of its .ts segments.

        Args:
            url: URL of the m3u8 playlist.

        Returns:
            A list of absolute segment URLs in playlist order. Entries that
            are relative in the playlist are resolved against ``url``.

        Raises:
            requests.HTTPError: if the playlist request returns an error status.
        """
        r = requests.get(url)
        r.raise_for_status()
        segments = []
        # splitlines() + strip() tolerate CRLF playlists, where a trailing
        # '\r' would otherwise make endswith('.ts') fail on every line.
        for line in r.text.splitlines():
            line = line.strip()
            # Lines starting with '#' are playlist tags/comments, not URIs.
            if not line or line.startswith('#'):
                continue
            if line.endswith('.ts'):
                segments.append(parse.urljoin(url, line))
        return segments

    def heBingTsVideo(self, download_path, hebing_path):
        """Concatenate the .ts files in ``download_path`` into ``hebing_path``.

        Segments are written in numeric order of their file name
        (``0.ts``, ``1.ts``, ..., ``10.ts``), matching the names produced by
        ``down_tss``. Non-numeric ``.ts`` names are appended afterwards in
        alphabetical order. Files without a ``.ts`` suffix are ignored.

        Args:
            download_path: directory containing the segment files.
            hebing_path: path of the merged output file (overwritten).
        """
        def segment_order(file_name):
            stem = os.path.splitext(file_name)[0]
            if stem.isdecimal():
                return (0, int(stem), file_name)
            return (1, 0, file_name)

        # os.listdir() gives no ordering guarantee, so sort explicitly;
        # a plain string sort would place '10.ts' before '2.ts'.
        ts_names = sorted(
            (n for n in os.listdir(download_path) if n.endswith('.ts')),
            key=segment_order,
        )
        with open(hebing_path, 'wb') as merged:
            for ts_name in ts_names:
                with open(os.path.join(download_path, ts_name), 'rb') as segment:
                    merged.write(segment.read())
        print("合併完成!!")

    def down_tss(self, tss, name):
        """Download each segment URL into ``RESPATH/<name>/<index>.ts``.

        If the target directory already exists the method returns without
        downloading anything, so a partially filled directory must be
        removed by hand before retrying.

        Args:
            tss: sequence of segment URLs; the position in the sequence
                becomes the file name.
            name: name of the sub-directory of ``RESPATH`` to create.

        Raises:
            requests.HTTPError: if a segment request returns an error status.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'
        }
        target_dir = os.path.join(RESPATH, name)
        if os.path.exists(target_dir):
            return
        os.makedirs(target_dir)
        # Certificate verification is turned off below, so silence the
        # resulting warning once instead of on every iteration.
        requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
        total = len(tss)
        for idx, url in enumerate(tss):
            print('{}/{}'.format(idx + 1, total))
            segment_path = os.path.join(target_dir, str(idx) + '.ts')
            # NOTE(review): verify=False disables TLS certificate checks;
            # kept from the original, confirm it is still required.
            with requests.get(url, headers=headers, stream=True, verify=False) as resp:
                # Fail loudly rather than saving an error page as a segment.
                resp.raise_for_status()
                with open(segment_path, 'wb') as f:
                    for chunk in resp.iter_content(chunk_size=10000):
                        if chunk:
                            f.write(chunk)

    def run(self):
        """Merge the already-downloaded segments of the hard-coded movie.

        The download steps are kept commented out, as in the original; only
        the merge of ``RESPATH/<name>`` into ``RESPATH/<name>.mp4`` runs.
        """
        name = '金剛川'
        # m3u8_url = 'https://www.nmgxwhz.com:65/20201209/vq2N4ajW/1200kb/hls/index.m3u8'
        # tss = self.download_m3u8_files(m3u8_url)
        # self.down_tss(tss, name)

        self.heBingTsVideo(os.path.join(RESPATH, name), os.path.join(RESPATH, name + '.mp4'))
# Script entry point: build a Movie and run its workflow when executed directly.
if __name__ == '__main__':
    downloader = Movie()
    downloader.run()