2021年最新 Python 梨視訊爬取
阿新 • • 發佈:2021-11-21
話不多說，直接上程式碼。如果爬取程式碼失效了，可以留言，我看到會修改。
"""Download the videos listed on a pearvideo.com category page.

Flow, as implemented below:
  1. Fetch the category listing and collect the per-video hrefs.
  2. For each href, fetch the video page (for its title) and the
     ``videoStatus.jsp`` endpoint (for the ``srcUrl`` field).
  3. Rewrite the ``srcUrl`` by replacing the first segment of its file name
     with ``cont-<id>`` (the original script treats the raw value as not
     directly downloadable).
  4. Download all rewritten URLs concurrently with a thread pool.
"""
import re
from multiprocessing.dummy import Pool

import requests
from lxml import etree

BASE_URL = 'https://www.pearvideo.com/'
# Seconds to wait for a connection / for each chunk of a response.
REQUEST_TIMEOUT = 15

# Base headers shared by every request. This dict is never mutated or rebound;
# per-request variants (e.g. with a Referer) are built as copies.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36 SLBrowser/7.0.0.6241 SLBChan/8'
}
url = BASE_URL + 'category_5'


def _safe_filename(name):
    """Return *name* with characters that are illegal in file names replaced."""
    cleaned = re.sub(r'[\\/:*?"<>|\r\n]+', '_', name).strip()
    return cleaned or 'video'


def _real_video_url(fake_url, video_ip):
    """Rewrite the ``srcUrl`` reported by the status endpoint.

    The file name's first dash-separated segment is replaced by
    ``cont-<video_ip>``. Returns ``None`` when *fake_url* does not have the
    expected ``/mp4/<a>/<b>/<x>-<rest>`` layout.
    """
    match = re.search(
        r'https://video\.pearvideo\.com/mp4/(.*?)/(.*?)/(.*?)-(.*)', fake_url
    )
    if match is None:
        return None
    return (
        'https://video.pearvideo.com/mp4/'
        + match.group(1) + '/'
        + match.group(2) + '/'
        + 'cont-' + video_ip + '-'
        + match.group(4)
    )


def _collect_video(life):
    """Resolve one listing href into a download descriptor.

    Args:
        life: Relative href taken from the category page, expected to look
            like ``video_<id>``.

    Returns:
        A dict with keys ``name``, ``url`` and ``referer``, or ``None`` when
        the href or the endpoint response does not have the expected form.

    Raises:
        requests.RequestException: If either HTTP request fails.
    """
    ids = re.findall(r'^video_(.*)', life)
    if not ids:
        print(life, '不是影片連結,已略過')
        return None
    video_ip = ids[0]

    name_url = BASE_URL + life
    name_page_text = requests.get(
        name_url, headers=headers, timeout=REQUEST_TIMEOUT
    ).text
    names = etree.HTML(name_page_text).xpath("//div[@id='poster']/img/@alt")
    # Fall back to the href so a missing title does not abort the whole run.
    name = names[0] if names else life

    detail_url = BASE_URL + 'videoStatus.jsp?contId=' + video_ip
    # `life` already starts with "video_", so the Referer is simply the video
    # page URL. A copy keeps the shared base headers untouched.
    detail_headers = dict(headers, Referer=name_url)
    detail_page_text = requests.get(
        detail_url, headers=detail_headers, timeout=REQUEST_TIMEOUT
    ).text

    # Non-greedy so the capture stops at the closing quote of this field.
    found = re.findall(r'"srcUrl":"(.*?)"', detail_page_text)
    src_url = _real_video_url(found[0], video_ip) if found else None
    if src_url is None:
        print(name, '找不到影片位址,已略過')
        return None
    return {'name': name, 'url': src_url, 'referer': name_url}


def get_video_data(dic):
    """Download one video and save it as ``<name>.mp4`` in the working directory.

    Args:
        dic: Mapping with ``name`` (title, used for the file name) and ``url``
            (direct video URL); an optional ``referer`` is sent as the
            Referer header.

    Failed downloads are reported and skipped rather than raised, so one bad
    video does not abort the other pool workers.
    """
    request_headers = dict(headers)
    if dic.get('referer'):
        request_headers['Referer'] = dic['referer']
    try:
        response = requests.get(
            url=dic['url'], headers=request_headers, timeout=REQUEST_TIMEOUT
        )
        # Without this an HTTP error body would be written out as an .mp4.
        response.raise_for_status()
    except requests.RequestException as err:
        print(dic['name'], '下載失敗:', err)
        return
    with open(_safe_filename(dic['name']) + '.mp4', 'wb') as fp:
        fp.write(response.content)
    print(dic['name'], '下載成功')


def main():
    """Scrape the category page and download every video it lists."""
    # `headers` must be passed by keyword: the second positional parameter of
    # requests.get is `params`, not `headers`.
    page_text = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT).text
    tree = etree.HTML(page_text)
    life_list = tree.xpath("//div[@class='vervideo-bd']/a/@href")

    urls = []
    for life in life_list:
        try:
            dic = _collect_video(life)
        except requests.RequestException as err:
            print(life, '解析失敗:', err)
            continue
        if dic is not None:
            urls.append(dic)

    # Pool(0) raises ValueError, so bail out when nothing was found.
    if not urls:
        print('沒有可下載的影片')
        return

    # multiprocessing.dummy.Pool is thread-based; one worker per video.
    pool = Pool(len(urls))
    try:
        pool.map(get_video_data, urls)
    finally:
        pool.close()
        pool.join()


if __name__ == '__main__':
    main()