1. 程式人生 > 實用技巧 >b站路飛學城python課梨視訊專案程式碼

b站路飛學城python課梨視訊專案程式碼

 1 import requests
 2 from lxml import etree
 3 import random
 4 import os
 5 from multiprocessing.dummy import Pool
 6 
 7 if __name__ == '__main__':
 8     # 生成一個存視訊的資料夾
 9     if not os.path.exists('./video'):
10         os.mkdir('./video')
11 
12     url = 'https://www.pearvideo.com/category_5'
13     headers = {
14 "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3823.400 QQBrowser/10.7.4307.400' 15 } 16 # proxies={'https': '62.210.38.37:3838'} 代理ip,用了太慢 17 response = requests.get(url=url, headers=headers) 18 page_text = response.text
19 20 tree = etree.HTML(page_text) 21 li_list = tree.xpath('//*[@id="listvideoListUl"]/li') 22 23 urls = [] # 儲存所有視訊的連線和名字 24 for li in li_list: 25 new_url = 'https://www.pearvideo.com/' + li.xpath('./div/a/@href')[0] 26 new_name = li.xpath('./div/a/div[2]/text()')[0] + '.mp4' 27 #
這個方法行不通。因為mp4是動態加載出來的,因此需要抓包ajax請求中的url,不知道怎麼用python抓包,用瀏覽器的抓包工具 28 new_page_text = requests.get(url=new_url, headers=headers).text 29 new_tree = etree.HTML(new_page_text) 30 name = new_tree.xpath('//*[@id="detailsbd"]/div[1]/div[2]/div/div[1]/h1/text()')[0] 31 # print(name) 32 33 # 通過抓包ajax得到一個可以傳送的url和請求偽裝的視訊的url, 34 id_ = str(li.xpath('./div/a/@href')[0]).split('_')[1] 35 # 可傳送請求的url 36 ajax_url = 'https://www.pearvideo.com/videoStatus.jsp?' 37 params = { 38 'contId': id_, 39 'mrd': str(random.random()) 40 } 41 ajax_headers = { 42 "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3823.400 QQBrowser/10.7.4307.400', 43 'Referer': 'https://www.pearvideo.com/video_' + id_ 44 } 45 # 加了'Referer': 'https://www.pearvideo.com/video_1708144'後就不會顯示該視訊已下架了 46 dic_obj = requests.get(url=ajax_url, params=params, headers=ajax_headers).json() 47 video_url = dic_obj["videoInfo"]['videos']["srcUrl"] 48 49 # 此處視訊地址做了加密即ajax中得到的地址需要加上cont-,並且修改一段數字為id才是真地址 50 # 真地址:"https://video.pearvideo.com/mp4/third/20201120/cont-1708144-10305425-222728-hd.mp4" 51 # 偽地址:"https://video.pearvideo.com/mp4/third/20201120/1606132035863-10305425-222728-hd.mp4" 52 53 # 得到真url,做字串處理 54 video_true_url = '' 55 s_list = str(video_url).split('/') 56 # print(s_list) 57 for i in range(0, len(s_list)): 58 if i < len(s_list) - 1: 59 video_true_url += s_list[i] + '/' 60 else: 61 ss_list = s_list[i].split('-') 62 # print(ss_list) 63 for j in range(0, len(ss_list)): 64 if j == 0: 65 video_true_url += 'cont-' + id_ + '-' 66 elif j == len(ss_list) - 1: 67 video_true_url += ss_list[j] 68 else: 69 video_true_url += ss_list[j] + '-' 70 # print(video_true_url) 71 72 dic = { 73 'name': name, 74 'url': video_true_url 75 } 76 urls.append(dic) 77 78 # 使用執行緒池對視訊資料進行請求(較為耗時的阻塞操作) 79 def get_video_data(dic_): 80 url_ = dic_['url'] 81 video_data = requests.get(url=url_, headers=headers).content 82 video_path = './video/' + dic_['name'] 83 with open(video_path, 'wb') as fp: 84 fp.write(video_data) 85 print(dic_['name'], '下載成功') 86 87 88 pool = Pool(4) 89 pool.map(get_video_data, urls)