1. 程式人生 > 實用技巧 > 爬取梨視訊

爬取梨視訊

import os
import re

import requests

# Download the listing page; res.text holds the HTML of the whole page,
# which contains one anchor per video.
res = requests.get('https://www.pearvideo.com/popular_9')

# Capture the href of every anchor carrying class="actplay".
re_video = '<a href="(.*?)" class="actplay">'
video_urls = re.compile(re_video).findall(res.text)

# Each entry is only the trailing part of a video link; the for loop below
# prefixes the site root to build the full detail-page address.
print(video_urls)
# https://www.pearvideo.com/
# Make sure the download directory exists; open() below would otherwise
# raise FileNotFoundError when 'video/' has not been created yet.
os.makedirs('video', exist_ok=True)

for video in video_urls:
    # Detail page of the video; it doubles as the Referer for the status request.
    url = 'https://www.pearvideo.com/' + video
    print(url)

    # The part after the first underscore of the href is the video id.
    video_id = video.split('_')[1]
    video_url = 'https://www.pearvideo.com/videoStatus.jsp?contId=' + video_id

    # Imitate the AJAX call made by the detail page: the status endpoint is
    # queried with the detail page as Referer.
    header = {'Referer': url}
    res = requests.get(video_url, headers=header)

    # Decode the JSON body once and reuse it for both the debug print and the lookup.
    video_info = res.json()
    print(video_info, 11111111111)
    real_mp4 = video_info['videoInfo']['videos']['srcUrl']

    # The srcUrl returned by the endpoint is not directly playable, e.g.
    #   https://video.pearvideo.com/mp4/adshort/20210118/1611024074140-15578857_adpkg-ad_hd.mp4
    # Replacing the leading segment of the file name (everything before the
    # first '-') with 'cont-<video id>' yields the playable link, e.g.
    #   https://video.pearvideo.com/mp4/adshort/20210118/cont-1716868-15578857_adpkg-ad_hd.mp4
    real_mp42 = real_mp4.replace(
        real_mp4.split('/')[-1].split('-')[0], 'cont-%s' % video_id
    )
    print(real_mp42)

    name = real_mp42.split('/')[-1]
    # Stream the body instead of loading the whole file into memory, and write
    # it in 64 KiB chunks: iter_content() defaults to chunk_size=1, which
    # would issue one write per byte.
    with requests.get(real_mp42, stream=True) as res:
        with open('video/%s' % name, 'wb') as f:
            for chunk in res.iter_content(chunk_size=64 * 1024):
                f.write(chunk)