1. 程式人生 > 實用技巧 > Python爬取b站視訊

Python爬取b站視訊

import random
import re
from urllib.parse import urlsplit

import requests


class BLBL(object):
    """Download one bilibili video given the URL of its page.

    The flow is: fetch the page HTML with browser-like headers (``html``),
    extract the video title, stream URL and stream host from that HTML
    (``xin_xi``), then download the stream to a ``.flv`` file in the current
    working directory (``video``). ``run`` chains the three steps.
    """

    # Socket timeout (seconds) for both HTTP requests, so a stalled
    # connection cannot hang the script forever.
    REQUEST_TIMEOUT = 30
    # Number of bytes written to disk per chunk while streaming the video.
    CHUNK_SIZE = 1024 * 1024

    def __init__(self, url, cookie, referer):
        """Store the request parameters.

        Args:
            url: Page URL to scrape, e.g.
                https://www.bilibili.com/video/av49035382
            cookie: Value sent as the ``Cookie`` header of the page request.
            referer: Value sent as the ``Referer`` header of both requests.
        """
        self.base_url = url
        self.cookie = cookie
        self.referer = referer
        # Header values shared by the page request and the download request.
        self.accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3'
        # NOTE(review): 'br' is advertised here; confirm the installed
        # requests/urllib3 can decode Brotli, otherwise the body returned by
        # html() may be unreadable.
        self.accept_Encoding = 'gzip, deflate, br'
        self.accept_Language = 'zh-CN,zh;q=0.9,en;q=0.8'
        # The header *value* only: the original string started with a literal
        # "User-Agent:" prefix, which was sent as part of the value.
        self.user_agent = ("Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) "
                           "AppleWebKit/534.50 (KHTML, like Gecko)")

    def html(self):
        """Fetch the video page and return its HTML source as text.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        # Browser-like headers; the original author notes that without them
        # the site does not return the complete page source (anti-scraping).
        base_headers = {
            'Accept': self.accept,
            'Accept-Encoding': self.accept_Encoding,
            'Accept-Language': self.accept_Language,
            'Cache-Control': 'max-age=0',
            'Connection': 'keep-alive',
            'Cookie': self.cookie,
            'Host': 'www.bilibili.com',
            'Referer': self.referer,
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': self.user_agent,
        }
        # The headers are deliberately not printed: they contain the cookie.
        base_response = requests.get(
            self.base_url,
            headers=base_headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        base_response.raise_for_status()
        return base_response.text

    @staticmethod
    def _safe_filename(title):
        """Return *title* with characters that are unsafe in file names replaced."""
        cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', title)
        return cleaned.strip(' .')

    def xin_xi(self, html):
        """Extract the video file name, stream URL and stream host from *html*.

        Args:
            html: Page source as returned by ``html()``.

        Returns:
            A ``(video_name, download_url, host)`` tuple. ``video_name`` ends
            in ``.flv``; ``host`` is the network location of ``download_url``.

        Raises:
            ValueError: If no stream URL can be found in *html*.
        """
        # File name: the page <title>, or a random number when the title is
        # missing or nothing usable remains after sanitising it.
        title_match = re.search('<title>(.+)</title>', html, re.S)
        stem = self._safe_filename(title_match.group(1)) if title_match else ''
        if not stem:
            stem = str(random.randint(100000, 1000000))
        video_name = stem + '.flv'
        print(video_name)

        # Stream URL: the first "url"/"baseUrl"/"backupUrl" value in the page.
        url_match = re.search(
            r'("url":"|"baseUrl":"|"backupUrl":\[")(.+?)("|"])', html, re.S
        )
        if url_match is None:
            raise ValueError('no video stream URL found in the page source')
        download_url = url_match.group(2)
        print(download_url)

        # Parse the host instead of the greedy '//(.+\.com)' regex, which
        # captured up to the LAST '.com' in the URL and failed for hosts
        # under other top-level domains.
        host = urlsplit(download_url).netloc
        print(host)
        return video_name, download_url, host

    def video(self, html):
        """Download the video referenced by *html* into the working directory.

        Raises:
            ValueError: If no stream URL can be found in *html*.
            requests.HTTPError: If the download request returns an error status.
        """
        video_name, download_url, host = self.xin_xi(html)
        # Headers required when requesting the video stream itself.
        download_headers = {
            'User-Agent': self.user_agent,
            'Referer': self.referer,
            'Origin': 'https://www.bilibili.com',
            'Host': host,
            'Accept': self.accept,
            'Accept-Encoding': self.accept_Encoding,
            'Accept-Language': self.accept_Language,
        }
        # NOTE(review): verify=False disables TLS certificate verification.
        # It is kept from the original; drop it unless it is really needed.
        with requests.get(
            download_url,
            headers=download_headers,
            stream=True,
            verify=False,
            timeout=self.REQUEST_TIMEOUT,
        ) as response:
            # Fail before creating the file so an error page is never saved
            # as a .flv.
            response.raise_for_status()
            # Write chunk by chunk so the whole video is never held in memory.
            with open(video_name, 'wb') as f:
                for chunk in response.iter_content(chunk_size=self.CHUNK_SIZE):
                    if chunk:
                        f.write(chunk)

    def run(self):
        """Fetch the page, download the video and report success."""
        html = self.html()
        self.video(html)
        print('爬取成功')


if __name__ == '__main__':
    url = input("請輸入網址:")
    # Log in to bilibili in a browser, open the developer console and paste
    # your own Cookie header value here.
    cookie = ""
    # The original script passed an undefined `referer`; the video page
    # itself is used as the referer.
    referer = url
    blbl = BLBL(url, cookie, referer)
    blbl.run()