1. 程式人生 > 實用技巧 > python 爬取B站原視訊的例項程式碼

python 爬取B站原視訊的例項程式碼

這篇文章主要介紹了 python 爬取B站原視訊的例項程式碼,幫助大家更好地理解和使用 python 爬蟲,感興趣的朋友可以瞭解一下。

關於B站原視訊的爬取,我就不多說了,直接上程式碼,直接執行就好。
B站把視訊和音訊分開儲存,下載之後要把兩者合併起來才能使用,這一點需要分析請求才能看出來。另外,登入這一塊是比較難處理的。

import argparse
import os
import re
import subprocess

import prettytable
from DecryptLogin import login


  9 '''B站類'''
 10 class
Bilibili(): 11 def __init__(self, username, password, **kwargs): 12 self.username = username 13 self.password = password 14 self.session = Bilibili.login(username, password) 15 self.headers = { 16 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36
' 17 } 18 self.user_info_url = 'http://api.bilibili.com/x/space/acc/info' 19 self.submit_videos_url = 'http://space.bilibili.com/ajax/member/getSubmitVideos' 20 self.view_url = 'http://api.bilibili.com/x/web-interface/view' 21 self.video_player_url = 'http://api.bilibili.com/x/player/playurl' 22 '''執行主程式''' 23 def
run(self): 24 while True: 25 userid = input('請輸入目標使用者ID(例:345993405)(我的一個LOL好友凱撒可以關注他一下 謝謝) ——> ') 26 user_info = self.__getUserInfo(userid) 27 tb = prettytable.PrettyTable() 28 tb.field_names = list(user_info.keys()) 29 tb.add_row(list(user_info.values())) 30 print('獲取的使用者資訊如下:') 31 print(tb) 32 is_download = input('是否下載該使用者的所有視訊(y/n, 預設: y) ——> ') 33 if is_download == 'y' or is_download == 'yes' or not is_download: 34 self.__downloadVideos(userid) 35 '''根據userid獲得該使用者基本資訊''' 36 def __getUserInfo(self, userid): 37 params = {'mid': userid, 'jsonp': 'jsonp'} 38 res = self.session.get(self.user_info_url, params=params, headers=self.headers) 39 res_json = res.json() 40 user_info = { 41 '使用者名稱': res_json['data']['name'], 42 '性別': res_json['data']['sex'], 43 '個性簽名': res_json['data']['sign'], 44 '使用者等級': res_json['data']['level'], 45 '生日': res_json['data']['birthday'] 46 } 47 return user_info 48 '''下載目標使用者的所有視訊''' 49 def __downloadVideos(self, userid): 50 if not os.path.exists(userid): 51 os.mkdir(userid) 52 # 非會員使用者只能下載到高清1080P 53 quality = [('16', '流暢 360P'), 54 ('32', '清晰 480P'), 55 ('64', '高清 720P'), 56 ('74', '高清 720P60'), 57 ('80', '高清 1080P'), 58 ('112', '高清 1080P+'), 59 ('116', '高清 1080P60')][-3] 60 # 獲得使用者的視訊基本資訊 61 video_info = {'aids': [], 'cid_parts': [], 'titles': [], 'links': [], 'down_flags': []} 62 params = {'mid': userid, 'pagesize': 30, 'tid': 0, 'page': 1, 'order': 'pubdate'} 63 while True: 64 res = self.session.get(self.submit_videos_url, headers=self.headers, params=params) 65 res_json = res.json() 66 for item in res_json['data']['vlist']: 67 video_info['aids'].append(item['aid']) 68 if len(video_info['aids']) < int(res_json['data']['count']): 69 params['page'] += 1 70 else: 71 break 72 for aid in video_info['aids']: 73 params = {'aid': aid} 74 res = self.session.get(self.view_url, headers=self.headers, params=params) 75 cid_part = [] 76 for page in res.json()['data']['pages']: 77 cid_part.append([page['cid'], page['part']]) 78 
video_info['cid_parts'].append(cid_part) 79 title = res.json()['data']['title'] 80 title = re.sub(r"[‘'\/\\\:\*\?\"\<\>\|\s']", ' ', title) 81 video_info['titles'].append(title) 82 print('共獲取到使用者ID<%s>的<%d>個視訊...' % (userid, len(video_info['titles']))) 83 for idx in range(len(video_info['titles'])): 84 aid = video_info['aids'][idx] 85 cid_part = video_info['cid_parts'][idx] 86 link = [] 87 down_flag = False 88 for cid, part in cid_part: 89 params = {'avid': aid, 'cid': cid, 'qn': quality, 'otype': 'json', 'fnver': 0, 'fnval': 16} 90 res = self.session.get(self.video_player_url, params=params, headers=self.headers) 91 res_json = res.json() 92 if 'dash' in res_json['data']: 93 down_flag = True 94 v, a = res_json['data']['dash']['video'][0], res_json['data']['dash']['audio'][0] 95 link_v = [v['baseUrl']] 96 link_a = [a['baseUrl']] 97 if v['backup_url']: 98 for item in v['backup_url']: 99 link_v.append(item) 100 if a['backup_url']: 101 for item in a['backup_url']: 102 link_a.append(item) 103 link = [link_v, link_a] 104 else: 105 link = [res_json['data']['durl'][-1]['url']] 106 if res_json['data']['durl'][-1]['backup_url']: 107 for item in res_json['data']['durl'][-1]['backup_url']: 108 link.append(item) 109 video_info['links'].append(link) 110 video_info['down_flags'].append(down_flag) 111 # 開始下載 112 out_pipe_quiet = subprocess.PIPE 113 out_pipe = None 114 aria2c_path = os.path.join(os.getcwd(), 'tools/aria2c') 115 ffmpeg_path = os.path.join(os.getcwd(), 'tools/ffmpeg') 116 for idx in range(len(video_info['titles'])): 117 title = video_info['titles'][idx] 118 aid = video_info['aids'][idx] 119 down_flag = video_info['down_flags'][idx] 120 print('正在下載視訊<%s>...' 
% title) 121 if down_flag: 122 link_v, link_a = video_info['links'][idx] 123 # --視訊 124 url = '"{}"'.format('" "'.join(link_v)) 125 command = '{} -c -k 1M -x {} -d "{}" -o "{}" --referer="https://www.bilibili.com/video/av{}" {} {}' 126 command = command.format(aria2c_path, len(link_v), userid, title+'.flv', aid, "", url) 127 print(command) 128 process = subprocess.Popen(command, stdout=out_pipe, stderr=out_pipe, shell=True) 129 process.wait() 130 # --音訊 131 url = '"{}"'.format('" "'.join(link_a)) 132 command = '{} -c -k 1M -x {} -d "{}" -o "{}" --referer="https://www.bilibili.com/video/av{}" {} {}' 133 command = command.format(aria2c_path, len(link_v), userid, title+'.aac', aid, "", url) 134 print(command) 135 136 process = subprocess.Popen(command, stdout=out_pipe, stderr=out_pipe, shell=True) 137 process.wait() 138 # --合併 139 command = '{} -i "{}" -i "{}" -c copy -f mp4 -y "{}"' 140 command = command.format(ffmpeg_path, os.path.join(userid, title+'.flv'), os.path.join(userid, title+'.aac'), os.path.join(userid, title+'.mp4')) 141 print(command) 142 143 process = subprocess.Popen(command, stdout=out_pipe, stderr=out_pipe_quiet, shell=True) 144 process.wait() 145 os.remove(os.path.join(userid, title+'.flv')) 146 os.remove(os.path.join(userid, title+'.aac')) 147 else: 148 link = video_info['links'][idx] 149 url = '"{}"'.format('" "'.join(link)) 150 command = '{} -c -k 1M -x {} -d "{}" -o "{}" --referer="https://www.bilibili.com/video/av{}" {} {}' 151 command = command.format(aria2c_path, len(link), userid, title+'.flv', aid, "", url) 152 process = subprocess.Popen(command, stdout=out_pipe, stderr=out_pipe, shell=True) 153 process.wait() 154 os.rename(os.path.join(userid, title+'.flv'), os.path.join(userid, title+'.mp4')) 155 print('所有視訊下載完成, 該使用者所有視訊儲存在<%s>資料夾中...' 
% (userid)) 156 '''藉助大佬開源的庫來登入B站''' 157 @staticmethod 158 def login(username, password): 159 _, session = login.Login().bilibili(username, password) 160 return session 161 162 163 '''run''' 164 if __name__ == '__main__': 165 parser = argparse.ArgumentParser(description='下載B站指定使用者的所有視訊(僅支援Windows下使用)') 166 parser.add_argument('--username', dest='username', help='xxx', type=str, required=True) 167 parser.add_argument('--password', dest='password', help='xxxx', type=str, required=True) 168 print(parser) 169 args = parser.parse_args(['--password', 'xxxx','--username', 'xxx']) 170 # args = parser.parse_args(['--password', 'FOO']) 171 print('5') 172 bili = Bilibili(args.username, args.password) 173 bili.run()

把程式碼末尾的賬號(xxx)和密碼(xxxx)換成自己的就行。以上就是 python 爬取B站原視訊的例項程式碼的詳細內容。

福利:如果你的學習時間不是很緊張,並且又想快速地提高 Python 開發技術,Python 爬蟲、Python 大資料分析、人工智慧等技術,這裡給大家分享一套系統教學資源,加一下我建的 Python 技術學習群:九三七六六七五零九,一起學習。群裡有相關開發工具和學習教程!