1. 程式人生 > 其它 > Python爬蟲實戰——音樂爬取

Python爬蟲實戰——音樂爬取

技術標籤:python、json

import json
import os
import re

import requests
# Search endpoint whose JSON response carries the rid of every hit.
# The "key" query parameter is the URL-encoded search term (周杰伦).
url = "http://www.kuwo.cn/api/www/search/searchMusicBykeyWord?key=%E5%91%A8%E6%9D%B0%E4%BC%A6&pn=1&rn=30&httpsStatus=1&reqId=b287f1e0-37c9-11eb-846b-ed84ae20f627"

# Request headers that make the script look like a browser session.
# Note that "csrf" carries the same value as the kw_token cookie.
header = {
    "Cookie": "Hm_lvt_cdb524f42f0ce19b169a8071123a4797=1607262271; Hm_lpvt_cdb524f42f0ce19b169a8071123a4797=1607262271; _ga=GA1.2.1231848360.1607262271; _gid=GA1.2.1396442198.1607262271; kw_token=Z32FDL7NMN",
    "csrf": "Z32FDL7NMN",
    "Host": "www.kuwo.cn",
    "Referer": "http://www.kuwo.cn/search/list?key=%E5%91%A8%E6%9D%B0%E4%BC%A6",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
}

# Folder the mp3 files are written to; change it to a location that suits
# your machine.  It is created on demand if it does not exist yet.
save_dir = "C:/Users/Music/"

# Seconds to wait on any single HTTP request before giving up, so that a
# stalled connection cannot hang the script forever.
REQUEST_TIMEOUT = 10


def safe_filename(name):
    """Return *name* made safe to use as a single file-name component.

    Path separators, characters that Windows forbids in file names and
    control characters are replaced by ``_``; trailing dots and spaces
    (also rejected by Windows) are dropped.  An empty result becomes
    ``"_"`` so that a file name always remains.
    """
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", str(name)).rstrip(" .")
    return cleaned or "_"


def extract_tracks(payload):
    """Return the ``(rid, name)`` pair of every entry in a search response.

    Args:
        payload: The parsed JSON of the search request; the entries are
            read from ``payload['data']['list']``.

    Raises:
        KeyError: If the response does not have the expected layout.
    """
    return [(item['rid'], item['name']) for item in payload['data']['list']]


def download_track(rid, name, target_dir):
    """Download one song and return the path of the file that was written.

    Args:
        rid: Track id taken from the search response.
        name: Song title; used (sanitised) as the file name.
        target_dir: Existing directory that receives ``<name>.mp3``.

    Raises:
        requests.RequestException: On a network failure, a timeout or a
            non-2xx HTTP status.
        KeyError: If the lookup response carries no ``url`` field.
    """
    # Ask the site for the address of the mp3 that belongs to this rid.
    url2 = f"http://www.kuwo.cn/url?format=mp3&rid={rid}&response=url&type=convert_url3&br=128kmp3&from=web&t=1607262358400&httpsStatus=1&reqId=5ed69f10-37c9-11eb-bcbe-c7b03e63588b"
    res2 = requests.get(url2, headers=header, timeout=REQUEST_TIMEOUT)
    res2.raise_for_status()
    music_url = json.loads(res2.text)['url']

    # The audio itself is fetched without the kuwo.cn headers, exactly as
    # before.  NOTE(review): presumably the file lives on another host, for
    # which the fixed "Host" header would be wrong -- confirm.
    res3 = requests.get(music_url, timeout=REQUEST_TIMEOUT)
    res3.raise_for_status()

    filename = os.path.join(target_dir, safe_filename(name) + ".mp3")
    with open(filename, "wb") as f:
        f.write(res3.content)
    return filename


def main():
    """Search once, then download every song listed in the response."""
    res = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    # The search result is JSON, so decode it with the json module.
    tracks = extract_tracks(json.loads(res.text))

    os.makedirs(save_dir, exist_ok=True)
    for count, (rid, name) in enumerate(tracks, start=1):
        download_track(rid, name, save_dir)
        print(f'第{count}首歌曲下載成功')


if __name__ == "__main__":
    main()