Python爬蟲實戰——音樂爬取
阿新 • • 發佈:2020-12-17
import requests
import re
import json
# Search endpoint; the rid values of the songs are read from its response.
url = (
    "http://www.kuwo.cn/api/www/search/searchMusicBykeyWord"
    "?key=%E5%91%A8%E6%9D%B0%E4%BC%A6"
    "&pn=1&rn=30&httpsStatus=1"
    "&reqId=b287f1e0-37c9-11eb-846b-ed84ae20f627"
)
# Request headers that make the script look like a regular browser.
# The "csrf" value is the same token as kw_token inside the cookie.
header = {
    "Cookie": (
        "Hm_lvt_cdb524f42f0ce19b169a8071123a4797=1607262271; "
        "Hm_lpvt_cdb524f42f0ce19b169a8071123a4797=1607262271; "
        "_ga=GA1.2.1231848360.1607262271; "
        "_gid=GA1.2.1396442198.1607262271; "
        "kw_token=Z32FDL7NMN"
    ),
    "csrf": "Z32FDL7NMN",
    "Host": "www.kuwo.cn",
    "Referer": "http://www.kuwo.cn/search/list?key=%E5%91%A8%E6%9D%B0%E4%BC%A6",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/87.0.4280.88 Safari/537.36"
    ),
}
# Fetch the search results with a GET request. The timeout keeps the script
# from hanging forever on a stalled connection, and raise_for_status() reports
# an HTTP error here instead of as a confusing JSON/KeyError failure below.
res = requests.get(url, headers=header, timeout=10)
res.raise_for_status()
# The response body is JSON, so decode it with Python's json module.
dicts = json.loads(res.text)
# Every entry of data -> list describes one song; keep the two fields the
# download step needs, in matching order.
songs = dicts['data']['list']
rids = [song['rid'] for song in songs]  # rid of each song found
names = [song['name'] for song in songs]  # title of each song found
# Folder the song files are written to. Change it to a folder that exists on
# your own machine, otherwise opening the output file below will fail.
# NOTE: `dir` shadows the builtin of the same name; the name is kept so that
# any other code referring to it keeps working.
dir = "C:/Users/Music/"
for count, (rid, name) in enumerate(zip(rids, names), start=1):
    # Build the per-song URL from the original one by plugging in the rid.
    url2 = "http://www.kuwo.cn/url?format=mp3&rid="+str(rid)+"&response=url&type=convert_url3&br=128kmp3&from=web&t=1607262358400&httpsStatus=1&reqId=5ed69f10-37c9-11eb-bcbe-c7b03e63588b"
    res2 = requests.get(url2, headers=header, timeout=10)
    res2.raise_for_status()
    # Extract the address of the mp3 file from the JSON reply.
    text = json.loads(res2.text)
    music_url = text['url']
    res3 = requests.get(music_url, timeout=30)
    # Fail loudly rather than saving an HTTP error page as an .mp3 file.
    res3.raise_for_status()
    # Song titles may contain characters that are not allowed in Windows file
    # names (or that would be read as path separators); replace them.
    safe_name = re.sub(r'[\\/:*?"<>|]', '_', name)
    filename = dir+safe_name+".mp3"
    # Write the downloaded bytes to disk.
    with open(filename, "wb") as f:
        f.write(res3.content)
    print('第'+str(count)+'首歌曲下載成功')