爬取喜馬拉雅FM分類榜的程式碼分析
阿新 • • 發佈:2018-12-02
# Download the audio tracks of every album on a Ximalaya FM category
# ranking list.  For each album returned by the ranking endpoint a folder is
# created under ``base_path`` and the tracks of the first ``PAGE_COUNT``
# pages of that album are saved into it as ``<track name>.m4a``.
import json
import os

import requests

# Browser-like User-Agent sent with the API requests.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/65.0.3325.162 Safari/537.36'
}
# Root directory for downloads; must end with a path separator because the
# album folder name is appended to it directly.
base_path = 'F:\\SxhMycode\\PYtest\\ead\\FM\\'

# Category code of the ranking list to fetch.
page = 'youshengshu'
# Ranking-list endpoint for the chosen category.
theme_url = 'https://www.ximalaya.com/revision/getRankList?code=' + page
# Track-list endpoint of one album; placeholders are album id and page number.
start_url = 'https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&sort=-1&pageSize=30'

# Number of track-list pages to download per album (raise it to get more).
PAGE_COUNT = 1
# Seconds to wait on any single HTTP request before giving up, so that a
# stalled connection cannot hang the script forever.
REQUEST_TIMEOUT = 30

# Characters that cannot appear in a Windows file or folder name (reserved
# punctuation plus ASCII control characters), each mapped to an underscore.
_ILLEGAL_NAME_CHARS = str.maketrans(
    {ch: '_' for ch in '\\/:*?"<>|' + ''.join(map(chr, range(32)))}
)


def mkdir(path):
    """Create directory *path*, including parents, if it does not exist.

    Surrounding whitespace and trailing backslashes are removed from *path*
    first.  A confirmation line is printed only when a directory was
    actually created.
    """
    path = path.strip().rstrip("\\")
    if not os.path.exists(path):
        os.makedirs(path)
        # Report success only after the directory really exists.
        print(path + ' 建立成功')


def _safe_name(name):
    """Return *name* made usable as a single file or folder name.

    Reserved characters are replaced by ``_`` and surrounding whitespace and
    trailing dots are dropped; an empty result becomes ``'_'``.
    """
    cleaned = name.translate(_ILLEGAL_NAME_CHARS).strip().rstrip(' .')
    return cleaned or '_'


def _fetch_json(url):
    """GET *url* with the module headers and return the decoded JSON body.

    Raises ``requests.HTTPError`` on a non-success status instead of trying
    to parse an error page as JSON.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return json.loads(response.content.decode())


def _download_track(track, album_dir):
    """Save one track (a dict with ``src`` and ``trackName``) in *album_dir*.

    Tracks without a source URL and tracks whose download fails are reported
    and skipped so that one bad entry does not abort the whole run.
    """
    name = track['trackName']
    src = track['src']
    if not src:
        # NOTE(review): presumably the API omits the URL for tracks that are
        # not freely playable - confirm against a real response.
        print('%s skipped: no audio URL' % name)
        return
    try:
        # Fetch the whole payload before touching the disk so that a failed
        # request never leaves an empty or truncated file behind.
        response = requests.get(src, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print('%s failed: %s' % (name, exc))
        return
    target = os.path.join(album_dir, _safe_name(name) + '.m4a')
    # 'wb' rather than 'ab': re-running the script must replace an existing
    # file, not append a second copy of the audio to it.
    with open(target, 'wb') as f:
        f.write(response.content)
    print(name)


def main():
    """Download the configured pages of every album on the ranking list."""
    albums = _fetch_json(theme_url)['data']['albums']
    for album in albums:
        # The album title becomes the folder name, so sanitise it first; the
        # same cleaned path is used for both mkdir and the file writes.
        album_dir = base_path + _safe_name(album['albumTitle'])
        mkdir(album_dir)
        for page_num in range(1, PAGE_COUNT + 1):
            listing = _fetch_json(start_url.format(album['id'], page_num))
            for track in listing['data']['tracksAudioPlay']:
                _download_track(track, album_dir)


if __name__ == '__main__':
    main()