爬蟲(4):抓取ajax資料
阿新 • • 發佈:2019-01-28
import json
import urllib.request

# Request headers: present a desktop-browser User-Agent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"}

# AJAX endpoint queried by the crawl; ``%d`` is filled with the ``start`` offset.
SEARCH_URL = "https://movie.douban.com/j/new_search_subjects?sort=T&range=0,10&tags=&start=%d"
# The ``start`` offset advances by this much from one request to the next.
PAGE_SIZE = 20
# Number of consecutive pages requested by ``main``.
PAGE_COUNT = 100
# File that receives one line per movie (appended, never truncated).
OUTPUT_PATH = 'movie.txt'
# Seconds to wait on the network before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


def build_url(page):
    """Return the search URL for the zero-based page index *page*."""
    return SEARCH_URL % (page * PAGE_SIZE)


def fetch_page(page):
    """Download page *page* and return the decoded JSON payload.

    Raises:
        urllib.error.URLError: if the request fails or times out.
        json.JSONDecodeError: if the response body is not valid JSON.
    """
    req = urllib.request.Request(build_url(page), headers=headers)
    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as response:
        body = response.read().decode('utf-8')
    return json.loads(body)


def extract_records(payload):
    """Return a list of ``(casts, directors)`` tuples from a JSON payload.

    Raises:
        KeyError: if *payload* has no ``'data'`` key, or an entry lacks
            ``'casts'`` or ``'directors'``.
    """
    return [(movie['casts'], movie['directors']) for movie in payload['data']]


def format_record(record):
    """Return the text line written to the output file for *record*."""
    return str(record) + '\n'


def main():
    """Crawl ``PAGE_COUNT`` pages, printing and appending every record found."""
    for page in range(PAGE_COUNT):
        records = extract_records(fetch_page(page))
        if not records:
            # Nothing to report for this page; do not touch the output file.
            continue

        for casts, directors in records:
            print(casts, directors)

        # Open the file once per page rather than once per record; leaving
        # the ``with`` block flushes and closes it, so finished pages are
        # already on disk if a later request fails.
        with open(OUTPUT_PATH, 'a', encoding='utf-8', errors='ignore') as f:
            f.writelines(format_record(record) for record in records)


if __name__ == "__main__":
    main()