1. 程式人生 > 其它 > 爬蟲_urllib中ajax的get請求

爬蟲_urllib中ajax的get請求

1.爬取豆瓣電影第一頁資料並下載

# GET request
# Fetch the first page of the Douban movie chart and save it to a local file.
import urllib.request

url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100:90&action=&start=0&limit=20'

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36'
}

# (1) Build a customised request object that carries the User-Agent header.
request = urllib.request.Request(url=url, headers=headers)

# (2) Fetch the response body. The context manager closes the HTTP
#     connection even if reading or decoding raises.
with urllib.request.urlopen(request) as response:
    content = response.read().decode('utf-8')

# (3) Save the data locally.
# open() falls back to a platform-dependent default encoding (e.g. GBK on a
# Chinese-locale Windows), so pass encoding='utf-8' explicitly to store
# Chinese text reliably. The `with` block guarantees the file is flushed
# and closed.
with open('douban.json', 'w', encoding='utf-8') as ft:
    ft.write(content)

2.爬取豆瓣電影前10頁資料並下載到本地

經過分析,得到請求介面地址(其中 start 為起始偏移量,limit 為每頁筆數):https://movie.douban.com/j/chart/top_list?type=5&interval_id=100:90&action=&start=0&limit=20

# GET request
# Fetch a range of pages (e.g. the first 10) of the Douban movie chart and
# save each page to its own JSON file.
import urllib.request
import urllib.parse

# Endpoint for page 1, kept for reference only; the per-page URL is built
# inside create_request().
url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100:90&action=&start=0&limit=20'

# The endpoint is paged through the `start` query parameter:
#   https://movie.douban.com/j/chart/top_list?type=5&interval_id=100:90&action=&start=0&limit=20
#   https://movie.douban.com/j/chart/top_list?type=5&interval_id=100:90&action=&start=20&limit=20
#   https://movie.douban.com/j/chart/top_list?type=5&interval_id=100:90&action=&start=40&limit=20
#   https://movie.douban.com/j/chart/top_list?type=5&interval_id=100:90&action=&start=60&limit=20
#
#   page  : 1   2   3   4
#   start : 0  20  40  60
#   start = (page - 1) * 20


# author: tnwner, 2022-05-23 13:19
def create_request(page):
    """Build the customised request object for one page of the chart.

    Args:
        page: 1-based page number; it is mapped to the query offset
            ``start = (page - 1) * 20`` with ``limit = 20``.

    Returns:
        A ``urllib.request.Request`` for the page URL carrying the
        User-Agent header. The URL is also printed to stdout.
    """
    base_url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100:90&action=&'

    data = {
        'start': (page - 1) * 20,
        'limit': 20,
    }
    # urlencode turns the dict into "start=..&limit=.." for the query string.
    data = urllib.parse.urlencode(data)

    url = base_url + data
    print(url)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36'
    }

    request = urllib.request.Request(url=url, headers=headers)
    return request


# author: tnwner, 2022-05-23 13:19
def get_content(request):
    """Send *request* and return the response body decoded as UTF-8 text.

    The response is used as a context manager so the underlying connection
    is closed even when reading or decoding fails.
    """
    with urllib.request.urlopen(request) as response:
        content = response.read().decode('utf-8')
    return content


def down_load(page, content):
    """Write *content* to ``douban_<page>.json`` in the current directory.

    The file is opened with an explicit UTF-8 encoding so that Chinese text
    is stored correctly regardless of the platform default.
    """
    with open('douban_' + str(page) + '.json', 'w', encoding='utf-8') as fp:
        fp.write(content)


def main():
    """Ask for a start and end page, then download every page in between."""
    start_page = int(input('請輸入起始的頁碼'))
    end_page = int(input('請輸入結束的頁碼'))

    # end_page is inclusive, hence the +1.
    for page in range(start_page, end_page + 1):
        # Build the customised request object.
        request = create_request(page)
        # Fetch the response data.
        content = get_content(request)
        # Save it to disk.
        down_load(page, content)


if __name__ == '__main__':
    main()