python3通過requests多程序獲取駕校一點通試題庫
阿新 • • 發佈:2020-12-31
- 通過開發者工具找到試題連結地址;
- 對試題連結的url進行分析,發現index是試題id,r是隨機數;index可使用range或者excel拉出全部;
- 對json資料進行欄位分析
- 我這裡分開寫了兩個指令碼,一個是獲取資料一個是轉成excel,本文主要為多程序獲取資料
- 開發環境python3.9.1/windows10/vscode
-
# coding: utf-8
"""Download question data in parallel and store it as JSON lines.

Reads one target per line from ``URLS_FILE`` (each line is a URL without the
``http://`` scheme), fetches every target in a pool of worker processes and
appends each decoded JSON response as one line to ``OUTPUT_FILE``.
"""
import json
from concurrent.futures import ProcessPoolExecutor

import requests

# Example of the endpoint being fetched:
# url = 'http://mnks.jxedt.com/get_question?r=0.5376675619396274&index=3'

URLS_FILE = 'D:/YYFX/ip.txt'
OUTPUT_FILE = 'data.json'
MAX_WORKERS = 20

# Targets to fetch. Filled by main() rather than at import time: with the
# "spawn" start method every worker process re-imports this module, so
# module scope must stay free of file I/O and pool creation.
urls_list = []

# Browser-like request header.
hea = {'User-Agent':'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'}


def _load_urls(path):
    """Return the non-empty, whitespace-stripped lines of the file at *path*."""
    with open(path, 'r') as f:
        stripped = (line.strip() for line in f)
        # Skip blank lines; they would otherwise be requested as "http://".
        return [line for line in stripped if line]


def get_page(url):
    """Fetch ``http://<url>`` and return the response body as text.

    Runs inside a worker process.

    Args:
        url: Target without the scheme, e.g. ``host/path?query``.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.RequestException: If the request fails or times out.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    response = requests.get('http://%s' % url, headers=hea, timeout=30, verify=False)
    # Decode the raw bytes explicitly instead of relying on charset guessing.
    return response.content.decode('utf-8')


def read_data(future, *args, **kwargs):
    """Done-callback: parse a finished download and append it to the output.

    Failed downloads and bodies that are not valid JSON are reported and
    skipped so that one bad target does not hide the reason for the failure.

    Args:
        future: Completed future whose result is the text from get_page().
        *args: Ignored.
        **kwargs: Ignored.
    """
    try:
        response = future.result()
    except Exception as exc:  # callback boundary: report and skip this item
        print('download failed: %r' % (exc,))
        return

    try:
        state = json.loads(response)
    except ValueError as exc:  # json.JSONDecodeError is a ValueError
        print('invalid JSON response: %r' % (exc,))
        return

    print(state)
    with open(OUTPUT_FILE, 'a', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII text readable in the saved file.
        f.write(json.dumps(state, ensure_ascii=False) + '\n')


def main():
    """Load the target list, fetch every target and wait for completion."""
    urls_list[:] = _load_urls(URLS_FILE)
    # Leaving the with-block calls shutdown(wait=True) on the pool.
    with ProcessPoolExecutor(MAX_WORKERS) as pool:
        for url in urls_list:
            done = pool.submit(get_page, url)
            done.add_done_callback(read_data)


if __name__ == '__main__':
    main()