使用多程序請求多個url來減少網路等待浪費的時間
阿新 • • 發佈:2020-12-26
code
from multiprocessing import Pool
import requests
import json
import os


def get_page(url):
    """Fetch *url* in a worker process.

    Returns {'url': url, 'text': body} on HTTP 200, otherwise None
    (the callback must therefore tolerate a None result).
    """
    print('<程序%s> get %s' % (os.getpid(), url))
    # timeout so a dead/slow host cannot hang a pool worker forever
    response = requests.get(url, timeout=30)
    if response.status_code == 200:
        return {'url': url, 'text': response.text}
    return None  # explicit: non-200 responses produce no result


def pasrse_page(res):
    """Callback run in the MAIN process with each get_page result.

    Appends 'url:<...> size:[...]' to db.txt.
    NOTE(review): name has a typo ('pasrse'); kept for backward compatibility.
    """
    if res is None:
        # get_page got a non-200 status; nothing to record
        return
    print('<程序%s> parse %s' % (os.getpid(), res['url']))
    parse_res = 'url:<%s> size:[%s]\n' % (res['url'], len(res['text']))
    with open('db.txt', 'a') as f:
        f.write(parse_res)


if __name__ == '__main__':
    urls = [
        'https://www.baidu.com',
        'https://www.python.org',
        'https://www.openstack.org',
        'https://help.github.com/',
        'http://www.sina.com.cn/',
    ]
    # 3 worker processes fetch concurrently; the callback runs in the
    # main process as each result arrives, so file writes never race.
    p = Pool(3)
    res_l = []
    for url in urls:
        res = p.apply_async(get_page, args=(url,), callback=pasrse_page)
        res_l.append(res)
    p.close()
    p.join()
    # print([res.get() for res in res_l])
    # The get_page results are already consumed by the callback, so
    # collecting them again here is unnecessary.
macname@MacdeMacBook-Pro py % python3 cccccc.py <程序61068> get https://www.baidu.com <程序61069> get https://www.python.org <程序61070> get https://www.openstack.org <程序61068> get https://help.github.com/ <程序61067> parse https://www.baidu.com <程序61069> get http://www.sina.com.cn/ <程序61067> parse https://www.python.org <程序61067> parse http://www.sina.com.cn/ <程序61067> parse https://help.github.com/ <程序61067> parse https://www.openstack.org macname@MacdeMacBook-Pro py %