python筆記--多程序與多執行緒
阿新 • • 發佈:2019-02-17
import time
import requests
import current
import concurrent
from concurrent import futures
import pandas as pd
import threading
from multiprocessing import Pool
# Decorator: print how long the wrapped function takes to run.
def gettime(func):
    """Wrap ``func`` so that each call prints a banner and its elapsed time.

    The wrapper forwards all positional and keyword arguments to ``func``
    and returns whatever ``func`` returns, so decorated functions keep
    their normal calling contract.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same name/docstring as ``func``.
    """
    # Imported locally so this block does not depend on a new file-level import.
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        print("="*50)
        print(func.__name__, 'Strat...')
        starttime = time.time()
        # Keep the result: the original wrapper dropped it (and the kwargs).
        result = func(*args, **kwargs)
        spendtime = time.time() - starttime
        print(func.__name__, "End..." )
        print("Spend", spendtime, "s totally")
        print("="*50)
        return result

    return wrapper
# Load the first n test URLs from a CSV file.
def get_urls_from_files(n, path='TestUrls.csv'):
    """Return up to ``n`` values from the ``url`` column of a CSV file.

    Args:
        n: Maximum number of URLs to return (used as a slice bound, so
            ``None`` returns every row).
        path: CSV file to read; it must contain a ``url`` column.
            Defaults to ``'TestUrls.csv'`` in the current directory.

    Returns:
        A list with the first ``n`` entries of the ``url`` column.
    """
    df = pd.read_csv(path)
    return list(df['url'])[:n]
# Request a page and return its body text.
def getdata(url, retries=3):
    """Fetch ``url`` and return the response text, retrying on 5XX errors.

    Args:
        url: Address to request with ``requests.get``.
        retries: How many additional attempts to make after a 5XX
            (server error) response.

    Returns:
        The response body text for any non-5XX response, or ``None`` if
        the connection failed or every attempt ended in a 5XX response.
    """
    headers = {}
    while True:
        try:
            html = requests.get(url, headers=headers)
        except requests.exceptions.ConnectionError as e:
            print('下載出錯,錯誤原因:', e)
            return None
        if not 500 <= html.status_code < 600:
            return html.text
        # 5XX is a server-side error, so the same request may succeed later.
        if retries <= 0:
            return None
        retries -= 1
        print("伺服器出錯正在重試...")
# Serial baseline: one request after another.
@gettime
def Mynormal():
    """Call ``getdata`` on every entry of the module-level ``urls``, in order.

    The fetched data is discarded; the function exists to be timed.
    """
    # map() is lazy, so draining it performs the calls one at a time.
    for _ in map(getdata, urls):
        pass
# Process pool
@gettime
def MyprocessPool(num=10):
    """Fetch every entry of the module-level ``urls`` using a process pool.

    Args:
        num: Number of worker processes in the pool.

    Returns:
        The list produced by ``pool.map(getdata, urls)``.
    """
    pool = Pool(num)
    try:
        return pool.map(getdata, urls)
    finally:
        # Always shut the workers down, even if map() raised; the original
        # skipped close()/join() in that case and leaked the pool.
        pool.close()
        pool.join()
# Thread pool
@gettime
def Myfutures(num_of_max_works=10):
    """Fetch every entry of the module-level ``urls`` using a thread pool.

    Args:
        num_of_max_works: Maximum number of worker threads.

    Returns:
        A list with the ``getdata`` result for each URL, in input order.
    """
    # The class is ThreadPoolExecutor; the lowercase "threadPoolExecutor"
    # used before does not exist and raised AttributeError.
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_of_max_works) as executor:
        # Materialize the lazy iterator so results are collected and any
        # exception raised in a worker surfaces here.
        return list(executor.map(getdata, urls))
if __name__ == '__main__':
    # The benchmark functions read this module-level name.
    # The loader is get_urls_from_files; the original call dropped the
    # trailing "s" and raised NameError.
    urls = get_urls_from_files(100)
    # Serial
    Mynormal()
    # Process pool
    MyprocessPool(10)
    # Thread pool
    Myfutures(100)