Scrapy 下載中間件設定
阿新 • • 發佈:2018-12-09
class ProxyMiddleWare(object):
    """Downloader middleware that sends each request through a random proxy.

    A proxy is picked from ``proxy_file`` for every outgoing request. When a
    response comes back with a status other than 200, the same request is
    re-issued through a freshly picked proxy, up to ``max_proxy_retries``
    times per request.
    """

    # Text file holding the proxy pool, one proxy per line.
    proxy_file = 'G:\\Scrapy_work\\myproxies\\myproxies\\proxies.txt'
    # Seconds to wait before re-reading the file when it has no usable entry.
    empty_file_delay = 1
    # Upper bound on proxy-switch retries for one request, so that a URL that
    # never answers 200 cannot be rescheduled forever.
    max_proxy_retries = 10

    def process_request(self, request, spider):
        """Attach a randomly chosen proxy to the outgoing request.

        Returns None so that the request continues through the remaining
        middlewares and is downloaded.
        """
        proxy = self.get_random_proxy()
        print("this is request ip:" + proxy)
        request.meta['proxy'] = proxy

    def process_response(self, request, response, spider):
        """Retry non-200 responses through a different proxy.

        Returns the response unchanged when its status is 200 or when the
        request has already been retried ``max_proxy_retries`` times.
        Otherwise returns the request, updated with a new proxy, so that it
        is scheduled for download again.
        """
        if response.status == 200:
            return response

        retries = request.meta.get('proxy_retry_times', 0)
        if retries >= self.max_proxy_retries:
            # Give up and hand the failing response on instead of looping.
            return response

        proxy = self.get_random_proxy()
        print("this is response ip:" + proxy)
        request.meta['proxy'] = proxy
        request.meta['proxy_retry_times'] = retries + 1
        # The request was already seen by the scheduler once; without this
        # flag the duplicate filter would discard the retry.
        request.dont_filter = True
        return request

    def get_random_proxy(self):
        """Return one randomly chosen, non-empty proxy entry from the file.

        The file is re-read on every call. While it contains no usable entry
        (empty, or only blank lines) this method sleeps ``empty_file_delay``
        seconds and tries again, so it blocks until a proxy is available.
        """
        # Imported locally so the class does not rely on module-level imports.
        import random
        import time

        while True:
            with open(self.proxy_file, 'r') as f:
                # Strip first, then drop blanks, so '' is never handed out.
                proxies = [line.strip() for line in f if line.strip()]
            if proxies:
                return random.choice(proxies)
            time.sleep(self.empty_file_delay)