scrapy 自定義代理
阿新 • • 發佈:2018-11-05
自帶的代理不好用,可以自定義。
class ProxyMiddleware(object):
    """Scrapy downloader middleware that routes each request through a
    randomly chosen HTTP proxy, adding Basic auth when the proxy needs it.

    Enable it in settings.py, e.g.:
        DOWNLOADER_MIDDLEWARES = {'step8_king.middlewares.ProxyMiddleware': 500}
    """

    # Proxy pool, hoisted to class level so it is not rebuilt per request.
    # 'user_pass' is 'user:password' for authenticated proxies, '' otherwise.
    PROXIES = [
        {'ip_port': '111.11.228.75:80', 'user_pass': ''},
        {'ip_port': '120.198.243.22:80', 'user_pass': ''},
        {'ip_port': '111.8.60.9:8123', 'user_pass': ''},
        {'ip_port': '101.71.27.120:80', 'user_pass': ''},
        {'ip_port': '122.96.59.104:80', 'user_pass': ''},
        {'ip_port': '122.224.249.122:8088', 'user_pass': ''},
    ]

    def process_request(self, request, spider):
        """Scrapy hook (the name must be exactly `process_request`).

        Picks a random proxy, sets request.meta['proxy'], and attaches a
        Proxy-Authorization header only when credentials are present.
        """
        proxy = random.choice(self.PROXIES)
        request.meta['proxy'] = ('http://%s' % proxy['ip_port']).encode('ascii')
        # BUGFIX: the original tested `is not None`, which is True even for
        # the empty string, so an empty "Basic" auth header was always sent.
        # Truthiness is the correct check for "has credentials".
        if proxy['user_pass']:
            # b64encode (not the deprecated/removed base64.encodestring)
            # avoids the trailing newline that would corrupt the header.
            creds = base64.b64encode(proxy['user_pass'].encode('utf-8'))
            request.headers['Proxy-Authorization'] = b'Basic ' + creds
            print("**************ProxyMiddleware have pass************" + proxy['ip_port'])
        else:
            print("**************ProxyMiddleware no pass************" + proxy['ip_port'])
最後在settings裡配置下, DOWNLOADER_MIDDLEWARES = {'step8_king.middlewares.ProxyMiddleware': 500,}