scrapy中自定義下載中介軟體設定動態User-Agent和代理ip
阿新 • • 發佈:2019-01-22
"""Scrapy downloader middlewares that pick a random User-Agent and proxy.

Both middlewares read their candidate values from the project settings
(``USER_AGENTS`` and ``IPLIST``).  Configure those two settings and activate
the middlewares in the project settings for them to take effect.
"""
import logging
import random

logger = logging.getLogger(__name__)


class RandomUserAgent:
    """Downloader middleware that assigns a random User-Agent per request."""

    def __init__(self, agents):
        """Store the pool of User-Agent strings to choose from.

        Args:
            agents: Sequence of User-Agent strings. May be empty, in which
                case the middleware leaves requests unchanged.
        """
        self.agents = agents
        if not agents:
            # Surface the misconfiguration once instead of failing per request.
            logger.warning(
                "USER_AGENTS is empty; RandomUserAgent will not modify requests"
            )

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware from the ``USER_AGENTS`` setting."""
        return cls(crawler.settings.getlist('USER_AGENTS'))

    def process_request(self, request, spider):
        """Set a random ``User-Agent`` header on ``request`` if it has none.

        Args:
            request: Object exposing a dict-like ``headers`` attribute.
            spider: Unused; present to match the middleware call signature.

        Returns:
            None.
        """
        if not self.agents:
            # random.choice() raises IndexError on an empty sequence.
            return None
        # setdefault keeps a User-Agent that is already present on the request.
        request.headers.setdefault('User-Agent', random.choice(self.agents))
        return None


class RandomProxy:
    """Downloader middleware that assigns a random proxy per request."""

    def __init__(self, iplist):
        """Store the pool of proxy addresses to choose from.

        Args:
            iplist: Sequence of proxy addresses. May be empty, in which case
                the middleware leaves requests unchanged.
        """
        self.iplist = iplist
        if not iplist:
            # Surface the misconfiguration once instead of failing per request.
            logger.warning(
                "IPLIST is empty; RandomProxy will not modify requests"
            )

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware from the ``IPLIST`` setting."""
        return cls(crawler.settings.getlist('IPLIST'))

    def process_request(self, request, spider):
        """Store a randomly chosen proxy in ``request.meta['proxy']``.

        Unlike the User-Agent middleware, this overwrites any proxy already
        present in ``request.meta``.

        Args:
            request: Object exposing a dict-like ``meta`` attribute.
            spider: Unused; present to match the middleware call signature.

        Returns:
            None.
        """
        if not self.iplist:
            # random.choice() raises IndexError on an empty sequence.
            return None
        request.meta['proxy'] = random.choice(self.iplist)
        return None

# Configure USER_AGENTS and IPLIST in settings, and activate these middlewares.