scrapy-redis 自定義去重規則
阿新 • • 發佈:2019-03-29
pdu mes efi import ngs server xxx iter 編碼
############### xxx.py ######
from scrapy_redis import defaults
from scrapy_redis.connection import get_redis_from_settings
from scrapy_redis.dupefilter import RFPDupeFilter


class RedisDupeFilter(RFPDupeFilter):
    """Request dupe filter that keeps its fingerprints under a fixed Redis key.

    Only ``from_settings`` is overridden; everything else is inherited from
    ``RFPDupeFilter``.
    """

    @classmethod
    def from_settings(cls, settings):
        """Build the filter from the crawler settings.

        Args:
            settings: Scrapy settings object; used to create the Redis
                connection and to read the ``DUPEFILTER_DEBUG`` flag.

        Returns:
            An instance of ``cls`` bound to the Redis connection and key.
        """
        server = get_redis_from_settings(settings)
        # Fill the key template with a constant label instead of a per-run
        # value, so every run resolves to the same Redis key.
        key = defaults.DUPEFILTER_KEY % {'timestamp': 'myScrapy'}
        debug = settings.getbool('DUPEFILTER_DEBUG')
        return cls(server, key=key, debug=debug)


# Backward-compatible alias for the original, misspelled class name.
RdisDupeFilter = RedisDupeFilter
然後到 settings.py 中進行配置(將 DUPEFILTER_CLASS 指向上面自定義的去重類):
# ######################### scrapy-redis connection ##############
REDIS_HOST = "129.28.96.43"  # Redis host name
REDIS_PORT = 6379  # Redis port
REDIS_PARAMS = {'password': "beta"}  # extra connection parameters
# The setting name is REDIS_ENCODING; a misspelled name is silently ignored.
REDIS_ENCODING = "utf-8"  # Redis encoding
# Connection URL; takes precedence over the host/port settings above when set.
# REDIS_URL = 'redis://user:pwd@hostname:9001'

DUPEFILTER_KEY = 'dupefilter:%(timestamp)s'

# Stock scrapy-redis dupe filter, kept here for reference:
# DUPEFILTER_CLASS = 'scrapy_redis.dupefilter.RFPDupeFilter'
# Custom dupe filter. The setting must be spelled DUPEFILTER_CLASS or Scrapy
# never picks it up, and the dotted path must match the class name actually
# defined in myscrapy/xxx.py.
DUPEFILTER_CLASS = 'myscrapy.xxx.RedisDupeFilter'
scrapy-redis 自定義去重規則