scrapy 命令列傳參
阿新 • • 發佈:2020-07-15
class SciencedirectspiderSpider(scrapy.Spider):
    """Spider that runs a ScienceDirect search built from command-line arguments.

    ``search`` and ``year`` are supplied with ``-a name=value`` options of
    ``scrapy crawl`` and arrive in ``__init__`` as keyword arguments.
    """

    name = 'sciencedirectspider'
    allowed_domains = ['sciencedirect.com']
    start_urls = ['https://www.sciencedirect.com/search?qs=kidney%20stone']

    def __init__(self, year='', search='', **kwargs):
        """Store the spider arguments and build the search URL.

        Args:
            year: Value for the ``years`` query parameter (raw text).
            search: Value for the ``qs`` query parameter (raw, un-encoded text).
            **kwargs: Any other spider arguments; forwarded to the base class.
        """
        # Imported locally so this block does not depend on a file-level import.
        from urllib.parse import quote

        # Forward the remaining arguments instead of silently dropping them.
        super().__init__(**kwargs)

        self.year = year
        self.search = search
        # Percent-encode the values so spaces, '&', '#', etc. cannot break
        # the query string.
        self.urls = (
            'https://www.sciencedirect.com/search?qs=' + quote(search)
            + '&years=' + quote(year)
            + '&sortBy=date'
        )
        # NOTE(review): `chorme_options` (sic) is defined outside this snippet;
        # the name is kept as-is so the reference still resolves.
        self.browser = webdriver.Chrome(chrome_options=chorme_options)

    def start_requests(self):
        """Yield the single initial request for the URL built in ``__init__``."""
        # Author's XPath note: //*[@id="srp-pagination"]/li[1]/text()[4]
        # `self.page` is the callback; its definition is not part of this snippet.
        yield scrapy.Request(self.urls, callback=self.page, meta={'url': self.urls})
執行命令:scrapy crawl sciencedirectspider --nolog -a "search=kidney stone" -a "year=2019"
** 注意:每個 -a 只能傳遞一個引數;若要傳遞多個引數,必須為每個引數各寫一次 -a
main執行語句:
# Entry script: launches the spider programmatically, equivalent to running
# the `scrapy crawl` command shown above.
from scrapy.cmdline import execute

# Variant without spider arguments (--nolog: do not print logs):
# execute(['scrapy', 'crawl', 'sciencedirectspider', '--nolog'])

# Each spider argument needs its own '-a' followed by one 'name=value' item.
execute(['scrapy', 'crawl', 'sciencedirectspider', '--nolog',
         '-a', 'search=kidney stone', '-a', 'year=2019'])  # do not print logs