1. 程式人生 > 實用技巧 > selenium在scrapy中的使用流程

selenium在scrapy中的使用流程

# 當前爬蟲用的selenium是同一個

1、在爬蟲中初始化webdriver物件

import scrapy
from selenium import webdriver

class CnblogSpider(scrapy.Spider):
    """Spider for www.cnblogs.com that shares one Selenium webdriver.

    The driver is created once as a class attribute so every request made
    by this spider reuses the same browser instance; it is shut down in
    ``close`` when the spider finishes.
    """
    name = 'cnblog'
    allowed_domains = ['www.cnblogs.com']
    start_urls = ['http://www.cnblogs.com/']

    # Initialize the webdriver object on the spider (created once, shared)
    bro = webdriver.Chrome(executable_path='../chromedriver.exe')

    def parse(self, response):
        # Only demonstrates that the response arrived; real parsing omitted
        print(response.status)

    # Close the browser when the spider shuts down
    def close(self, reason):
        print("我結束了")
        self.bro.close()

2、在中介軟體中使用(process_request)

    def process_request(self, request, spider):
        """Fetch the page with the spider's shared Selenium driver.

        Returns an HtmlResponse built from the rendered page source so
        Scrapy skips its own downloader for this request.
        """
        # Borrow the selenium driver created on the spider
        # from selenium import webdriver
        from scrapy.http import Response, HtmlResponse
        # bro = webdriver.Chrome(executable_path='../chromedriver.exe')
        spider.bro.get('https://dig.chouti.com/')
        print(spider.bro.page_source)
        # Must return a Response object so the downloader is bypassed
        response = HtmlResponse(
            url='https://dig.chouti.com/',
            body=spider.bro.page_source.encode('utf-8'),
            request=request,
        )
        return response

3、在settings.py中開啟中介軟體

# Downloader middleware registration (value is the middleware's order/priority)
DOWNLOADER_MIDDLEWARES = {
   'cnblogs.middlewares.CnblogsDownloaderMiddleware': 543,
}