程式人生 > scrapy 模擬登陸豆瓣

scrapy 模擬登陸豆瓣

參考: https://blog.csdn.net/qq_37616069/article/details/80376807


# coding=utf-8
import scrapy


class DoubanLogin(scrapy.Spider):
    """Log in to Douban through its login form, handling an optional captcha.

    Flow: fetch the login page, fill in the form found in that response
    (prompting on stdin for the captcha text when a captcha image is present),
    submit it, and log the account text found on the page returned afterwards.
    """

    name = 'douban'
    # A single URL string rather than a list: ``start_requests`` is overridden
    # below and hands this value directly to ``scrapy.Request``.
    start_urls = 'https://www.douban.com/accounts/login'

    def start_requests(self):
        """Yield the request for the login page, tagged with a cookiejar key."""
        # The 'cookiejar' meta key selects which cookie session the response
        # cookies are stored in, so the login POST can reuse them later.
        yield scrapy.Request(
            self.start_urls,
            callback=self.parse_link,
            meta={'cookiejar': 1},
        )

    def parse_link(self, response):
        """Build the login form data and submit the login form.

        Args:
            response: The login page response; must carry ``meta['cookiejar']``.

        Yields:
            A ``scrapy.FormRequest`` built from the form in ``response``, with
            ``after_login`` as its callback.
        """
        # extract_first() returns a single string (or None) instead of a list,
        # so each form field is sent as one scalar value.
        captcha_id = response.xpath(
            '//div/input[@name="captcha-id"]/@value').extract_first()
        captcha_src = response.xpath(
            '//*[@id="captcha_image"]/@src').extract_first()

        # Fields common to both the captcha and the captcha-less login.
        data = {
            'source': 'index_nav',
            'form_email': '********',
            'form_password': '********',
            'redir': 'https://www.douban.com/',
            'login': '登入',
        }

        # Only add the captcha fields when the page actually shows a captcha.
        if captcha_src:
            self.logger.info('Captcha image: %s', captcha_src)
            # NOTE: input() blocks the crawler until the user types the
            # captcha text; acceptable for this single-request interactive spider.
            data['captcha-solution'] = input('input capt: ')
            if captcha_id is not None:
                data['captcha-id'] = captcha_id

        # Reuse the cookie session that was started with the login page request.
        yield scrapy.FormRequest.from_response(
            response,
            meta={'cookiejar': response.meta['cookiejar']},
            formdata=data,
            callback=self.after_login,
        )

    def after_login(self, response):
        """Log the account text shown in the navigation bar after login.

        Args:
            response: The page returned after submitting the login form.
        """
        summary = response.xpath(
            '//*[@class="nav-user-account"]/a/span[1]/text()').extract()
        if summary:
            self.logger.info('Logged in, account summary: %s', summary)
        else:
            # The account element is missing from the returned page.
            self.logger.warning(
                'Account summary not found at %s; login may have failed',
                response.url)
settings.py

# Headers applied by default to every request the project sends.
DEFAULT_REQUEST_HEADERS = {
    # Adjacent string literals are joined by the parser. A backslash
    # continuation *inside* a literal would instead embed the next line's
    # leading indentation in the value, corrupting the Chrome version number.
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'
    ),
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'Referer': 'https://www.douban.com/',
    # NOTE(review): advertising 'br' presumably requires a Brotli package to
    # be installed so responses can be decoded - confirm for this deployment.
    'Accept-Encoding': 'gzip, deflate, br',
}