Scrapy 模擬登入豆瓣
阿新 • 發佈:2018-12-22
# Reference: https://blog.csdn.net/qq_37616069/article/details/80376807
# coding=utf-8
import scrapy


class DoubanLogin(scrapy.Spider):
    """Spider that logs in to Douban by submitting the site's login form.

    Flow: fetch the login page, fill in the form (asking the operator to
    solve the captcha when the page contains one), submit it, and report
    the account summary shown on the page that follows.
    """

    name = 'douban'
    # Scrapy documents ``start_urls`` as a list of URLs, not a bare string.
    start_urls = ['https://www.douban.com/accounts/login']

    def start_requests(self):
        """Yield a request for each login URL, bound to cookie jar ``1``.

        The ``cookiejar`` meta key makes the cookies received with the login
        page available to the follow-up form submission.
        """
        for url in self.start_urls:
            yield scrapy.Request(
                url,
                callback=self.parse_link,
                meta={'cookiejar': 1},
            )

    def parse_link(self, response):
        """Build the login form data and submit the form found in ``response``.

        When the page contains a captcha image, its URL is printed and the
        operator is prompted on stdin for the solution.

        Args:
            response: The login page response.

        Yields:
            A ``FormRequest`` that submits the login form and is handled by
            ``after_login``.
        """
        # extract_first() yields a single string (or None) rather than a list,
        # so a plain string is sent as the form value.
        captcha_id = response.xpath(
            '//div/input[@name="captcha-id"]/@value'
        ).extract_first()
        captcha_url = response.xpath(
            '//*[@id="captcha_image"]/@src'
        ).extract_first()

        # Fields common to both the captcha and the no-captcha case.
        data = {
            'source': 'index_nav',
            'form_email': '********',
            'form_password': '********',
            'redir': 'https://www.douban.com/',
            'login': '登入',
        }

        # Only add the captcha fields when the page actually shows a captcha.
        if captcha_url is not None:
            print(captcha_url)
            if captcha_id is not None:
                data['captcha-id'] = captcha_id
            # NOTE: input() blocks the crawler until the operator answers;
            # acceptable for a single interactive login.
            data['captcha-solution'] = input('input capt: ')

        # Reuse the cookie jar that stored the login page's cookies.
        yield scrapy.FormRequest.from_response(
            response,
            meta={'cookiejar': response.meta['cookiejar']},
            formdata=data,
            callback=self.after_login,
        )

    def after_login(self, response):
        """Report the account summary text found on the post-login page.

        Args:
            response: The page returned after the login form was submitted.
        """
        summary = response.xpath(
            '//*[@class="nav-user-account"]/a/span[1]/text()'
        ).extract()
        if not summary:
            # No account element on the page: presumably the login did not
            # succeed (wrong credentials or captcha) -- verify against the site.
            self.logger.warning('No account summary found at %s', response.url)
        self.logger.info('Account summary: %s', summary)
# settings.py
# Default headers for the project's requests.
DEFAULT_REQUEST_HEADERS = {
    # Built from adjacent string literals: the previous backslash
    # continuation left a stray "\ " inside the Chrome version number.
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'
    ),
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'Referer': 'https://www.douban.com/',
    # NOTE(review): advertising "br" presumably requires Brotli decoding
    # support to be installed -- confirm, or drop "br" from this value.
    'Accept-Encoding': 'gzip, deflate, br',
}