豆瓣網post 爬取帶驗證碼
阿新 • • 發佈:2018-11-07
# -*- coding: utf-8 -*-
"""Scrapy spider that submits the Douban login form, including its captcha."""
import requests
import scrapy

from ..bao.jiema import get_number

# Base login form fields. Treated as a read-only template: every request
# works on its own copy so captcha fields never leak between responses.
fromdata = {
    "source": "movie",
    "redir": "https://movie.douban.com/",
    "form_email": "賬號",
    "form_password": "密碼",
    "login": "登入",
}

# File the captcha image is saved to before get_number() is called.
# NOTE(review): presumably get_number() reads this file — confirm in jiema.
_CAPTCHA_PATH = 'a.jpg'

# Seconds to wait for the captcha image before giving up.
_CAPTCHA_TIMEOUT = 10


class BanSpider(scrapy.Spider):
    """Fetch the Douban login page and post the login form back to it."""

    name = 'ban'
    start_urls = ['https://www.douban.com/accounts/login?source=movie']

    def parse(self, response):
        """Build the login form for ``response`` and submit it.

        When the page contains a captcha image, the image is downloaded to
        ``_CAPTCHA_PATH`` and the captcha id plus the value returned by
        ``get_number()`` are added to the form.

        Args:
            response: The downloaded login page.

        Yields:
            A ``scrapy.FormRequest`` posting the form to the same URL, with
            ``after_login`` as its callback.

        Raises:
            requests.RequestException: If the captcha image download fails.
        """
        print(response.url)
        captcha_src = response.xpath(
            '//*[@id="captcha_image"]/@src'
        ).extract_first()
        captcha_id = response.xpath(
            '//*[@id="lzform"]/div[5]/div/div/input[2]/@value'
        ).extract_first()

        # Copy the template so the module-level dict is never mutated.
        form = dict(fromdata)
        if captcha_src:
            # urljoin leaves absolute URLs untouched and resolves relative ones.
            self._download_captcha(response.urljoin(captcha_src))
            if captcha_id is not None:
                form['captcha-id'] = captcha_id
            else:
                # FormRequest cannot encode a None value, so leave the field out.
                self.logger.warning(
                    'captcha image found but captcha id is missing on %s',
                    response.url,
                )
            form["captcha-solution"] = get_number()

        # NOTE(review): this echoes the account and password to stdout.
        print(form)
        yield scrapy.FormRequest(
            response.url,
            formdata=form,
            callback=self.after_login,
        )

    def _download_captcha(self, url):
        """Download the captcha image at ``url`` and save it to ``_CAPTCHA_PATH``.

        Raises:
            requests.RequestException: On timeout, connection failure or a
                non-success HTTP status.
        """
        # Fetch before opening the file so a failed download does not leave
        # an empty or truncated image behind.
        reply = requests.get(url=url, timeout=_CAPTCHA_TIMEOUT)
        reply.raise_for_status()
        with open(_CAPTCHA_PATH, 'wb') as image_file:
            image_file.write(reply.content)

    def after_login(self, response):
        """Print the text nodes selected from the global navigation bar."""
        all_title = response.xpath(
            '//*[@id="db-global-nav"]/div/div[1]/ul/li[2]/a/span[1]/text()'
        ).extract()
        print(all_title)