1. 程式人生 > 豆瓣網 post 爬取帶驗證碼

豆瓣網post 爬取帶驗證碼

# -*- coding: utf-8 -*-
import scrapy
import requests
from ..bao.jiema import get_number

# Base fields of the Douban login form. BanSpider.parse submits these via
# scrapy.FormRequest, adding the captcha fields when a captcha is shown.
# NOTE(review): form_email / form_password look like placeholder text
# ("account" / "password") -- presumably replaced with real credentials.
fromdata = dict(
    source="movie",
    redir="https://movie.douban.com/",
    form_email="賬號",
    form_password="密碼",
    login="登入",
)

class BanSpider(scrapy.Spider):
    """Spider that posts the Douban login form, including a captcha answer if needed.

    The login page is fetched from ``start_urls``; ``parse`` builds the form
    submission and ``after_login`` inspects the page returned after the POST.
    """

    name = 'ban'
    # allowed_domains = ['ban']
    start_urls = ['https://www.douban.com/accounts/login?source=movie']

    def parse(self, response):
        """Submit the login form found on the login page.

        If the page contains a captcha image, the image is downloaded to
        ``a.jpg`` and ``get_number()`` is called to obtain the answer
        (presumably it reads ``a.jpg`` -- confirm against ``bao.jiema``).

        Args:
            response: The Scrapy response for the login page.

        Yields:
            A ``scrapy.FormRequest`` posting the form fields back to the same
            URL, with ``after_login`` as its callback.
        """
        print(response.url)
        images = response.xpath('//*[@id="captcha_image"]/@src').extract_first()
        all_id = response.xpath(
            '//*[@id="lzform"]/div[5]/div/div/input[2]/@value'
        ).extract_first()

        # Work on a copy: the module-level template must not accumulate
        # captcha fields from one response that would leak into the next.
        formdata = dict(fromdata)

        if images:
            # Download before opening the file so a failed request does not
            # leave a truncated a.jpg behind; the timeout keeps a stalled
            # download from blocking the crawl forever. urljoin leaves
            # absolute URLs untouched and resolves relative ones.
            response1 = requests.get(url=response.urljoin(images), timeout=10)
            with open('a.jpg', 'wb') as f:
                f.write(response1.content)
            # The file is closed (and flushed) here, before the solver runs.
            formdata['captcha-id'] = all_id
            formdata["captcha-solution"] = get_number()

        print(formdata)
        yield scrapy.FormRequest(
            response.url, formdata=formdata, callback=self.after_login
        )

    def after_login(self, response):
        """Print the text extracted from the global navigation bar after login.

        Args:
            response: The Scrapy response returned for the login POST.
        """
        all_title = response.xpath(
            '//*[@id="db-global-nav"]/div/div[1]/ul/li[2]/a/span[1]/text()'
        ).extract()
        print(all_title)