# Example: simulated Douban login (實例:模擬登陸豆瓣)
# -*- coding: utf-8 -*-
import scrapy
import urllib.request
# https://accounts.douban.com/login
class DoubanSpider(scrapy.Spider):
    """Log in to Douban by submitting its account login form.

    The spider fetches the login page, checks whether that page carries a
    captcha image, builds the matching form payload and POSTs it to the
    login URL. The page returned by the POST is written to ``douban.html``
    so the login result can be inspected by hand.
    """

    name = 'douban'
    allowed_domains = ['www.douban.com', 'accounts.douban.com']
    start_urls = ['https://accounts.douban.com/login']

    # Login credentials, defined once instead of being repeated in each form
    # payload. As plain attributes they can be overridden per run with spider
    # arguments, e.g.
    #   scrapy crawl douban -a login_email=... -a login_password=...
    # NOTE(review): hard-coded credentials should not live in source control.
    login_email = '[email protected]'
    login_password = 'lizhibin666'

    def parse(self, response):
        """Build the login form payload and submit it.

        Args:
            response: The response for the login page listed in
                ``start_urls``.

        Yields:
            A ``scrapy.FormRequest`` that POSTs the login form; its response
            is handled by :meth:`lala`.
        """
        # Look for the captcha image; an empty selector list means the
        # login page was served without a captcha.
        image = response.xpath('//img[@id="captcha_image"]/@src')

        if not image:
            print('不帶驗證碼的' * 10)
            # Payload for the captcha-free login form.
            formdata = {
                'source': 'index_nav',
                'form_email': self.login_email,
                'form_password': self.login_password,
            }
        else:
            print('帶驗證碼的' * 10)
            # Hidden captcha id, read with an attribute selector. Default to
            # an empty string: form values must be strings, so a missing
            # input must not put ``None`` into the payload.
            captchaid = response.css(
                'input[name="captcha-id"]::attr(value)'
            ).extract_first(default='')
            # Resolve the captcha link against the page URL so a relative
            # ``src`` still downloads correctly.
            image_url = response.urljoin(image.extract_first())
            # Save the captcha locally so the operator can read it, then ask
            # for the solution on stdin. Both calls block the crawler.
            urllib.request.urlretrieve(image_url, 'code.png')
            code = input('請輸入驗證碼:')
            # Payload for the login form with a captcha. 'source' is the
            # literal string 'None', not the ``None`` object.
            formdata = {
                'source': 'None',
                'redir': 'https://www.douban.com/',
                'form_email': self.login_email,
                'form_password': self.login_password,
                'captcha-solution': code,
                'captcha-id': captchaid,
                'login': '登錄',
            }

        post_url = 'https://accounts.douban.com/login'
        # Send the POST request carrying the login form.
        yield scrapy.FormRequest(
            url=post_url,
            formdata=formdata,
            callback=self.lala,
        )

    def lala(self, response):
        """Save the post-login page so the login result can be checked.

        Args:
            response: The response to the login POST; its raw body is
                written to ``douban.html``.
        """
        print('*' * 50)
        with open('douban.html', 'wb') as fp:
            fp.write(response.body)
        print('*' * 50)
# Example: simulated Douban login (實例:模擬登陸豆瓣)