1. 程式人生 > >scrapy模擬使用者登入

scrapy模擬使用者登入

scrapy框架編寫模擬使用者登入的三種方式:

方式一:攜帶cookie登入。攜帶cookie時,請求的url一般為登入後的頁面;cookie資訊應在登入後的頁面中獲取,並且cookies引數應轉成字典形式。

# -*- coding: utf-8 -*-
import re
import scrapy


class RenrenSpider(scrapy.Spider):
    """Fetch a renren.com profile page by replaying cookies from a logged-in session.

    The cookie header is copied verbatim from a browser that has already
    logged in, converted to a dict, and attached to the first request.
    """

    name = 'renren'
    allowed_domains = ['renren.com']
    start_urls = ['http://www.renren.com/966403607/profile']

    def start_requests(self):
        """Yield the request for ``start_urls[0]`` with the session cookies attached.

        ``start_requests`` is overridden because the request for the start
        URL has to carry the cookies.
        """
        # Raw cookie header, split across literals (one cookie per line) for
        # readability; the concatenated value is the original header string.
        cookies = (
            'anonymid=joz9buh7-q7cfyi; '
            'depovince=GUZ; '
            '_r01_=1; '
            '_de=A10BB6D966D15FBA1F90E79AB0D2FDF8; '
            'ln_uact=18520877258; '
            'ln_hurl=http://head.xiaonei.com/photos/0/0/men_main.gif; '
            'jebe_key=b605eb90-21b9-4072-9d48-b75b233c1cea%7Cb156ee0bfd56183e1b8eb9e5994eb5ef%7C1543293909743%7C1%7C1543293910671; '
            'jebecookies=9700aefc-77a1-49a7-8d74-882aa173e271|||||; '
            'JSESSIONID=abcxtZqTI1hOu4WzN0tDw; '
            'ick_login=21361cc0-986f-49bc-95f9-de3a9ed54a69; '
            'p=29e3cce85947859ee0e1d6264160539f7; '
            'first_login_flag=1; '
            't=b6e6ac604c66019acf93cb471550349e7; '
            'societyguester=b6e6ac604c66019acf93cb471550349e7; '
            'id=966403607; '
            'xnsid=ac2d6a1a; '
            'loginfrom=syshome; '
            'wp_fold=0'
        )
        # Turn the header into a {name: value} dict. Split on the FIRST '='
        # only, so a cookie value that itself contains '=' is not truncated.
        cookies = dict(pair.split("=", 1) for pair in cookies.split("; "))
        # Attach the cookies to the request that is handed to parse().
        yield scrapy.Request(
            self.start_urls[0],
            cookies=cookies,
            callback=self.parse,
        )

    def parse(self, response):
        """Yield an item holding every occurrence of the marker text in the page.

        NOTE(review): the marker is presumably text that only appears when the
        login succeeded -- confirm against the target page.
        """
        item = {}
        name = re.findall("尚學堂", response.body.decode())
        item["name"] = name
        yield item
為了確認cookie確實在不同的解析函式之間傳遞,可以在settings.py中設定如下引數:
# Enable cookie debugging so you can confirm that the cookies really are
# passed along between the different parse callbacks.
COOKIES_DEBUG = True

方式二:使用FormRequest攜帶表單資料(formdata)傳送post請求:

# -*- coding: utf-8 -*-
import re
import scrapy


class GithubSpider(scrapy.Spider):
    """Log in to GitHub by building the login form fields by hand and POSTing them."""

    name = 'github'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        """Read the CSRF token from the login page and submit the login form.

        Yields a ``FormRequest`` whose response is handled by ``parse_item``.
        Yields nothing if the token input is not present on the page.
        """
        authenticity_token = response.xpath(
            '//input[@name="authenticity_token"]/@value'
        ).extract_first()
        if authenticity_token is None:
            # extract_first() returns None when nothing matches; form values
            # must be strings, so stop here with a clear log message instead
            # of failing while the request body is being encoded.
            self.logger.error(
                "authenticity_token not found on %s; login aborted",
                response.url,
            )
            return

        # Fields submitted with the login form.
        formdata = {
            "commit": "Sign in",
            "utf8": "",
            "authenticity_token": authenticity_token,
            "login": "sxtpython",
            "password": "sxt123456",
        }
        yield scrapy.FormRequest(
            # URL the login form is submitted to.
            'https://github.com/session',
            formdata=formdata,
            callback=self.parse_item,
        )

    def parse_item(self, response):
        """Yield an item listing every occurrence of the login name in the page."""
        item = {}
        item["name"] = re.findall('sxtpython', response.body.decode())
        yield item

方式三:自動從響應中找到form表單進行登入(FormRequest.from_response)

# -*- coding: utf-8 -*-
import re
import scrapy

# 方式三
class Github2Spider(scrapy.Spider):
    """Log in to GitHub by letting Scrapy pick the form out of the login page."""

    name = 'github2'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        """Submit the login form found in ``response`` with the credentials filled in."""
        credentials = {
            'login': 'sxtpython',
            'password': 'sxt123456',
        }
        # from_response takes the form from the given response automatically;
        # only the credential fields are supplied here.
        login_request = scrapy.FormRequest.from_response(
            response,
            formdata=credentials,
            callback=self.parse_item,
        )
        yield login_request

    def parse_item(self, response):
        """Yield an item listing every occurrence of the login name in the page."""
        page_text = response.body.decode()
        yield {"name": re.findall('sxtpython', page_text)}