1. 程式人生 > scrapy知乎模擬登入和cookie登入

scrapy知乎模擬登入和cookie登入

模擬登入
# -*- coding: utf-8 -*-
import scrapy

from scrapy import cmdline


#from scrapy.spiders import CrawlSpider

import scrapy
from scrapy.contrib.spiders.crawl import CrawlSpider
from astropy.io.fits.header import Header


class ZhihuUserSpider(CrawlSpider):
    """Spider that submits the Zhihu phone-number login form, then fetches a profile page.

    Request flow: ``start_requests`` -> ``after_login`` -> ``onetwo``.
    """

    name = "zhihu_user"
    allowed_domains = ['zhihu.com']
    start_urls = ["http://www.zhihu.com"]
    # Browser-like User-Agent sent with every request this spider builds.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36',
    }

    def start_requests(self):
        """Build the initial request: the login form submission.

        Returns:
            A one-element list holding the login ``scrapy.FormRequest``; its
            response is handed to ``after_login``.
        """
        # NOTE(review): the ``_xsrf`` value is hard-coded. If the site issues
        # this token per session it has to be read from a freshly fetched page
        # instead -- confirm.
        # NOTE(review): the credentials are sent to an ``http://`` URL --
        # confirm whether this should be ``https://``.
        return [scrapy.FormRequest(
            "http://www.zhihu.com/login/phone_num",
            formdata={
                '_xsrf': '7ecec739ea7f9e42d3a605e2c44883d3',
                'remember_me': 'true',
                'password': '******',
                'phone_num': '******',
            },
            headers=self.headers,
            callback=self.after_login,
        )]

    def after_login(self, response):
        """Handle the login response by requesting the profile activity page.

        Args:
            response: Response to the login form submission (not inspected).

        Yields:
            A ``scrapy.Request`` for the profile page, handled by ``onetwo``.
        """
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print('after login')
        yield scrapy.Request(
            'https://www.zhihu.com/people/shuangyueliao-82/activities',
            callback=self.onetwo,
            headers=self.headers,
        )

    def onetwo(self, response):
        """Print the raw body of the profile page response.

        Args:
            response: Response for the profile activity page.
        """
        print(response.body)
if __name__ == '__main__':
    # Run the "zhihu_user" spider through Scrapy's command-line entry point,
    # equivalent to typing ``scrapy crawl zhihu_user`` in a shell.
    crawl_argv = ['scrapy', 'crawl', 'zhihu_user']
    scrapy.cmdline.execute(argv=crawl_argv)
Cookie登入
# -*- coding: utf-8 -*-
import scrapy

from scrapy import cmdline


#from scrapy.spiders import CrawlSpider

import scrapy
from scrapy.contrib.spiders.crawl import CrawlSpider
from astropy.io.fits.header import Header


class ZhihuUserSpider(CrawlSpider):
    """Spider that fetches a Zhihu profile page using a pre-obtained session cookie.

    No login form is submitted; the cookie in ``cook`` is attached to the
    first request instead.
    """

    name = "zhihu_user1"
    allowed_domains = ['zhihu.com']
    start_urls = ["http://www.zhihu.com"]
    # Browser-like User-Agent sent with the request this spider builds.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36',
    }

    # Cookie attached to the first request.
    # NOTE(review): this looks like a real session credential committed in
    # source -- presumably it expires and should be loaded from configuration
    # instead; confirm.
    cook = {
        'z_c0': 'Mi4wQUhDQ2dzeEhlQXNBQU1JWGNGVnJDeGNBQUFCaEFsVk5NYlAwV0FDS0VaWkppOXI4LWtPZzJ0V3E5MXlhcWh0MTh3|1489839665|2c583ba1ed021db1f404d335d5958102386285c6',
    }

    def start_requests(self):
        """Build the initial request: the profile page, sent with the stored cookie.

        Returns:
            A one-element list holding the ``scrapy.Request``; its response is
            handed to ``after_login``.
        """
        return [scrapy.Request(
            'https://www.zhihu.com/people/shuangyueliao-82/activities',
            cookies=self.cook,
            callback=self.after_login,
            headers=self.headers,
        )]

    def after_login(self, response):
        """Print a marker line followed by the raw body of the response.

        Args:
            response: Response for the profile activity page.
        """
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print('after login')
        print(response.body)
        
   
if __name__ == '__main__':
    # Run the "zhihu_user1" spider through Scrapy's command-line entry point,
    # equivalent to typing ``scrapy crawl zhihu_user1`` in a shell.
    crawl_argv = ['scrapy', 'crawl', 'zhihu_user1']
    scrapy.cmdline.execute(argv=crawl_argv)