scrapy知乎模擬登入和cookie登入
阿新 • 發佈:2019-01-03
模擬登入
# -*- coding: utf-8 -*-
"""Zhihu spider: simulated login by POSTing a phone number and password."""
import scrapy
from scrapy import cmdline
# Fixed import path: scrapy.contrib.spiders.crawl is deprecated/removed in
# modern Scrapy; the canonical location is scrapy.spiders.
from scrapy.spiders import CrawlSpider


class ZhihuUserSpider(CrawlSpider):
    """Log in to zhihu.com via the phone-number endpoint, then fetch a profile page."""

    name = "zhihu_user"
    allowed_domains = ['zhihu.com']
    start_urls = ["http://www.zhihu.com"]
    # Browser-like User-Agent so the site does not reject the default Scrapy UA.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36'
    }

    def start_requests(self):
        # Log in first instead of crawling start_urls directly.
        # NOTE(review): the _xsrf token is hard-coded, but it is normally
        # per-session — it should be scraped from the login page before posting.
        # SECURITY: real credentials belong in settings/env vars, not in source.
        return [scrapy.FormRequest(
            "http://www.zhihu.com/login/phone_num",
            formdata={
                '_xsrf': '7ecec739ea7f9e42d3a605e2c44883d3',
                'remember_me': 'true',
                'password': '******',
                'phone_num': '******',
            },
            headers=self.headers,
            callback=self.after_login,
        )]

    def after_login(self, response):
        # Login response received; request a user's activity page with the same
        # headers so the authenticated session cookies carry over automatically.
        print('after login')
        yield scrapy.Request(
            'https://www.zhihu.com/people/shuangyueliao-82/activities',
            callback=self.onetwo,
            headers=self.headers,
        )

    def onetwo(self, response):
        # Dump the raw page body for manual inspection.
        print(response.body)


if __name__ == '__main__':
    # Launch the spider programmatically, equivalent to `scrapy crawl zhihu_user`.
    cmdline.execute(argv=['scrapy', 'crawl', 'zhihu_user'])
Cookie登入
# -*- coding: utf-8 -*-
"""Zhihu spider: authenticates by replaying a previously captured session cookie."""
import scrapy
from scrapy import cmdline
# Fixed import path: scrapy.contrib.spiders.crawl is deprecated/removed in
# modern Scrapy; the canonical location is scrapy.spiders.
from scrapy.spiders import CrawlSpider


class ZhihuUserSpider(CrawlSpider):
    """Fetch a Zhihu profile page using a saved z_c0 login cookie (no form login)."""

    name = "zhihu_user1"
    allowed_domains = ['zhihu.com']
    start_urls = ["http://www.zhihu.com"]
    # Browser-like User-Agent so the site does not reject the default Scrapy UA.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36'
    }
    # z_c0 is Zhihu's session token, copied from a logged-in browser session.
    # NOTE(review): this token expires; it must be refreshed manually.
    cook = {
        'z_c0': 'Mi4wQUhDQ2dzeEhlQXNBQU1JWGNGVnJDeGNBQUFCaEFsVk5NYlAwV0FDS0VaWkppOXI4LWtPZzJ0V3E5MXlhcWh0MTh3|1489839665|2c583ba1ed021db1f404d335d5958102386285c6'
    }

    def start_requests(self):
        # Skip the login flow entirely: attach the saved session cookie to the
        # first request so the page is served as the logged-in user.
        return [scrapy.Request(
            'https://www.zhihu.com/people/shuangyueliao-82/activities',
            cookies=self.cook,
            callback=self.after_login,
            headers=self.headers,
        )]

    def after_login(self, response):
        # Dump the raw page body for manual inspection.
        print('after login')
        print(response.body)


if __name__ == '__main__':
    # Launch the spider programmatically, equivalent to `scrapy crawl zhihu_user1`.
    cmdline.execute(argv=['scrapy', 'crawl', 'zhihu_user1'])