scrapy 自動登入給抽屜點贊
阿新 • • 發佈:2018-11-05
# -*- coding: utf-8 -*-
"""Scrapy spider that logs in to dig.chouti.com and votes for listed links."""
import io
import sys

import scrapy
from pyquery import PyQuery
from scrapy.dupefilters import RFPDupeFilter
from scrapy.http import Request
from scrapy.http.cookies import CookieJar
# NOTE(review): HtmlXPathSelector is unused here and is believed to be
# deprecated in newer Scrapy releases -- confirm before upgrading Scrapy.
from scrapy.selector import Selector, HtmlXPathSelector

# Re-wrap stdout so that printed response text is encoded as gb18030.
# The attribute must be spelled ``sys.stdout``; assigning to any other name
# leaves the real stdout (and its encoding) untouched.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="gb18030")


class ChoutiSpider(scrapy.Spider):
    """Log in to dig.chouti.com, then POST a vote for each link on the page.

    Request flow: ``parse`` -> ``login`` -> ``good`` -> ``show``.
    """

    name = 'chouti'
    allowed_domains = ['chouti.com']
    start_urls = ['http://dig.chouti.com/']
    # Flat {cookie name: cookie value} dict captured from the start page;
    # stays None until parse() has run.
    cookies_list = None

    def parse(self, response):
        """Capture the start page's cookies and submit the login form.

        Pagination links (``#dig_lcpage``) are not followed; only the start
        page is processed.

        Args:
            response: Response for one of ``start_urls``.

        Yields:
            A POST ``Request`` to the login endpoint, handled by ``login``.
        """
        cookie_obj = CookieJar()
        # Without this call the jar stays empty and no cookie is ever sent.
        cookie_obj.extract_cookies(response, response.request)
        # Request(cookies=...) expects a flat name -> value mapping, not the
        # jar's private nested domain/path/name structure.
        self.cookies_list = {
            cookie.name: cookie.value for cookie in cookie_obj
        }
        yield Request(
            url="https://dig.chouti.com/login",
            method="POST",
            body="phone=86XXXXXXXXX&password=XXXXXXXXXX&oneMonth=1",
            headers={
                "content-type": "application/x-www-form-urlencoded; charset=UTF-8"
            },
            cookies=self.cookies_list,
            callback=self.login,
        )

    def login(self, resposne):
        """Print the login response and request the front page for voting.

        Args:
            resposne: Response returned by the login POST.

        Yields:
            A ``Request`` for the front page, handled by ``good``.
        """
        print(resposne.text)
        yield Request(url="https://dig.chouti.com/", callback=self.good)

    def good(self, response):
        """Issue one vote request per link found on the page.

        Items without a ``share-linkid`` attribute are skipped, so no vote
        URL is ever built with a missing id.

        Args:
            response: Response containing the link listing.

        Yields:
            A POST ``Request`` to the vote endpoint for each link, handled by
            ``show``.
        """
        content = str(response.body, encoding="utf-8")
        for item in PyQuery(content).find(".item .part2").items():
            link_id = item.attr("share-linkid")
            if not link_id:
                continue
            url = "https://dig.chouti.com/link/vote?linksId=%s" % link_id
            yield Request(
                url=url,
                method="POST",
                cookies=self.cookies_list,
                callback=self.show,
            )

    def show(self, response):
        """Print the body of a vote response."""
        print(response.text)