1. 程式人生 > >python 爬取排行榜小說和文字

python 爬取排行榜小說和文字

# -*- coding: utf-8 -*-
import os
import sys

import scrapy
sys.path.append("D:\\pycodes\\novel")


class XiaoshuoSpider(scrapy.Spider):
    """Crawl the qu.la ranking page and save every chapter of each listed novel.

    Crawl flow: ``parse`` (ranking page) -> ``parse_book`` (chapter index of
    one novel) -> ``parse_content`` (a single chapter). Each chapter is
    appended to ``<output_root>/<category dir>/<novel title>.txt``.

    The position of the ranking block a novel was found in travels with the
    requests through ``Request.meta`` instead of a module-level global:
    Scrapy runs callbacks asynchronously, so a global would not hold the
    value that belongs to the response being processed.
    """

    name = 'xiaoshuo'
    start_urls = ['https://www.qu.la/paihangbang/']
    # Not used by the spider itself; kept so external references keep working.
    novel_list = []

    # Root directory that receives the per-category sub-directories.
    output_root = "D:/novels/"
    # Sub-directory used for the 1st..7th "topbooks" block of the ranking page.
    # NOTE(review): assumes the blocks appear on the page in this order -- confirm.
    category_dirs = ("xh", "wx", "ds", "ls", "kh", "wy", "ns")
    # Sub-directory used for every other block (and when the index is unknown).
    default_category_dir = "wb"
    # Key under which the 1-based block index is stored in ``Request.meta``.
    _CATEGORY_META_KEY = 'category_index'

    def parse(self, response):
        """Request the page of every book linked from each ``topbooks`` block.

        Args:
            response: Response for the ranking page in ``start_urls``.

        Yields:
            One ``scrapy.Request`` per book link, handled by ``parse_book``.
        """
        blocks = response.xpath("//div[@ class='topbooks']")
        for category_index, block in enumerate(blocks, start=1):
            for href in block.xpath(".//a/@href").extract():
                yield scrapy.Request(
                    # urljoin resolves both "/abs/path" and relative hrefs.
                    response.urljoin(href),
                    callback=self.parse_book,
                    meta={self._CATEGORY_META_KEY: category_index},
                )

    def parse_book(self, response):
        """Request every chapter linked from a novel's chapter index.

        Args:
            response: Response for a single novel's page.

        Yields:
            One ``scrapy.Request`` per chapter link, handled by
            ``parse_content``; the category index is passed along unchanged.
        """
        category_index = response.meta.get(self._CATEGORY_META_KEY, 0)
        for href in response.xpath("//dd/a/@href").extract():
            yield scrapy.Request(
                response.urljoin(href),
                callback=self.parse_content,
                meta={self._CATEGORY_META_KEY: category_index},
            )

    def parse_content(self, response):
        """Append one chapter to its novel's text file and emit it as an item.

        Chapters are appended in the order this callback is invoked, which
        need not match the order of the chapter index.

        Args:
            response: Response for a single chapter page.

        Yields:
            A dict with the keys ``name``, ``volume`` and ``text``.
        """
        filename = response.xpath("//a[@href='./']/text()").extract_first()
        volumename = response.xpath("//h1/text()").extract_first()
        print(filename, volumename)

        if filename is None:
            # Without a novel title there is no meaningful file to append to.
            self.logger.warning(
                "No novel title found on %s; chapter skipped", response.url
            )
            return

        paragraphs = response.xpath("//div[@id='content']/text()").extract()
        # U+3000 is the ideographic (full-width) space used for indentation.
        content = "".join(paragraphs).strip().replace("\u3000", " ")

        category_index = response.meta.get(self._CATEGORY_META_KEY, 0)
        target_dir = os.path.join(
            self.output_root, self._category_dir(category_index)
        )
        os.makedirs(target_dir, exist_ok=True)
        target_path = os.path.join(target_dir, "{}.txt".format(filename))
        # The context manager closes the handle even if the write fails.
        with open(target_path, "a", encoding='utf-8') as handle:
            handle.write(str(volumename) + "\n" + content + "\n\n")

        yield {
            'name': str(filename),
            'volume': str(volumename),
            'text': content,
        }

    def _category_dir(self, category_index):
        """Return the sub-directory name for a 1-based ranking block index."""
        if 1 <= category_index <= len(self.category_dirs):
            return self.category_dirs[category_index - 1]
        return self.default_category_dir