Python 爬取排行榜小說和文字
阿新 • 發佈:2019-01-08
# -*- coding: utf-8 -*-
import os
import sys

import scrapy
# Add a directory to the module search path so modules located there can be
# imported by this file.
# NOTE(review): hard-coded, machine-specific Windows path — presumably the
# Scrapy project root; confirm, and prefer running from the project directory.
sys.path.append("D:\\pycodes\\novel")
class XiaoshuoSpider(scrapy.Spider):
    """Crawl the qu.la ranking page and save the chapters of every listed novel.

    Request flow:
        ``parse``          ranking page  -> one request per listed book
        ``parse_book``     book index    -> one request per chapter link
        ``parse_content``  chapter page  -> text appended to a per-book file

    The ranking block a book was found in is carried along the request chain
    in ``Request.meta['category']``; it selects the output sub-directory.
    """

    name = 'xiaoshuo'
    start_urls = ['https://www.qu.la/paihangbang/']
    novel_list = []

    # Root directory that receives one sub-directory per ranking block.
    _OUTPUT_ROOT = "D:/novels/"

    # 1-based position of a ``topbooks`` block on the ranking page -> folder.
    # NOTE(review): assumes the blocks appear in this order on the page;
    # confirm against the live markup.
    _CATEGORY_DIRS = {
        1: "xh",
        2: "wx",
        3: "ds",
        4: "ls",
        5: "kh",
        6: "wy",
        7: "ns",
    }

    # Folder used for any block position not listed above (or when unknown).
    _DEFAULT_DIR = "wb"

    def parse(self, response):
        """Yield one request per book linked from each ranking block.

        Args:
            response: the ranking page response.

        Yields:
            scrapy.Request: a request for the book's index page, handled by
            ``parse_book``, tagged with the 1-based index of its ranking block.
        """
        ranking_blocks = response.xpath("//div[@ class='topbooks']")
        for category, sel in enumerate(ranking_blocks, start=1):
            for href in sel.xpath(".//a/@href").extract():
                # The category travels with the request: a shared counter
                # cannot be trusted because responses are handled
                # asynchronously and in no guaranteed order.
                yield scrapy.Request(
                    response.urljoin(href),
                    callback=self.parse_book,
                    meta={'category': category},
                )

    def parse_book(self, response):
        """Yield one request per chapter link found on a book's index page.

        Args:
            response: the book index page response.

        Yields:
            scrapy.Request: a request for a chapter page, handled by
            ``parse_content``, carrying the same ``category`` as the book.
        """
        category = response.meta.get('category')
        for href in response.xpath("//dd/a/@href").extract():
            yield scrapy.Request(
                response.urljoin(href),
                callback=self.parse_content,
                meta={'category': category},
            )

    def parse_content(self, response):
        """Extract one chapter, append it to the book's text file, yield it.

        The chapter is appended to
        ``<_OUTPUT_ROOT>/<category folder>/<book title>.txt``.
        Chapters are written in the order their responses are processed,
        which is not necessarily the book's chapter order.

        Args:
            response: the chapter page response.

        Yields:
            dict: ``name`` (book title), ``volume`` (chapter title) and
            ``text`` (chapter body), all as strings.
        """
        filename = response.xpath("//a[@href='./']/text()").extract_first()
        volumename = response.xpath("//h1/text()").extract_first()
        print(filename, volumename)

        body = response.xpath("//div[@id='content']/text()").extract()
        # U+3000 is the ideographic (full-width) space used for indentation.
        content = "".join(body).strip().replace("\u3000", " ")

        item = {
            'name': str(filename),
            'volume': str(volumename),
            'text': str(content),
        }

        subdir = self._CATEGORY_DIRS.get(
            response.meta.get('category'), self._DEFAULT_DIR
        )
        target_dir = self._OUTPUT_ROOT + subdir + "/"
        # Create the folder on first use so open() does not fail.
        os.makedirs(target_dir, exist_ok=True)

        # Append mode: every chapter of a book accumulates in the same file.
        with open(target_dir + "{}.txt".format(filename), "a",
                  encoding='utf-8') as f:
            f.write(item['volume'] + "\n" + item['text'] + "\n\n")

        yield item