用 Python 爬取小說(簡易版)
阿新 • • 發佈:2018-12-15
# -*- coding: utf-8 -*-
"""Scrape a novel from biqugecom.com: fetch the chapter index, then append
every chapter's heading and text to a single UTF-8 text file."""
import codecs

import requests
from bs4 import BeautifulSoup

# Site root that the relative chapter hrefs on the index page are appended to.
BASE_URL = 'http://www.biqugecom.com'

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30


def _fetch_soup(url):
    """Download *url* and return the page parsed with the lxml parser.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Parse the raw bytes (.content); per the original author's note, using
    # .text instead produces garbled characters.
    return BeautifulSoup(response.content, 'lxml')


def get_url_list(url):
    """Return the absolute chapter URLs linked from the index page at *url*.

    The list is also printed to stdout before being returned.

    Args:
        url: URL of the novel's chapter index page.

    Returns:
        A list of absolute chapter URLs, minus the first nine links and the
        last link found on the page.
    """
    soup = _fetch_soup(url)
    anchors = soup.select("#list > dl > dd > a")
    # Anchors without an href are skipped instead of failing on str + None.
    url_list = [BASE_URL + a.get("href") for a in anchors if a.get("href")]
    # NOTE(review): the slice is kept exactly as originally written.
    # Presumably the first nine entries are a duplicated "latest chapters"
    # section; why the final link is dropped is unclear -- confirm against
    # the live page that the last chapter is not being lost.
    url_list = url_list[9:-1]
    print(url_list)
    return url_list


def get_data(url, output_path='output.txt'):
    """Fetch one chapter page and append its heading and text to a file.

    The chapter heading is printed to stdout as a progress indicator.

    Args:
        url: URL of the chapter page.
        output_path: File to append to; it is created if missing and written
            as UTF-8.

    Raises:
        IndexError: if the page has no chapter heading element.
    """
    soup = _fetch_soup(url)
    section_name = soup.select(
        "#wrapper > div.content_read > div > div.bookname > h1")[0].text
    print(section_name)
    # codecs.open encodes the unicode text as UTF-8 on write; the context
    # manager guarantees the file is closed even if a write fails.
    with codecs.open(output_path, 'a+', 'utf-8') as fo:
        # Chapter heading, set off by blank lines.
        fo.write('\r\n' + section_name + '\r\n')
        for section in soup.select("#content"):
            # Strip the script call and site watermark embedded in the text.
            # NOTE(review): the watermark path (20/20341) differs from the
            # book scraped in main() (34/34055) -- verify it still matches.
            text = section.text.replace('readx();', '').replace(
                'www.biqugecom.com/20/20341/', '')
            fo.write(text + '\r\n')


def main():
    """Download every chapter of the configured novel into output.txt."""
    url = 'http://www.biqugecom.com/34/34055/'
    for chapter_url in get_url_list(url):
        get_data(chapter_url)


if __name__ == '__main__':
    main()