Python 爬取“頂點小說網”《純陽劍尊》的示例程式碼
阿新 • • 發佈:2020-10-19
爬取“頂點小說網”《純陽劍尊》
程式碼
"""Download one chapter of the novel from 23us.so and append it to a text file."""

import requests
from bs4 import BeautifulSoup

# Browser-like request header, sent with every request (anti-anti-scraping).
# Built with implicit string concatenation so no stray backslash ends up in
# the header value.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'
    )
}

# Chapter range advertised in the user prompt.
FIRST_CHAPTER = 1
LAST_CHAPTER = 802


def open_url(url, timeout=10):
    """Fetch *url* and return the decoded page text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as text, decoded with the encoding that
        ``requests`` detects from the content.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly instead of saving an error page as if it were a chapter.
    response.raise_for_status()
    response.encoding = response.apparent_encoding
    return response.text


def get_title(url):
    """Extract the chapter title from page markup.

    Args:
        url: The HTML text of the chapter page (the parameter name is kept
            for backward compatibility; it is markup, not an address).

    Returns:
        The text of the ``<h1>`` inside the first ``<dd>``, wrapped in
        leading and trailing newlines.

    Raises:
        ValueError: If the page has no ``<dd>`` element containing an ``<h1>``.
    """
    soup = BeautifulSoup(url, 'lxml')
    title_tag = soup.find('dd')
    if title_tag is None or title_tag.h1 is None:
        raise ValueError('chapter title (<dd><h1>) not found in page')
    return '\n' + title_tag.h1.get_text() + '\n'


def get_texts(url):
    """Return every ``<dd id="contents">`` element found in page markup.

    Args:
        url: The HTML text of the chapter page (markup, not an address).

    Returns:
        The result of ``find_all``; empty when nothing matches.
    """
    soup = BeautifulSoup(url, 'lxml')
    return soup.find_all('dd', id="contents")


def save_title(filename, title):
    """Append *title* to *filename* (UTF-8), creating the file if needed."""
    with open(filename, 'a+', encoding='utf-8') as file:
        file.write(title)


def save_text(filename, text):
    """Append *text* to *filename* (UTF-8), creating the file if needed."""
    # Append mode is required: the default mode is read-only, and writing to
    # a read-only handle raises io.UnsupportedOperation.
    with open(filename, 'a', encoding='utf-8') as file:
        file.write(text)


def main():
    """Ask for a chapter number, download that chapter, and append it to disk."""
    answer = input('《純陽劍尊》你想要下載第幾章?(1-802)')
    try:
        num = int(answer)
    except ValueError:
        raise SystemExit('請輸入 1 到 802 之間的整數。') from None
    if not FIRST_CHAPTER <= num <= LAST_CHAPTER:
        raise SystemExit('請輸入 1 到 802 之間的整數。')

    # NOTE(review): assumes the site's page ids are contiguous, so chapter N
    # lives at 8184027 + N — confirm against the site's chapter index.
    number = 8184027 + num
    url = 'https://www.23us.so/files/article/html/15/15905/' + str(number) + '.html'
    filename = '純陽劍尊.txt'

    page = open_url(url)
    title = get_title(page)
    tags = get_texts(page)

    save_title(filename, title)
    for text_tag in tags:
        save_text(filename, text_tag.get_text() + '\n')
    print('第{}章已經下載完成!'.format(num))


if __name__ == '__main__':
    main()
爬取結果:
以上就是 Python 爬取“頂點小說網”《純陽劍尊》的示例程式碼的詳細內容,更多關於 Python 爬取頂點小說網的資料請關注我們其它相關文章!