python追蹤小說更新並通過郵件提醒
阿新 • • 發佈:2018-06-09
ID lse ini ret end href string pan xxxxxx
#!/usr/bin/env python
# coding:utf-8
"""Poll novel pages for a new latest chapter and e-mail a notification.

Two plain-text data files drive the script, one per supported site
(``qidian.txt`` and ``zongheng.txt``). Each file holds alternating lines:
a book page URL followed by the last chapter title seen for that URL.
On every pass the script downloads each page, extracts the current latest
chapter title with a site-specific XPath, e-mails a notice when the title
changed, and rewrites the data files with the up-to-date titles.
"""
from urllib import request, parse
import os
import time
import random
from urllib.error import URLError, HTTPError
from email import encoders
from email.header import Header
from email.mime.text import MIMEText
from email.utils import parseaddr, formataddr
from email.mime.multipart import MIMEMultipart
from html import escape
import smtplib

from lxml import etree

# Data files: alternating "link" / "last seen title" lines.
QIDIAN_FILE = "qidian.txt"
ZONGHENG_FILE = "zongheng.txt"
LOG_FILE = "xs_log.txt"

# Site-specific locations of the latest chapter title inside the book page.
QIDIAN_XPATH = (u"/html/body/div[2]/div[6]/div[4]/div[1]/div[1]/div[2]"
                u"/ul/li[3]/div/p[1]/a/@title")
ZONGHENG_XPATH = u"/html/body/div[6]/div[1]/div/div[3]/a/text()"

# Seconds to wait for a page before giving up on this polling pass.
FETCH_TIMEOUT = 30
# Seconds between two polling passes (10 minutes).
POLL_INTERVAL = 600

MAIL_TEMPLATE = """
<html>
<head></head>
<body>
<p>同誌,最新章節在此:<br>
點擊鏈接立即閱讀<br>
<a href="{link}">{title}</a><br>
<hr style="border:1px dashed #000; height:1px">
<a href="http://www.bearcarl.top">點擊鏈接加入我們的社區</a><br>
</p>
</body>
</html>
"""


class Spider:
    """Track the latest chapter title of every configured novel."""

    def __init__(self):
        # Last known chapter titles, index-aligned with the links read from
        # the corresponding data file in check_update().
        self.old_title_qidian = list()
        self.old_title_zongheng = list()

    @staticmethod
    def _load_records(path):
        """Read *path* and return ``(links, titles)`` as two parallel lists.

        Trailing blank lines are ignored, and a link that has no title line
        yet is paired with an empty title so both lists always have the same
        length.
        """
        with open(path, 'r') as f:
            lines = [line.strip() for line in f]
        while lines and not lines[-1]:
            lines.pop()
        if len(lines) % 2:
            # Link without a recorded title (e.g. a freshly added book).
            lines.append('')
        return lines[0::2], lines[1::2]

    @staticmethod
    def _save_records(path, links, titles):
        """Write the link/title pairs back to *path*, one item per line."""
        with open(path, 'w') as f:
            for link, title in zip(links, titles):
                f.write(link + '\n')
                f.write(title + '\n')

    def check_update(self):
        """Run one polling pass over every book of both sites.

        Loads the recorded titles, checks each link for a new chapter
        (sending an e-mail per change) and saves the refreshed titles.
        """
        # Read the latest chapters recorded by the previous pass.
        link_qidian, self.old_title_qidian = self._load_records(QIDIAN_FILE)
        link_zongheng, self.old_title_zongheng = self._load_records(
            ZONGHENG_FILE)

        print(self.old_title_qidian)
        print(link_qidian)
        print(self.old_title_zongheng)

        # Check every book for an update.
        for i, link in enumerate(link_qidian):
            self.update_2(i, link)
        for i, link in enumerate(link_zongheng):
            self.update_3(i, link)

        # Persist the new chapter titles.
        self._save_records(QIDIAN_FILE, link_qidian, self.old_title_qidian)
        self._save_records(ZONGHENG_FILE, link_zongheng,
                           self.old_title_zongheng)

    def open_url(self, link):
        """Download *link* and return the response body decoded as UTF-8.

        Raises whatever ``urllib.request.urlopen`` raises (``URLError``,
        ``HTTPError``, timeouts) and ``UnicodeDecodeError`` for a body that
        is not valid UTF-8.
        """
        req = request.Request(link)
        req.add_header('User-Agent',
                       'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                       '(KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393')
        with request.urlopen(req, timeout=FETCH_TIMEOUT) as response:
            return response.read().decode('utf-8')

    def _check_for_update(self, titles, i, link, xpath):
        """Compare the page's latest chapter with ``titles[i]``; notify on change.

        *titles* is the per-site list of recorded titles and is updated in
        place. A page that cannot be fetched or parsed is reported and
        skipped so one broken book does not abort the whole pass.
        """
        try:
            page = self.open_url(link)
        except (OSError, ValueError) as err:
            # URLError/HTTPError/timeouts are OSError subclasses; ValueError
            # covers malformed URLs and undecodable response bodies.
            print('fetch failed for %s: %s' % (link, err))
            return

        tree = etree.HTML(page)
        nodes = tree.xpath(xpath) if tree is not None else []
        if not nodes:
            # The XPath is tied to the page layout; nothing matched.
            print('no chapter title found at %s' % link)
            return

        # Text nodes can carry surrounding whitespace/newlines, and the
        # recorded titles were stripped when loaded, so normalise first.
        latest = nodes[0].strip()
        print(latest)
        if latest != titles[i]:
            titles[i] = latest
            new_email = SendEmail(latest)
            new_email.send(link)
        else:
            print(0)

    def update_2(self, i, link):
        """Check book *i* (page *link*) of the Qidian list for an update."""
        self._check_for_update(self.old_title_qidian, i, link, QIDIAN_XPATH)

    def update_3(self, i, link):
        """Check book *i* (page *link*) of the Zongheng list for an update."""
        self._check_for_update(self.old_title_zongheng, i, link,
                               ZONGHENG_XPATH)


class SendEmail:
    """Send an "a new chapter is out" notification e-mail."""

    def __init__(self, title):
        # Chapter title shown as the link text in the message body.
        self.title = title

    def send(self, link):
        """E-mail a notice pointing at *link*; print the outcome.

        Delivery failures are reported with ``print("failed")`` and are not
        propagated, so the polling loop keeps running.
        """
        # Sender address, mail-service authorization code, recipient address
        # and SMTP host. 'xxxxxxxxxx' stands for the authorization code.
        # NOTE(review): the two addresses look like redacted placeholders;
        # replace them with real addresses before running.
        data_1 = ['[email protected]', 'xxxxxxxxxx', '[email protected]', 'smtp.qq.com']
        from_addr = data_1[0]
        password = data_1[1]
        to_addr = data_1[2]
        smtp_server = data_1[3]

        msg = MIMEMultipart('alternative')
        msg['From'] = from_addr
        msg['To'] = to_addr
        msg['Subject'] = Header(r'同誌,小說更新了!!!', 'utf-8')

        # Escape both values: they come from a scraped page / data file and
        # are interpolated into HTML (the link inside a quoted attribute).
        body = MAIL_TEMPLATE.format(link=escape(link, quote=True),
                                    title=escape(self.title))
        msg.attach(MIMEText(body, 'html', 'utf-8'))

        try:
            # The context manager issues QUIT and closes the socket, and is
            # only entered once the connection actually exists.
            with smtplib.SMTP_SSL(smtp_server, 465) as server:
                server.set_debuglevel(1)
                server.login(from_addr, password)
                server.sendmail(from_addr, to_addr, msg.as_string())
            print('success')
        except (smtplib.SMTPException, OSError) as e:
            print("failed")


def main():
    """Poll forever, logging the start time of every pass."""
    update_Spider = Spider()
    while True:
        # Record the current time.
        now_time = time.strftime('%Y-%m-%d %H : %M : %S',
                                 time.localtime(time.time()))
        with open(LOG_FILE, "a") as f:
            f.write(now_time + '\n')
        update_Spider.check_update()
        # Check for updates every 10 minutes.
        time.sleep(POLL_INTERVAL)


if __name__ == '__main__':
    main()
第一次寫的時候,提取最新章節用的是正則表達式,後來改成了 XPath。
如果不想用文件保存鏈接信息和最新章節名,可以直接寫在代碼裏面用列表保存。
效果:
python追蹤小說更新並通過郵件提醒