使用python對小說更新進行提醒
阿新 • • 發佈:2020-09-10
總管寫的書一直都很喜歡,從《雪中悍刀行》到《劍來》。
其實我還是最喜歡那個雪中的滑鼠墊,哈哈哈
針對筆趣閣小說進行資料爬取
上原始碼
# filename=get_data.py
# -*- coding: utf-8 -*-
# BY WANGCC
"""Poll a novel's index page and e-mail a reminder when a new chapter appears.

The latest chapter name is read from the page's
``og:novel:latest_chapter_name`` meta tag and compared with the name stored
in a local text file by the previous run.
"""
import os
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup

import logger
from ip_to_mysql import mysql_proxies
from send_mail import sms

log = logger.Logger("debug")

# Local file that remembers the chapter name seen by the previous run.
test_file = "劍來" + ".txt"


def gain_html_content(url, timeout=15):
    """Download *url* through a proxy taken from the database.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the proxy/server before giving up.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        ValueError: If the proxy string returned by ``mysql_proxies`` is not
            of the form ``scheme://host:port``.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36"
    }

    # Proxies are stored like "http://111.26.9.26:80"; let urlsplit take the
    # string apart instead of slicing it by hand.
    proxy_url = mysql_proxies()
    proxy_parts = urllib.parse.urlsplit(proxy_url)
    if not (proxy_parts.scheme and proxy_parts.netloc):
        raise ValueError("unexpected proxy format: %r" % (proxy_url,))

    # ProxyHandler only applies an entry to requests whose URL scheme equals
    # the dictionary key, so an entry keyed "http" alone would silently be
    # skipped for an https:// target. Register the proxy for the target's
    # scheme as well so the request really goes through it.
    proxy_map = {proxy_parts.scheme: proxy_parts.netloc}
    target_scheme = urllib.parse.urlsplit(url).scheme
    if target_scheme:
        proxy_map.setdefault(target_scheme, proxy_parts.netloc)

    opener = urllib.request.build_opener(urllib.request.ProxyHandler(proxy_map))
    request = urllib.request.Request(url=url, headers=headers)

    # The context manager closes the connection even if read/decode fails.
    with opener.open(request, timeout=timeout) as response:
        content = response.read().decode('utf-8')
    log.info('獲取代理成功,請求頁面成功!')
    return content


def get_chapter(content):
    """Return the latest chapter name advertised by the page.

    Args:
        content: HTML text of the novel's index page.

    Returns:
        The ``content`` attribute of the
        ``<meta property="og:novel:latest_chapter_name">`` tag.

    Raises:
        ValueError: If the page has no such meta tag.
    """
    soup = BeautifulSoup(content, "lxml")
    meta_tag = soup.find("meta", property="og:novel:latest_chapter_name")
    if meta_tag is None:
        raise ValueError(
            "meta tag 'og:novel:latest_chapter_name' not found in page")
    return meta_tag['content']


def readfile(content):
    """Return the chapter name saved by the previous run.

    On the very first run the file does not exist yet, so it is seeded with
    *content*; the value read back then equals *content*.

    Args:
        content: Chapter name used to seed the file when it is missing.

    Returns:
        The text currently stored in the local file.
    """
    if not os.path.exists(test_file):
        write2file(content)
        log.info('將當前內容寫入文件,生成劍來.txt文件')
    with open(test_file, 'r', encoding='utf-8') as f:
        saved = f.read()
        log.info('讀取劍來.txt文件')
    return saved


def write2file(content):
    """Overwrite the local file with *content* (the latest chapter name)."""
    with open(test_file, 'w', encoding='utf-8') as f:
        f.write(content)
        log.info('將小說寫入本地檔案,生成劍來.txt文件')


def main():
    """Fetch the page, compare chapter names and notify on a change."""
    tar_url = 'https://www.qu.la/book/31177/'
    content_url = gain_html_content(tar_url)
    log.info('頁面下載完成')

    content = get_chapter(content_url)
    old_str = readfile(content)
    if content == old_str:
        log.info("沒更新呢!")
    else:
        # Remember the new chapter first, then send the reminder.
        write2file(content)
        sms(content)
        log.info('傳送郵件提醒')


if __name__ == "__main__":
    main()
傳送郵件部分
# -*- coding: utf-8 -*-
# BY WANGCC
"""Send the reminder e-mail through the 163.com SMTP service."""
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import logger

log = logger.Logger("debug")

smtpserver = 'smtp.163.com'
username = '[email protected]'
password = 'xxxxxx'
sender = '[email protected]'
# Several recipients. Using a carrier mailbox here lets the carrier forward
# the mail as an SMS notification.
receiver = ['[email protected]','[email protected]']


def sms(contect):
    """E-mail every address in ``receiver`` with *contect* as the subject.

    Args:
        contect: Text used as the mail subject (the new chapter name).

    Raises:
        smtplib.SMTPException: If login or delivery fails; the SMTP session
            is closed before the exception propagates.
    """
    print("input sms...")

    # Subject, sender and recipients below are what the mail client shows.
    msg = MIMEMultipart('mixed')
    msg['Subject'] = contect
    msg['From'] = 'wangcc <[email protected]>'
    # Address lists in mail headers are comma-separated, not ';'-separated.
    msg['To'] = ", ".join(receiver)

    # Plain-text body.
    text = "小說更新了!"
    msg.attach(MIMEText(text, 'plain', 'utf-8'))

    # Passing a host to SMTP_SSL already opens the connection, so no extra
    # connect() call is made; the with-block sends QUIT even when login or
    # sendmail raises.
    with smtplib.SMTP_SSL(host=smtpserver, port=465) as smtp:
        # smtp.set_debuglevel(1) would print the whole SMTP conversation.
        smtp.login(username, password)
        print("進入傳送")
        smtp.sendmail(sender, receiver, msg.as_string())

    print('success....')
    s_receiver = str(receiver)
    log.info('傳送提醒郵件給:'+s_receiver)


if __name__ == "__main__":
    sms('c測試~~')
資料庫連線
# -*- coding: utf-8 -*-
# BY WANGCC
"""MySQL helpers for the ``ip_original`` table of proxy addresses."""
import contextlib
import datetime
import random

import pymysql

import logger

log = logger.Logger("debug")

# NOTE: every value below is a redacted placeholder from the original post.
# In particular ``xxxxx`` must be replaced by the integer MySQL port before
# this module can be imported.
DB_CONFIG = {
    "host": "xxxxxxxx",
    "port": xxxxx,
    "user": "xxxx",
    "passwd": "111111111",
    "db": "xxxxx",
    "charset": "utf8"
}


def _connect():
    """Open a new connection using the settings in ``DB_CONFIG``."""
    return pymysql.connect(
        host=DB_CONFIG["host"],
        port=DB_CONFIG["port"],
        user=DB_CONFIG["user"],
        passwd=DB_CONFIG["passwd"],
        db=DB_CONFIG["db"],
        charset=DB_CONFIG["charset"])


def _now():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")


def _pick_random_proxy(select_sql):
    """Run *select_sql* and return column 1 of one randomly chosen row.

    Raises:
        IndexError: If the query returns no rows.
    """
    with contextlib.closing(_connect()) as db:
        with db.cursor() as cursor:
            cursor.execute(select_sql)
            rows = cursor.fetchmany(10)
    # Choose among the rows actually returned: a fixed index in 1..9 would
    # never pick the first row and fails when fewer than ten rows exist.
    # Column 1 is presumably the proxy address (query is SELECT *) - verify
    # against the table definition.
    proxies = random.choice(rows)[1]
    print(proxies)
    return proxies


def get_random():
    """Return a random integer between 1 and 9 inclusive."""
    numbers = range(1, 10)
    chosen = random.choice(numbers)
    return chosen


def mysql(ip_list):
    """Insert every address of *ip_list* that is not yet in ``ip_original``.

    Args:
        ip_list: Iterable of proxy address strings.
    """
    check_sql = "select count(*) from ip_original where ip=%s"
    insert_sql = "insert into ip_original(ip,date)value (%s,%s)"
    date = _now()
    with contextlib.closing(_connect()) as db:
        with db.cursor() as cursor:
            for ip in ip_list:
                # Parameters are passed separately so the driver escapes them.
                cursor.execute(check_sql, (ip,))
                if cursor.fetchone()[0] != 0:
                    log.info(ip+': is existence !!')
                    continue
                # Rendered statement, used for logging only.
                rendered = cursor.mogrify(insert_sql, (ip, date))
                try:
                    cursor.execute(insert_sql, (ip, date))
                    db.commit()
                except pymysql.MySQLError as e:
                    log.info('執行sql-->'+rendered+'fail')
                    log.info(str(e))
                    # Undo the failed statement and carry on with the rest.
                    db.rollback()
                else:
                    log.info(ip+'insert to ip_original success!')


# One proxy for crawling.
def mysql_proxies():
    """Return a random proxy from rows whose ``check_date`` is not NULL.

    Raises:
        IndexError: If no such row exists.
    """
    check_sql = "SELECT * FROM ip_original where check_date is not NULL ORDER BY RAND() LIMIT 10 "
    return _pick_random_proxy(check_sql)


# One proxy for validation.
def mysql_old():
    """Return a random proxy from any row of ``ip_original``.

    Raises:
        IndexError: If the table is empty.
    """
    check_sql = "SELECT * FROM ip_original ORDER BY RAND() LIMIT 10 "
    return _pick_random_proxy(check_sql)


# Delete one row.
def mysql_delete(proxies):
    """Delete the row whose ``ip`` equals *proxies* and return *proxies*."""
    check_sql = "delete from ip_original where ip = %s"
    with contextlib.closing(_connect()) as db:
        with db.cursor() as cursor:
            log.info('delete ip-->'+cursor.mogrify(check_sql, (proxies,)))
            cursor.execute(check_sql, (proxies,))
            db.commit()
    return proxies


# Update origin and validation time.
def mysql_update(str_from, proxies_yuan):
    """Set ``from_area`` and ``check_date`` for the row with the given ip.

    Args:
        str_from: Value stored in ``from_area``.
        proxies_yuan: Address identifying the row to update.
    """
    update_sql = "update ip_original set from_area=%s,check_date=%s where ip=%s"
    params = (str_from, _now(), proxies_yuan)
    with contextlib.closing(_connect()) as db:
        with db.cursor() as cursor:
            try:
                print(cursor.mogrify(update_sql, params))
                cursor.execute(update_sql, params)
                db.commit()
            except pymysql.MySQLError as e:
                log.info(str_from+'failed')
                print(e)
                # Undo the failed statement.
                db.rollback()
            else:
                log.info(proxies_yuan+'---->'+str_from+'--> updata success!')


if __name__ == "__main__":
    ip_list = ['http://117.191.11.108:80', 'http://134.209.15.143:8080',
               'http://157.230.232.130:80', 'http://111.206.6.100:80',
               'http://159.138.5.222:80', 'http://178.128.12.118:8080',
               'http://83.142.126.147:80', 'http://150.109.55.190:83',
               'http://165.227.62.167:8080', 'http://167.114.153.18:80',
               'http://39.137.69.10:8080', 'http://111.206.6.101:80',
               'http://165.227.29.189:8080', 'http://175.139.252.192:80',
               'http://103.42.213.176:8080', 'http://211.23.149.29:80',
               'http://211.23.149.28:80', 'http://47.94.57.119:80',
               'http://175.139.252.194:80', 'http://47.94.217.37:80']
    # Uncomment to seed the table with the list above.
    # mysql(ip_list)
    number = mysql_proxies()
思路
每次爬取,從資料庫隨機抽一個代理ip來用,如果代理不可用就從資料庫中刪除。
資料爬取後,把最新章節名稱存在本地txt,留著和下次爬取的結果作比對;如果不一致,則更新本地檔案,並傳送郵件提醒。