1. 程式人生 > 實用技巧 >使用python對小說更新進行提醒

使用python對小說更新進行提醒

總管寫的書一直都很喜歡,從《雪中悍刀行》到《劍來》。
其實我還是最喜歡那個雪中的滑鼠墊,哈哈哈

針對筆趣閣小說進行資料爬取

上原始碼

#filename=get_data.py
# -*-coding:utf-8 -*-
# BY WANGCC


from bs4 import BeautifulSoup
import urllib.request
import os
from send_mail import sms
from ip_to_mysql import mysql_proxies
import logger
# Module-level logger used by every function in this script; "debug" is the
# argument handed to the project-local Logger (presumably the log level —
# confirm in logger.py).
log = logger.Logger("debug")


# Local file in which the most recently seen chapter title is stored.
test_file="劍來" + ".txt"
def gain_html_content(url):
    """Download *url* through a proxy taken from the database.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body decoded as UTF-8 text.

    Raises:
        ValueError: If the proxy string returned by ``mysql_proxies`` has no
            scheme or no host part.
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the page is not valid UTF-8.
    """
    # Local import: only this function needs URL parsing.
    from urllib.parse import urlsplit

    # Browser-like User-Agent sent with the request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36"
    }

    # The proxy is expected in the form "http://111.26.9.26:80".  urlsplit
    # separates the scheme from "host:port" without relying on fixed offsets.
    proxy_url = mysql_proxies()
    parts = urlsplit(proxy_url)
    if not parts.scheme or not parts.netloc:
        raise ValueError("malformed proxy address: %r" % (proxy_url,))

    # NOTE(review): urllib picks a proxy by the scheme of the requested URL,
    # so a proxy registered under "http" is bypassed for https:// targets —
    # confirm that this is intended.
    httpproxy_handler = urllib.request.ProxyHandler({parts.scheme: parts.netloc})
    opener = urllib.request.build_opener(httpproxy_handler)
    request = urllib.request.Request(url=url, headers=headers)

    # The with-block closes the response (and its socket) even when reading
    # or decoding fails.
    with opener.open(request) as response:
        log.info('獲取代理成功,請求頁面成功!')
        content = response.read().decode('utf-8')
    return content


def get_chapter(content):
    """Extract the latest chapter title from a novel index page.

    Args:
        content: HTML text of the page.

    Returns:
        The ``content`` attribute of the page's
        ``<meta property="og:novel:latest_chapter_name">`` tag.
    """
    # Parse the page with the lxml backend.
    page = BeautifulSoup(content, "lxml")
    # The latest chapter name is published in a <meta> tag of the page.
    latest_meta = page.find("meta", property="og:novel:latest_chapter_name")
    return latest_meta['content']

def readfile(content):
    """Return the text stored in the local record file.

    When the record file does not exist yet, it is first created from
    *content*, so the value read back on that run equals *content*.

    Args:
        content: Text used to create the record file if it is missing.

    Returns:
        The full text of the record file.
    """
    record_missing = not os.path.exists(test_file)
    if record_missing:
        write2file(content)
        log.info('將當前內容寫入文件,生成劍來.txt文件')

    with open(test_file, mode='r', encoding='utf-8') as record:
        stored = record.read()
        log.info('讀取劍來.txt文件')
    return stored


def write2file(content):
    """Overwrite the local record file with *content* (UTF-8 encoded)."""
    with open(test_file, mode='w', encoding='utf-8') as record:
        record.write(content)
    log.info('將小說寫入本地檔案,生成劍來.txt文件')



def main():
    """Check the novel's index page and send a reminder when the title changed.

    The latest chapter title is compared with the one stored locally; when
    they differ, the local record is overwritten and a mail is sent.
    """
    # Download the index page of the novel.
    tar_url = 'https://www.qu.la/book/31177/'
    page_html = gain_html_content(tar_url)
    log.info('頁面下載完成')

    latest = get_chapter(page_html)
    previous = readfile(latest)

    # Nothing new: log it and stop.
    if latest == previous:
        log.info("沒更新呢!")
        return

    write2file(latest)
    sms(latest)
    log.info('傳送郵件提醒')


if __name__ == "__main__":
    main()
傳送郵件部分
# -*-coding:utf-8 -*-
# BY WANGCC
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import logger
# Module-level logger; "debug" is the argument handed to the project-local
# Logger (presumably the log level — confirm in logger.py).
log = logger.Logger("debug")


# SMTP account settings.
# NOTE(review): the address and password values look like redacted
# placeholders and must be replaced with real credentials before use.
smtpserver = 'smtp.163.com'
username = '[email protected]'
password = 'xxxxxx'
sender = '[email protected]'
# receiver='[email protected]'
# Several recipients are given as a list.
receiver = ['[email protected]','[email protected]']
# Using a mobile carrier's mailbox here allows an SMS alert to be configured
# for incoming mail, so the e-mail works just like a text-message reminder.

def sms(contect):
    """Send a reminder e-mail whose subject is *contect*.

    The message is sent from ``sender`` to every address in ``receiver``
    through the server named by ``smtpserver`` on port 465 (SMTP over SSL).

    Args:
        contect: Text used as the mail subject.

    Raises:
        smtplib.SMTPException: If login or delivery fails.
        OSError: If the connection to the SMTP server cannot be established.
    """
    print("input sms...")
    subject = contect

    # Subject, sender and recipients set here are what the mail client shows.
    msg = MIMEMultipart('mixed')
    msg['Subject'] = subject
    msg['From'] = 'wangcc <[email protected]>'
    # Addresses in a To header are separated by commas (RFC 5322); ";" is not
    # a valid separator for a plain address list.
    msg['To'] = ", ".join(receiver)

    # Plain-text body.
    text = "小說更新了!"
    text_plain = MIMEText(text,'plain', 'utf-8')
    msg.attach(text_plain)

    # SMTP_SSL connects in its constructor when a host is given, so calling
    # connect() again would open a second connection and leak the first one.
    # The with-block sends QUIT and closes the socket even when login() or
    # sendmail() raises.
    with smtplib.SMTP_SSL(host=smtpserver, port=465) as smtp:
        # smtp.set_debuglevel(1) prints the whole SMTP dialogue when debugging.
        smtp.login(username, password)
        print("進入傳送")
        smtp.sendmail(sender, receiver, msg.as_string())
        print('success....')
        s_receiver = str(receiver)
        log.info('傳送提醒郵件給:'+s_receiver)


if __name__ == "__main__":
    sms('c測試~~')
資料庫連線
# -*-coding:utf-8 -*-
# BY WANGCC

import pymysql,datetime
import logger,random

# Module-level logger; "debug" is the argument handed to the project-local
# Logger (presumably the log level — confirm in logger.py).
log = logger.Logger("debug")

# Connection settings passed to pymysql.connect() by the functions in this
# module.
# NOTE(review): the values look like redacted placeholders; in particular
# ``xxxxx`` for "port" is a bare name and must be replaced by an integer.
DB_CONFIG = {
    "host": "xxxxxxxx",
    "port": xxxxx,
    "user": "xxxx",
    "passwd": "111111111",
    "db": "xxxxx",
    "charset": "utf8"
}

def get_random():
    """Return a random integer between 1 and 9 inclusive."""
    return random.randrange(1, 10)

def mysql(ip_list):
    """Insert every address of *ip_list* that is not yet in ``ip_original``.

    Each new address is stored together with the current local time and is
    committed on its own; a failed insert is logged and rolled back, and the
    loop continues with the next address.

    Args:
        ip_list: Iterable of proxy address strings such as
            ``"http://117.191.11.108:80"``.
    """
    # Values are passed as query parameters so the driver quotes them;
    # addresses containing quotes can neither break nor alter the SQL.
    check_sql = "select count(*) from ip_original where ip=%s"
    insert_sql = "insert into ip_original(ip,date)value (%s,%s)"

    db = pymysql.connect(**DB_CONFIG)
    try:
        cursor = db.cursor()
        date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        for ip in ip_list:
            cursor.execute(check_sql, (ip,))
            # count(*) always yields exactly one row with one column.
            if cursor.fetchone()[0] != 0:
                log.info(ip+': is existence !!')
                continue
            try:
                cursor.execute(insert_sql, (ip, date))
                db.commit()
                # Report success only after the commit went through.
                log.info(ip+'insert to ip_original success!')
            except Exception as e:
                # Best effort: record the failure and carry on with the rest.
                log.info('執行sql-->'+insert_sql+' ['+ip+'] fail: '+str(e))
                db.rollback()
    finally:
        # Close the connection even if a query raised.
        db.close()

#採集用一個ip代理
def mysql_proxies():
    """Return one randomly chosen proxy that has already been validated.

    Up to ten rows of ``ip_original`` with a non-NULL ``check_date`` are
    fetched in random order and one of them is picked.

    Returns:
        The second column of the chosen row (presumably the proxy address,
        e.g. ``"http://111.26.9.26:80"`` — confirm against the table schema).

    Raises:
        IndexError: If the query returns no row.
    """
    check_sql="SELECT * FROM ip_original where check_date is not NULL ORDER BY RAND() LIMIT 10 "

    db = pymysql.connect(**DB_CONFIG)
    try:
        cursor = db.cursor()
        cursor.execute(check_sql)
        rows = cursor.fetchmany(10)
    finally:
        # Close the connection even if the query raised.
        db.close()

    if not rows:
        raise IndexError("no validated proxy available in ip_original")

    # Choose among the rows actually returned.  A fixed index in 1..9 would
    # never select the first row and would fail with fewer than ten rows.
    proxies = random.choice(rows)[1]
    print(proxies)
    return proxies

#驗證用一個ip代理
def mysql_old():
    """Return one randomly chosen proxy, validated or not.

    Up to ten rows of ``ip_original`` are fetched in random order and one of
    them is picked.

    Returns:
        The second column of the chosen row (presumably the proxy address,
        e.g. ``"http://111.26.9.26:80"`` — confirm against the table schema).

    Raises:
        IndexError: If the query returns no row.
    """
    check_sql="SELECT * FROM ip_original ORDER BY RAND() LIMIT 10 "

    db = pymysql.connect(**DB_CONFIG)
    try:
        cursor = db.cursor()
        cursor.execute(check_sql)
        rows = cursor.fetchmany(10)
    finally:
        # Close the connection even if the query raised.
        db.close()

    if not rows:
        raise IndexError("no proxy available in ip_original")

    # Choose among the rows actually returned.  A fixed index in 1..9 would
    # never select the first row and would fail with fewer than ten rows.
    proxies = random.choice(rows)[1]
    print(proxies)
    return proxies



#刪除一條資料
def mysql_delete(proxies):
    """Delete the rows of ``ip_original`` whose ``ip`` equals *proxies*.

    Args:
        proxies: Proxy address to remove.

    Returns:
        *proxies*, unchanged.
    """
    # The address is passed as a query parameter so the driver quotes it.
    delete_sql = "delete  from ip_original  where ip = %s"

    db = pymysql.connect(**DB_CONFIG)
    try:
        cursor = db.cursor()
        # mogrify() renders the statement exactly as execute() will send it,
        # so the log still shows the concrete address.
        log.info('delete ip-->'+cursor.mogrify(delete_sql, (proxies,)))
        cursor.execute(delete_sql, (proxies,))
        db.commit()
    finally:
        # Close the connection even if the statement raised.
        db.close()
    return proxies

#更新來源和驗證時間
def mysql_update(str_from,proxies_yuan):
    """Store the origin of a proxy and stamp it with the current time.

    Sets ``from_area`` to *str_from* and ``check_date`` to the current local
    time on the rows of ``ip_original`` whose ``ip`` equals *proxies_yuan*.
    A failing statement is logged, printed and rolled back; it is not
    re-raised.

    Args:
        str_from: Value written to the ``from_area`` column.
        proxies_yuan: Proxy address identifying the row to update.
    """
    # Values are passed as query parameters so the driver quotes them.
    update_sql = "update  ip_original set from_area=%s,check_date=%s where ip=%s"

    db = pymysql.connect(**DB_CONFIG)
    try:
        cursor = db.cursor()
        date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        params = (str_from, date, proxies_yuan)
        try:
            # Show the statement exactly as it will be sent to the server.
            print(cursor.mogrify(update_sql, params))
            cursor.execute(update_sql, params)
            db.commit()
            # Report success only after the commit went through.
            log.info(proxies_yuan+'---->'+str_from+'--> updata success!')
        except Exception as e:
            log.info(str_from+'failed')
            print(e)
            # Undo the partial change.
            db.rollback()
    finally:
        # Close the connection even if the setup code above raised.
        db.close()

if __name__ == "__main__":
    # Sample proxy addresses that can be loaded into the table via mysql().
    ip_list = [
        'http://117.191.11.108:80',
        'http://134.209.15.143:8080',
        'http://157.230.232.130:80',
        'http://111.206.6.100:80',
        'http://159.138.5.222:80',
        'http://178.128.12.118:8080',
        'http://83.142.126.147:80',
        'http://150.109.55.190:83',
        'http://165.227.62.167:8080',
        'http://167.114.153.18:80',
        'http://39.137.69.10:8080',
        'http://111.206.6.101:80',
        'http://165.227.29.189:8080',
        'http://175.139.252.192:80',
        'http://103.42.213.176:8080',
        'http://211.23.149.29:80',
        'http://211.23.149.28:80',
        'http://47.94.57.119:80',
        'http://175.139.252.194:80',
        'http://47.94.217.37:80',
    ]
    # mysql(ip_list)
    number = mysql_proxies()

思路

每次爬取時,從資料庫隨機抽取一個代理 IP 來使用;如果該代理不可用,就將其從資料庫中刪除。
資料爬取後,將最新章節名存到本地 txt 檔案,留著和下次爬取的結果作比對;如果不一致(表示小說有更新),則更新本地檔案,並發送郵件提醒。