1. 程式人生 > >【python】python每天抓取一篇英語美文,發送到郵箱

【python】python每天抓取一篇英語美文,發送到郵箱

lib 郵件發送 建立 dirname write path div style 需要

import requests,os,time
from bs4 import BeautifulSoup
import smtplib
from email.mime.text import MIMEText

# HTTP request headers passed to requests.get() when fetching pages:
# a desktop-browser User-Agent string.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3396.99 Safari/537.36'
    ),
}
# Seconds to wait for the essay site before giving up on a request.
REQUEST_TIMEOUT = 30


def buildEssayUrl(dirUrl, essayUrl):
    """Return the absolute URL of an essay.

    Args:
        dirUrl: Site root, e.g. ``http://www.enread.com``.
        essayUrl: The ``href`` taken from the index page. Usually a
            root-relative path such as ``/essays/1.html``; an already
            absolute URL is returned unchanged.

    Returns:
        ``dirUrl`` and ``essayUrl`` joined with exactly one ``/``.
    """
    from urllib.parse import urlsplit

    if urlsplit(essayUrl).scheme:
        return essayUrl
    return dirUrl.rstrip('/') + '/' + essayUrl.lstrip('/')


def buildMailHtml(essayName, paragraphs):
    """Return the HTML mail body for one essay.

    Args:
        essayName: Essay title, rendered as the ``<h1>`` heading.
        paragraphs: Iterable of plain-text paragraphs; each one becomes a
            ``<p>`` element inside ``<article>``.

    Returns:
        A complete ``<html>`` document as a string.
    """
    from html import escape

    # The text comes from a scraped web page, so escape it before embedding
    # it in markup; otherwise a stray "<" or "&" would corrupt the mail body.
    body = ''.join(
        '<p>' + escape(text, quote=False) + '</p>' for text in paragraphs
    )
    return (
        '<html><body><h1>' + escape(essayName, quote=False) + '</h1>'
        + '<article>' + body + '</article>'
        + '</body></html>'
    )


# TODO: crawl the index pages and find the article links
def findEssay(rootUrl, pages, recordsPath):
    """Mail the first essay that has not been processed yet.

    Walks the index pages starting at ``rootUrl``. The first link in
    ``.node_list`` that is not in ``pages`` is handed to ``downloadEssay``;
    its URL is then added to ``pages`` and appended to the records file.
    The search stops after the first successful download. A failed download
    is recorded too, and the search moves on to the next link. When every
    link on a page is already known, the next index page is visited.

    Args:
        rootUrl: URL of the first index page.
        pages: Set of essay URLs already processed; updated in place.
        recordsPath: Text file holding one processed essay URL per line.
    """
    from urllib.parse import urljoin

    visitedPages = set()  # guards against a "next" link that loops back
    pageUrl = rootUrl
    try:
        while pageUrl and pageUrl not in visitedPages:
            visitedPages.add(pageUrl)
            htmlpp = requests.get(pageUrl, headers=header,
                                  timeout=REQUEST_TIMEOUT)
            htmlpp.raise_for_status()
            # Pass the raw bytes so BeautifulSoup detects the charset itself.
            soup = BeautifulSoup(htmlpp.content, 'html.parser')
            # Essay hrefs are joined onto the site root.
            dirUrl = urljoin(pageUrl, '/')

            # Article links on the current index page.
            for essayTag in soup.select('.node_list a'):
                essayUrl = essayTag.get('href')
                if not essayUrl or essayUrl in pages:
                    continue
                sent = downloadEssay(dirUrl, essayUrl, essayTag.text)
                # Record the URL whether or not the download worked, so a
                # broken article is never retried forever.
                pages.add(essayUrl)
                with open(recordsPath, 'a+') as attach:
                    attach.write(str(essayUrl) + '\n')
                print('寫入記錄成功')
                if sent:
                    return

            # The second-to-last pager link is taken as the "next page" link.
            pagerLinks = soup.select('.page a')
            if len(pagerLinks) < 2:
                break
            nextPageBaseUrl = pagerLinks[-2].get('href')
            if not nextPageBaseUrl:
                break
            # urljoin, not os.path.join: URLs always use "/" even on Windows.
            pageUrl = urljoin(pageUrl, nextPageBaseUrl)
    except Exception as e:
        # Best-effort script: report the problem instead of crashing.
        print('根鏈接出現錯誤' + str(e))


# TODO: download the article content
def downloadEssay(dirUrl, essayUrl, essayName):
    """Download one essay and pass its paragraphs to ``mailTo``.

    Args:
        dirUrl: Site root that ``essayUrl`` is joined onto.
        essayUrl: ``href`` of the essay as found on the index page.
        essayName: Essay title (the link text).

    Returns:
        True when the page was fetched and handed to ``mailTo``; False when
        fetching or parsing failed (the error is printed). SMTP failures are
        reported by ``mailTo`` itself and do not change the result.
    """
    try:
        htmlpp = requests.get(buildEssayUrl(dirUrl, essayUrl), headers=header,
                              timeout=REQUEST_TIMEOUT)
        htmlpp.raise_for_status()
        soup = BeautifulSoup(htmlpp.content, 'html.parser')
        paras = soup.select('#dede_content div')
        mailTo(essayName, paras)
        return True
    except Exception as e:
        print('下載文章失敗 ' + str(e))
        return False


# TODO: build the mail (subject carries today's date) and send it
def mailTo(essayName, paras):
    """Send one essay as an HTML e-mail through the 163.com SMTP server.

    Prompts on stdin for the sender's password on every call.

    Args:
        essayName: Essay title, used as the heading of the mail body.
        paras: Iterable of elements exposing ``getText()`` (the BeautifulSoup
            tags selected by ``downloadEssay``).
    """
    mail_message = buildMailHtml(essayName,
                                 [para.getText() for para in paras])

    # Sender address and password.
    sender = '發件人@163.com'
    pwd = input('Password: ')
    # NOTE(review): the recipient addresses were obfuscated in the published
    # listing; these are placeholders - replace them with real mailboxes.
    receivers = ['收件人1@163.com', '收件人2@163.com']

    message = MIMEText(mail_message, 'html', 'utf-8')  # HTML mail body
    # Address lists in mail headers are comma-separated, not ";"-separated.
    message['To'] = ', '.join(receivers)
    message['From'] = sender
    # Subject: fixed prefix plus today's date (yymmdd).
    message['Subject'] = '今日美文' + time.strftime('%y%m%d')

    try:
        # SSL connection to the NetEase 163 mail server on port 465. The
        # context manager closes the connection even when login or sending
        # fails.
        with smtplib.SMTP_SSL('smtp.163.com', 465) as smtpObj:
            smtpObj.login(sender, pwd)
            smtpObj.sendmail(sender, receivers, message.as_string())
        print('郵件發送成功!')
    except smtplib.SMTPException as e:
        print('郵件發送失敗,失敗原因:', e)


if __name__ == '__main__':
    recordsPath = 'C:\\enEssaysToLH.txt'
    pages = set()
    if not os.path.exists(recordsPath):
        with open(recordsPath, 'w'):
            print('創建記錄文件')
    # Load the essay URLs processed by earlier runs.
    with open(recordsPath, 'r') as readFile:
        for line in readFile:
            pages.add(line.rstrip())

    # TODO: parse the root link and start crawling from the index page
    rootUrl = 'http://www.enread.com/essays/index.html'
    findEssay(rootUrl, pages, recordsPath)

測試時發送了很多次郵件:每次用英文做主題(subject)的時候,都會出現 554 錯誤;當把郵件的主題統一換成中文後,同一篇文章就能發送出去。可能這裏面涉及了編碼的問題,待以後研究。

【python】python每天抓取一篇英語美文,發送到郵箱