【python】python每天抓取一篇英語美文,發送到郵箱
阿新 • • 發佈:2018-10-11
lib 郵件發送 建立 dirname write path div style 需要
"""Fetch one not-yet-sent essay from enread.com and e-mail it as HTML.

Each run walks the essay listing pages, picks the first essay whose link is
not in the local records file, mails it, and appends the link to the records
file so that it is not sent again.
"""
import getpass
import os
import smtplib
import time
from email.mime.text import MIMEText
from html import escape
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every HTTP request.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
                  ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}

# Seconds to wait for the web server before a request is abandoned.
REQUEST_TIMEOUT = 15


def _fetchSoup(url):
    """Download *url* and return the page parsed with BeautifulSoup.

    Raises:
        requests.RequestException: on connection problems, timeouts or an
            HTTP error status.
    """
    response = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Pass the raw bytes: BeautifulSoup detects the charset itself, which
    # avoids re-encoding text through a guessed (possibly None) encoding.
    return BeautifulSoup(response.content, 'html.parser')


def _appendRecord(recordsPath, essayUrl):
    """Append *essayUrl* as one line to the records file."""
    with open(recordsPath, 'a+', encoding='utf-8') as attach:
        attach.write(str(essayUrl) + '\n')
    print('寫入記錄成功')


def findEssay(rootUrl, pages, recordsPath):
    """Walk the listing pages and mail the first essay not yet recorded.

    Args:
        rootUrl: URL of the first listing page.
        pages: set of essay links that were already handled; updated in place.
        recordsPath: path of the text file that persists ``pages``.

    An essay whose page cannot be downloaded is recorded as handled and the
    search moves on to the next essay. An essay is also recorded once it has
    been downloaded and passed to ``mailTo``, whether or not the SMTP send
    succeeded (``mailTo`` only prints send failures).
    """
    pageUrl = rootUrl
    visitedPages = set()  # guards against a pager that links back to itself
    while pageUrl and pageUrl not in visitedPages:
        visitedPages.add(pageUrl)
        try:
            soup = _fetchSoup(pageUrl)
        except requests.RequestException as e:
            print('根鏈接出現錯誤' + str(e))
            return

        # Essay links are joined onto the parent of the listing directory
        # (the site root for the default rootUrl).
        dirUrl = urljoin(pageUrl, '..')
        for essayTag in soup.select('.node_list a'):
            essayUrl = essayTag.get('href')
            if not essayUrl or essayUrl in pages:
                continue
            downloaded = downloadEssay(dirUrl, essayUrl, essayTag.text)
            pages.add(essayUrl)
            _appendRecord(recordsPath, essayUrl)
            if downloaded:
                return  # one essay per run

        # The second-to-last pager link is the "next page" link.
        pagerLinks = soup.select('.page a')
        if len(pagerLinks) < 2:
            return
        nextHref = pagerLinks[-2].get('href')
        # urljoin always produces forward slashes, unlike os.path.join on
        # Windows.
        pageUrl = urljoin(pageUrl, nextHref) if nextHref else None


def downloadEssay(dirUrl, essayUrl, essayName):
    """Download one essay page and mail its paragraphs.

    Args:
        dirUrl: base URL the essay link is resolved against.
        essayUrl: the essay's href as found on the listing page.
        essayName: essay title, used as the heading of the mail body.

    Returns:
        True if the page was downloaded and passed to ``mailTo``; False if
        the download failed (the failure is printed, not raised).
    """
    essayPageUrl = urljoin(dirUrl.rstrip('/') + '/', essayUrl)
    try:
        soup = _fetchSoup(essayPageUrl)
    except requests.RequestException as e:
        print('下載文章失敗 ' + str(e))
        return False
    mailTo(essayName, soup.select('#dede_content div'))
    return True


def mailTo(essayName, paras):
    """Send the essay as an HTML e-mail through the 163.com SMTP server.

    Args:
        essayName: essay title, rendered as the ``<h1>`` of the message.
        paras: iterable of BeautifulSoup tags; the text of each one becomes
            a ``<p>`` paragraph.

    The sender password is prompted for on every call. Send failures are
    printed, not raised.
    """
    # Scraped text is escaped so stray '<' or '&' cannot break the markup.
    content = ''.join('<p>' + escape(para.getText()) + '</p>' for para in paras)

    # Sender address (placeholder - replace with a real account).
    sender = '發件人@163.com'
    # getpass keeps the password from being echoed to the terminal.
    pwd = getpass.getpass('Password: ')
    # Recipient addresses (placeholders - replace with real addresses).
    receivers = ['收件人[email protected]', '收件人[email protected]']

    mail_message = (
        '<html><body><h1>' + escape(essayName) + '</h1>'
        + '<article>' + content + '</article>'
        + '</body></html>'
    )
    message = MIMEText(mail_message, 'html', 'utf-8')
    # Address lists in mail headers are comma-separated.
    message['To'] = ', '.join(receivers)
    message['From'] = sender
    # The subject carries the current date as yymmdd.
    message['Subject'] = '今日美文' + time.strftime('%y%m%d')

    try:
        # Implicit-SSL connection on port 465; the context manager closes
        # the connection even when login or sending fails.
        with smtplib.SMTP_SSL('smtp.163.com', 465) as smtpObj:
            smtpObj.login(sender, pwd)
            smtpObj.sendmail(sender, receivers, message.as_string())
    except (smtplib.SMTPException, OSError) as e:
        print('郵件發送失敗,失敗原因:', e)
    else:
        print('郵件發送成功!')


def main():
    """Load the records file (creating it if missing) and process one essay."""
    recordsPath = 'C:\\enEssaysToLH.txt'
    if not os.path.exists(recordsPath):
        with open(recordsPath, 'w', encoding='utf-8'):
            print('創建記錄文件')
    with open(recordsPath, 'r', encoding='utf-8') as readFile:
        pages = {line.rstrip() for line in readFile}

    rootUrl = 'http://www.enread.com/essays/index.html'
    findEssay(rootUrl, pages, recordsPath)


if __name__ == '__main__':
    main()
發送了很多次郵件,每次用英文做主題(subject)的時候,都會出現 554 錯誤。當把郵件的主題統一換成中文後,同一篇文章就能發送出去。可能這裏面涉及了編碼的問題,待以後研究。
【python】python每天抓取一篇英語美文,發送到郵箱