最新有道翻譯爬蟲教程 (python爬蟲)
阿新 • • 發佈:2018-12-21
環境:python3
直接上程式碼:
# coding=utf-8
"""Interactive command-line scraper for the Youdao dictionary web page.

Prompts for a word or phrase, downloads the matching Youdao result page and
prints the translation extracted from the HTML.
"""
import json  # not used by the code in this script; kept from the original import list
from urllib.parse import quote

import requests
from lxml import etree


class YouDaoTranslateWeb:
    """Fetch one Youdao result page and print the translation found in it.

    Call ``run()`` first (prompt, download, parse) and then ``get_result()``
    (extract and print).
    """

    # XPath queries, tried in this order when extracting the translation.
    _FANYI_BLOCK = "//div[@id='results']//div[@id='fanyiToggle']"
    _FANYI_TEXT = "//div[@id='results']//div[@id='fanyiToggle']//p/text()"
    _TRANS_LINKS = "//div[@id='results']//div[@class='trans-container']//p/span/a/text()"
    _TITLE_SPANS = "//div[@id='scontainer']//div[@class='title']/span/text()"

    # Seconds to wait for the server before giving up instead of hanging forever.
    _TIMEOUT = 10

    def __init__(self):
        # The template is kept separately so that run() can be called more than
        # once on the same instance; ``self.url`` still holds the most recent URL.
        self._url_template = "http://www.youdao.com/w/eng/{}/#keyfrom=dict2.index"
        self.url = self._url_template
        self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36"}

    def run(self):
        """Prompt for the text to translate, download its page and parse it.

        The parsed document is stored in ``self.html`` for ``get_result()``.

        Raises:
            requests.RequestException: if the HTTP request fails or times out.
        """
        user_input = input("請輸入要翻譯的內容:")
        # Percent-encode the query so characters such as '?', '#' or '/' cannot
        # change the structure of the URL path.
        self.url = self._url_template.format(quote(user_input, safe=""))
        response = requests.get(url=self.url, headers=self.headers, timeout=self._TIMEOUT)
        strs = response.content.decode()
        self.html = etree.HTML(strs)

    def _extract_result(self):
        """Return the translation text from ``self.html``, or None if not found."""
        html = self.html

        if html.xpath(self._FANYI_BLOCK):
            texts = html.xpath(self._FANYI_TEXT)
            # The second text node is the one used; presumably the first is not
            # the translation itself -- TODO confirm against the live page.
            # Guard on length > 1 so a single text node no longer raises IndexError.
            return texts[1] if len(texts) > 1 else None

        links = html.xpath(self._TRANS_LINKS)
        if links:
            return links[0]

        titles = html.xpath(self._TITLE_SPANS)
        # Drop the last entry (slicing is safe on an empty list, unlike pop())
        # and emit one stripped title per line.
        return "".join(title.strip() + "\r\n" for title in titles[:-1])

    def get_result(self):
        """Print the translation extracted from the page loaded by ``run()``."""
        result = self._extract_result()
        print("翻譯結果是:")
        print(result)


if __name__ == '__main__':
    while True:
        translator = YouDaoTranslateWeb()
        try:
            translator.run()
            translator.get_result()
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C / end of input: leave the loop without a traceback.
            break