1. 程式人生 > 其它 >實現自動英譯漢、漢譯英翻譯

實現自動英譯漢、漢譯英翻譯

技術標籤:python爬蟲

裡面有幾個問題,請大神講講

'''
日期:2021年1月10日
百度翻譯:利用第三方庫 execjs 執行 js,計算 form data 裡面的 sign 值
js 裡需要找到 i、u 兩個引數
這個指令碼僅供學習交流,小白一枚大神勿噴
缺點:過於簡單
'''

import requests
import jsonpath
import execjs
from fake_useragent import UserAgent


class BaiDu():
    """Translate a word between Chinese and English via Baidu Translate's web API.

    Constructing an instance already performs network and file I/O: it posts
    the word to the language-detection endpoint and runs the local JavaScript
    file to compute the ``sign`` form field. Call :meth:`get_html` afterwards
    to send the actual translation request.
    """

    LANGDETECT_URL = 'https://fanyi.baidu.com/langdetect'

    # Seconds to wait for each HTTP request. Without a timeout, requests
    # blocks indefinitely on a stalled connection.
    timeout = 10

    # JavaScript file that defines the signing function ``e``. The file is not
    # shipped with this script and is resolved relative to the working directory.
    js_path = 'baidu-test.js'

    # NOTE(review): this looks like a captured browser session, including
    # login cookies (BDUSS). It is kept verbatim so requests stay unchanged,
    # but it should probably be loaded from configuration, not source code.
    _COOKIE = (
        'PSTM=1592717775; '
        'BIDUPSID=BE1B387567083C74F8D5CED3C828E0B8; '
        'BAIDUID=F7CE4B6E6C66296A3614B1CD6F6BF553:FG=1; '
        'BDUSS=d6WVVYTHhNfm8zbkJwbks5QTVvSX5EamZmaE1SRGtyRWxtTzRQMW5SR2poZ0ZnRVFBQUFBJCQAAAAAAAAAAAEAAAAqyXQMc2EzMzY2AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKP52V-j-dlfam; '
        'BDUSS_BFESS=d6WVVYTHhNfm8zbkJwbks5QTVvSX5EamZmaE1SRGtyRWxtTzRQMW5SR2poZ0ZnRVFBQUFBJCQAAAAAAAAAAAEAAAAqyXQMc2EzMzY2AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKP52V-j-dlfam; '
        '__yjs_duid=1_73118e33f40aaaa3deddfd28764729e71608950605967; '
        'H_PS_PSSID=33423_33419_33261_31254_33284_33287_33350_33460_22160_33370; '
        'delPer=0; '
        'PSINO=2; '
        'BA_HECTOR=aga50l042l20a400jc1fvi7p20q; '
        'BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; '
        'BAIDUID_BFESS=F7CE4B6E6C66296A3614B1CD6F6BF553:FG=1; '
        'Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1610161957; '
        'REALTIME_TRANS_SWITCH=1; '
        'FANYI_WORD_SWITCH=1; '
        'HISTORY_SWITCH=1; '
        'SOUND_SPD_SWITCH=1; '
        'SOUND_PREFER_SWITCH=1; '
        'yjs_js_security_passport=5315d7c053acd0a3c1511051724e32c095fcd098_1610161956_js; '
        'Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1610162461; '
        'ab_sr=1.0.0_MzAzZDQ3NWI5ZTI3NTUwMWRjMjliZmVkZTNjNTcxZWNlNDdjYmViZWJiNWY1NmZjZTlmZjVmOWJiZTMzNDc5ZTJkYzMwZDM2NzlhNGRhOWE4N2I0OTczZjE3NTVlY2Vm; '
        '__yjsv5_shitong=1.0_7_8551d242d1db0166b8486cb44d581e04fbbc_300_1610162460533_60.1.128.76_305b5505'
    )

    def __init__(self, word, *, timeout=None, js_path=None):
        """Detect the language of ``word`` and prepare the translation form.

        :param word: text to translate.
        :param timeout: optional per-request timeout in seconds; defaults to
            the class-level ``timeout``.
        :param js_path: optional path of the signing script; defaults to the
            class-level ``js_path``.
        """
        if timeout is not None:
            self.timeout = timeout
        if js_path is not None:
            self.js_path = js_path

        # These are stored on ``self`` rather than as plain locals because
        # get_lang() and get_sign() below read self.word / self.headers.
        # A local name would only be visible inside __init__ itself.
        self.word = word
        self.headers = {
            "User-Agent": UserAgent().random,
            'cookie': self._COOKIE,
        }
        self.From, self.To = self.get_lang()
        self.fromdata = {
            'from': f'{self.From}',
            'to': f'{self.To}',
            'query': f'{self.word}',
            'simple_means_flag': '3',
            'sign': self.get_sign(),
            # NOTE(review): hard-coded token, presumably tied to the cookie
            # above; verify it is still valid.
            'token': '08a031823bb6373a2cbc56171ca5e03a',
            # NOTE(review): the leading space in ' common' looks accidental;
            # confirm against the form the browser sends before changing it.
            'domain': ' common',
        }

    @staticmethod
    def _choose_target(source):
        """Return the target language code for a detected source language.

        Chinese (``'zh'``) is translated to English. The ``'nor'`` marker also
        keeps the default ``'en'``. Every other source is translated to Chinese.
        """
        return 'en' if source in ('zh', 'nor') else 'zh'

    def get_sign(self):
        """Compute the ``sign`` form field for ``self.word``.

        Reads the JavaScript source at ``self.js_path`` and calls its function
        ``e`` with the word through execjs. The JavaScript file is not provided
        with this script.

        :return: whatever the JavaScript function ``e`` returns.
        """
        # Explicit encoding so decoding does not depend on the platform locale.
        with open(self.js_path, 'r', encoding='utf-8') as f:
            js_source = f.read()
        return execjs.compile(js_source).call('e', f'{self.word}')

    def get_lang(self):
        """Detect the source language and pick the translation direction.

        Only Chinese-to-English and English-to-Chinese are supported.

        :return: ``(From, To)`` language codes.
        """
        response = requests.post(
            self.LANGDETECT_URL,
            headers=self.headers,
            data={'query': f'{self.word}'},
            timeout=self.timeout,
        )
        # The endpoint answers 'nor' when the word is mistyped.
        source = response.json()['lan']
        if source == 'nor':
            print('輸入錯誤')
        return source, self._choose_target(source)

    def get_html(self):
        """Send the translation request.

        :return: the decoded JSON body of the response.
        """
        # A local variable is enough here: the response is only used inside
        # this method, so there is no need to keep it on ``self``.
        response = requests.post(
            f'https://fanyi.baidu.com/v2transapi?from={self.From}&to={self.To}',
            headers=self.headers,
            data=self.fromdata,
            timeout=self.timeout,
        )
        return response.json()


if __name__ == '__main__':
    word = input('請輸入你的單詞:')
    baidu = BaiDu(word)
    # Collect every "dst" value found anywhere in the response JSON.
    fanyi = jsonpath.jsonpath(baidu.get_html(), "$..dst")
    print(fanyi)