1. 程式人生 > >python呼叫百度翻譯、谷歌翻譯

python呼叫百度翻譯、谷歌翻譯

谷歌翻譯不提供介面。想要使用谷歌的翻譯結果,需要借用爬蟲技術:將待翻譯的文字傳入,抓取頁面,再解析出翻譯結果。經測試,這種方法的翻譯效果較差,不建議使用。
百度翻譯提供介面:在百度翻譯註冊開發者後獲取 appid 與 secretKey,即可直接在 Python 中進行呼叫。這種方法翻譯效果較好,但仍存在一些問題;對於那些翻譯不太好的文字,只能手動複製到谷歌翻譯或必應翻譯中進行翻譯。

#-*-coding:utf-8-*-
# date: 2018-11-07
import requests
import json
import execjs
import hashlib
import urllib
import random
# A bare `import urllib` does not guarantee the `parse` submodule is loaded;
# import it explicitly because the URL builders below rely on it.
import urllib.parse

# NOTE: `execjs` comes from the PyExecJS package (`pip install pyexecjs`).
# It is used to run the JavaScript token generator embedded in Py4Js.

# Seconds to wait for a translation endpoint before giving up; without a
# timeout `requests` can block forever on a stalled connection.
_REQUEST_TIMEOUT = 10

# Browser-like User-Agent sent with the Google request.
_GOOGLE_HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36',
}


class Py4Js():
    """Wrapper around the JavaScript routine that computes Google's ``tk`` token."""

    def __init__(self):
        # Compile the token generator once per instance. The JavaScript is
        # kept token-for-token as published; only its layout is reformatted.
        self.ctx = execjs.compile("""
        function TL(a) {
            var k = "";
            var b = 406644;
            var b1 = 3293161072;
            var jd = ".";
            var $b = "+-a^+6";
            var Zb = "+-3^+b+-f";
            for (var e = [], f = 0, g = 0; g < a.length; g++) {
                var m = a.charCodeAt(g);
                128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023), e[f++] = m >> 18 | 240, e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224, e[f++] = m >> 6 & 63 | 128), e[f++] = m & 63 | 128)
            }
            a = b;
            for (f = 0; f < e.length; f++) a += e[f], a = RL(a, $b);
            a = RL(a, Zb);
            a ^= b1 || 0;
            0 > a && (a = (a & 2147483647) + 2147483648);
            a %= 1E6;
            return a.toString() + jd + (a ^ b)
        };
        function RL(a, b) {
            var t = "a";
            var Yb = "+";
            for (var c = 0; c < b.length - 2; c += 3) {
                var d = b.charAt(c + 2),
                    d = d >= t ? d.charCodeAt(0) - 87 : Number(d),
                    d = b.charAt(c + 1) == Yb ? a >>> d: a << d;
                a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d
            }
            return a
        }
        """)

    def getTk(self, text):
        """Return the ``tk`` request parameter for ``text``.

        Google Translate's ``tk`` value changes with the text being
        translated and is generated by JavaScript, so this calls the
        compiled ``TL`` function and returns its result.
        """
        return self.ctx.call("TL", text)


def buildUrl(text, tk):
    """Build the Google Translate request URL for ``text`` with token ``tk``.

    The language parameters are ``sl``/``tl``/``hl`` (letter "l"); the
    previous ``s1``/``t1``/``h1`` spelling (digit one) is not a parameter
    the endpoint understands. Every value, including ``text``, is
    percent-encoded so that characters such as ``&``, ``#`` or spaces
    cannot break the query string.
    """
    baseUrl = 'https://translate.google.cn/translate_a/single'
    params = [
        ('client', 't'),
        ('sl', 'auto'),
        ('tl', 'zh-CN'),
        ('hl', 'zh-CN'),
        ('dt', 'at'),
        ('dt', 'bd'),
        ('dt', 'ex'),
        ('dt', 'ld'),
        ('dt', 'md'),
        ('dt', 'qca'),
        ('dt', 'rw'),
        ('dt', 'rm'),
        ('dt', 'ss'),
        ('dt', 't'),
        ('ie', 'UTF-8'),
        ('oe', 'UTF-8'),
        ('otf', '1'),
        ('pc', '1'),
        ('ssel', '0'),
        ('tsel', '0'),
        ('kc', '2'),
        ('tk', str(tk)),
        ('q', text),
    ]
    # quote_via=quote encodes spaces as %20 rather than '+'.
    return baseUrl + '?' + urllib.parse.urlencode(params, quote_via=urllib.parse.quote)


def _google_fetch(url):
    """GET ``url`` and return the response body decoded from JSON."""
    response = requests.get(url, headers=_GOOGLE_HEADERS, timeout=_REQUEST_TIMEOUT)
    return json.loads(response.text)


def google_translate(text):
    """Translate ``text`` to Simplified Chinese via the Google web endpoint.

    Returns the first translated segment, or ``''`` when the request or the
    parsing of its response fails (the failure is printed, not raised).
    """
    js = Py4Js()
    url = buildUrl(text, js.getTk(text))
    try:
        result = _google_fetch(url)
        # The response is JSON that decodes to a nested list: element 0 holds
        # the translation, element 7 (when present) holds "did you mean ..."
        # hints. Guard the length so a short response does not discard the
        # translation that is available in element 0.
        suggestion = result[7] if len(result) > 7 else None
        if suggestion is None:
            return result[0][0][0]
        # The input looks misspelled: translate the suggested correction
        # instead, falling back to the original translation on any failure.
        try:
            correctText = suggestion[0].replace('<b><i>', ' ').replace('</i></b>', '')
            print(correctText)
            newResult = _google_fetch(buildUrl(correctText, js.getTk(correctText)))
            return newResult[0][0][0]
        except Exception as e:
            print(e)
            return result[0][0][0]
    except Exception as e:
        # Best-effort boundary: report the failure and return an empty string.
        # No `return` inside `finally`, so KeyboardInterrupt/SystemExit are
        # no longer silently swallowed.
        print(url)
        print("翻譯"+text+"失敗")
        print("錯誤資訊:")
        print(e)
        return ''


def baidu_translate(text, from_lang='auto', to_lang='zh', appid = 'your appid', secretKey = 'your secretKey'):
    """Translate ``text`` with the Baidu Translate HTTP API.

    Args:
        text: Text to translate.
        from_lang: Source language code (``'auto'`` to detect).
        to_lang: Target language code.
        appid: Baidu developer application id.
        secretKey: Secret key paired with ``appid``.

    Returns:
        The ``dst`` field of the first translation result, or ``''`` when the
        request fails or the API reports an error (the error is printed).
    """
    url = 'https://api.fanyi.baidu.com/api/trans/vip/translate'
    salt = random.randint(32768, 65536)
    # The signature is the MD5 of appid + raw (un-encoded) text + salt + key.
    sign = hashlib.md5((appid + text + str(salt) + secretKey).encode("utf8")).hexdigest()
    query = urllib.parse.urlencode(
        [
            ('appid', appid),
            ('q', text),
            ('from', from_lang),
            ('to', to_lang),
            ('salt', str(salt)),
            ('sign', sign),
        ],
        quote_via=urllib.parse.quote,
    )
    try:
        response = requests.get(url + '?' + query, timeout=_REQUEST_TIMEOUT)
        # SECURITY: the original called eval() on the response body, which
        # executes whatever the remote side sends. The API returns JSON, so
        # parse it as JSON instead.
        payload = json.loads(response.text)
        if 'trans_result' not in payload:
            # Error payloads carry error_code/error_msg instead of results;
            # surface them rather than an opaque KeyError.
            raise ValueError('Baidu translate error {}: {}'.format(
                payload.get('error_code'), payload.get('error_msg')))
        return payload['trans_result'][0]['dst']
    except Exception as e:
        # Best-effort boundary: report the failure and return an empty string.
        print(e)
        return ''


if __name__ == '__main__':
    res = google_translate('oh shit')
    print(res)

參考: