Python之爬蟲-- js加密(破解有道詞典加密的演算法)
阿新 • • 發佈:2018-11-02
js加密
- 有的反爬蟲策略採用js對需要傳輸的資料進行加密處理(通常是取md5值)
- 經過加密,傳輸的就是密文,但是
- 加密函式或者過程一定是在瀏覽器完成,也就是一定會把程式碼(js程式碼)暴露給使用者
- 通過閱讀加密演算法,就可以模擬出加密過程,從而達到破解
- 過程參看案例1, 案例2
- 過程比較囉嗦,筆記比較少,仔細觀察
案例1:
"""Crack the Youdao dictionary, V1: replay a request captured from the browser."""
from urllib import parse, request


def youdao(key):
    """Send a hard-coded translation request to Youdao and print the reply.

    This first version replays form values copied verbatim from a browser
    session, including a fixed ``salt`` and ``sign``.

    :param key: word to look up. NOTE(review): it is not used here; the form
        always sends "boy", presumably because the captured ``sign`` only
        matches that word and salt -- confirm before wiring ``key`` in.
    :return: None; the decoded response body is printed.
    """
    url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"

    data = {
        "i": "boy",
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": "1523100789519",
        "sign": "b8a55a436686cd89873fa46514ccedbe",
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTIME",
        "typoResult": "false",
    }

    # The request body must be bytes.
    data = parse.urlencode(data).encode()

    # No explicit Content-Length: the original sent a hand-copied "200" that
    # need not match the encoded body; urllib fills in the real length itself.
    headers = {
        "Accept": "application/json,text/javascript,*/*;q=0.01",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
        # NOTE(review): the first cookie field looks mangled by e-mail
        # obfuscation on the published page -- replace with a real cookie.
        "Cookie": " [email protected];JSESSIONID=aaaTLWzfvp5Hfg9mAhFkw;OUTFOX_SEARCH_USER_ID_NCOO=1999296830.4784973;___rl__test__cookies=1523100789517",
        "Host": "fanyi.youdao.com",
        "Origin": "http://fanyi.youdao.com",
        "Referer": "http://fanyi.youdao.com/",
        "User-Agent": "Mozilla/5.0( X11; Linux x86_64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
        # This header had been pasted into the end of the User-Agent value.
        "X-Requested-With": "XMLHttpRequest",
    }

    req = request.Request(url=url, data=data, headers=headers)

    # The context manager closes the connection even if reading fails.
    with request.urlopen(req) as rsp:
        html = rsp.read().decode()
    print(html)


if __name__ == '__main__':
    youdao("boy")
案例2:
'''
V2
處理js加密程式碼
'''
'''
通過查詢,能找到js程式碼中操作程式碼
1. 這個是計算salt的公式 r = "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10));
2. sign: n.md5("fanyideskweb" + t + r + "ebSeFb%=XZ%T[KZ)c(sy!");
md5的輸入由四個部分拼接而成:第一個和第四個是固定值的字串,
第二個(t)是輸入的要查詢的單詞,第三個(r)就是上面公式算出的salt
'''
#得到鹽
def getSalt():
    """Return a request salt built the same way as the site's JavaScript.

    The JS formula is::

        "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10))

    i.e. the current time in milliseconds plus a random integer offset.

    :return: the salt as an int; callers convert it with ``str()`` where a
        string is needed.
    """
    import random
    import time

    now_ms = int(time.time() * 1000)
    # Math.random() is in [0, 1), so parseInt(10 * Math.random(), 10) yields
    # 0..9. randint(0, 10) would also produce 10, which the JS never does.
    return now_ms + random.randrange(10)
#得到md5值
def getMD5(v):
    """Return the hexadecimal MD5 digest of the string ``v``.

    :param v: text to hash; it is encoded as UTF-8 because the hash works
        on bytes.
    :return: the digest as a lowercase hex string.
    """
    import hashlib

    payload = v.encode("utf-8")
    return hashlib.md5(payload).hexdigest()
def getSign(key, salt):
    """Compute the ``sign`` form field for a lookup.

    The value is the MD5 of four concatenated parts: the fixed client name,
    the word being looked up, the salt, and a fixed secret string.

    :param key: the word to look up (a string).
    :param salt: the salt for this request; converted with ``str()``.
    :return: the hex digest produced by ``getMD5``.
    """
    pieces = ("fanyideskweb", key, str(salt), "ebSeFb%=XZ%T[KZ)c(sy!")
    return getMD5("".join(pieces))
from urllib import request, parse
def youdao(key):
    """Look up ``key`` on Youdao with a freshly computed salt and sign.

    Builds the form data, prints it, POSTs it to the translate endpoint and
    prints the decoded response body.

    :param key: the word to translate.
    :return: None; the form data and the response text are printed.
    """
    url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"

    # The same salt must be used for both the "salt" field and the sign.
    salt = getSalt()

    data = {
        "i": key,
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": str(salt),
        "sign": getSign(key, salt),
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTIME",
        "typoResult": "false",
    }
    print(data)

    # The request body must be bytes.
    data = parse.urlencode(data).encode()

    # Content-Length is left out on purpose: urllib sets it from the body.
    headers = {
        "Accept": "application/json,text/javascript,*/*;q=0.01",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
        # NOTE(review): the first cookie field looks mangled by e-mail
        # obfuscation on the published page -- replace with a real cookie.
        "Cookie": " [email protected];JSESSIONID=aaaTLWzfvp5Hfg9mAhFkw;OUTFOX_SEARCH_USER_ID_NCOO=1999296830.4784973;___rl__test__cookies=1523100789517",
        "Host": "fanyi.youdao.com",
        "Origin": "http://fanyi.youdao.com",
        "Referer": "http://fanyi.youdao.com/",
        "User-Agent": "Mozilla/5.0( X11; Linux x86_64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
        # This header had been pasted into the end of the User-Agent value.
        "X-Requested-With": "XMLHttpRequest",
    }

    req = request.Request(url=url, data=data, headers=headers)

    # The context manager closes the connection even if reading fails.
    with request.urlopen(req) as rsp:
        html = rsp.read().decode()
    print(html)
# Run a sample lookup only when this file is executed as a script.
if __name__ == "__main__":
    youdao("boy")