1. 程式人生 > 其它 >爬蟲urllib中的Cookie反爬處理

爬蟲urllib中的Cookie反爬處理

1.分析百度翻譯頁面發出的網路請求,可以發現其中有一個「詳細翻譯」介面

即請求地址:https://fanyi.baidu.com/v2transapi?from=en&to=zh

2.查詢Request Headers

3.詳細程式碼

#百度詳細翻譯,反爬的第二種情況 Cookie
import urllib.request
import urllib.parse
import json

# 1. Request URL of the Baidu Translate "detailed translation" endpoint.
url = 'https://fanyi.baidu.com/v2transapi?from=en&to=zh'

# 2. Request headers (the anti-scraping check of interest here is the Cookie).
# Only 'Cookie' is actually sent; the other headers copied from the browser's
# Request Headers panel are kept as comments for reference.
headers = {
    # 'Accept': '*/*',
    # NOTE: if re-enabled, the server may answer with a compressed body;
    # urllib does not decompress it automatically, so the decode below
    # would fail.
    # 'Accept-Encoding': 'gzip, deflate, br',
    # 'Accept-Language': 'zh-CN,zh;q=0.9',
    # 'Connection': 'keep-alive',
    # 'Content-Length': '136',
    # 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    #
    # The cookie is written as adjacent string literals (one cookie pair per
    # line) that Python concatenates into a single value. It must not contain
    # line breaks: http.client rejects header values with embedded newlines.
    # NOTE(review): this is a hard-coded, account-bound session cookie; it
    # expires and should not be published -- consider loading it from the
    # environment or a local file instead.
    'Cookie': (
        'BIDUPSID=A4B1E6FC0F9F2380A366E7D59492ABAE; '
        'PSTM=1642494695; '
        '__yjs_duid=1_161fb9fd9dd519015047a85692375cf91642555200799; '
        'BDUSS=l3eFV1alZaYko1MTBZTEV0QWRvcjlnQW5OaXA2b1VYOXdScG0wUDVGN09NdzlpRVFBQUFBJCQAAAAAAAAAAAEAAAD8njU2u9vIysu8xO4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAM6m52HOpudhOF; '
        'BDUSS_BFESS=l3eFV1alZaYko1MTBZTEV0QWRvcjlnQW5OaXA2b1VYOXdScG0wUDVGN09NdzlpRVFBQUFBJCQAAAAAAAAAAAEAAAD8njU2u9vIysu8xO4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAM6m52HOpudhOF; '
        'REALTIME_TRANS_SWITCH=1; '
        'SOUND_SPD_SWITCH=1; '
        'HISTORY_SWITCH=1; '
        'FANYI_WORD_SWITCH=1; '
        'SOUND_PREFER_SWITCH=1; '
        'MCITY=-48%3A; '
        'BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; '
        'BDSFRCVID=iw-OJeC62Rux4w6Dirv4tSzx1GDoHtnTH6aoRkJDrtALUT_5h8BIEG0PSf8g0Kubigy6ogKKB2OTHnFF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; '
        'H_BDCLCKID_SF=tJKf_CIhtK-3f-op-P__jj_qhUKX5-RLfK5fsl7F5l8-hxoG25Objx0tKlDJ-pJEMn6xKn7dbnrxOKQphp5Oyx0X5-QrLl3dJC-O5qTN3KJmVnL9bT3v5tDtbpuq2-biW2uH2MbdaqQP_IoG2Mn8M4bb3qOpBtQmJeTxoUJ25DnJhbLGe4bK-Tr3jHtHJx5; '
        'H_PS_PSSID=36428_36455_31254_34813_35914_36165_35979_36055_36234_26350_36469_36447; '
        'delPer=0; '
        'PSINO=2; '
        'BDSFRCVID_BFESS=iw-OJeC62Rux4w6Dirv4tSzx1GDoHtnTH6aoRkJDrtALUT_5h8BIEG0PSf8g0Kubigy6ogKKB2OTHnFF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; '
        'H_BDCLCKID_SF_BFESS=tJKf_CIhtK-3f-op-P__jj_qhUKX5-RLfK5fsl7F5l8-hxoG25Objx0tKlDJ-pJEMn6xKn7dbnrxOKQphp5Oyx0X5-QrLl3dJC-O5qTN3KJmVnL9bT3v5tDtbpuq2-biW2uH2MbdaqQP_IoG2Mn8M4bb3qOpBtQmJeTxoUJ25DnJhbLGe4bK-Tr3jHtHJx5; '
        'BAIDUID=F35C07088578422B085A47B6C7D90E35:FG=1; '
        'BAIDUID_BFESS=F35C07088578422B085A47B6C7D90E35:FG=1; '
        'Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1653269009; '
        'Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1653269009; '
        'ab_sr=1.0.1_MTRkNjA3OGMzZTY3N2Q3MzQ1ZTdmNGFmNTAwMTc4MmJkNjg4YzIyYmJlYWU0OWUyNmY2YTM1NGU2NzZhZjg4MDJhYWZhMzQwMjJjYjMzY2UwNjMxOGI3YmMyZWFkOTE5MTVmYzZjNGRhNjAxMDFjZGI4NDNkZmEzM2Y3ODE3NTI4ZmJkYjdlNGIyZjA3YmE0NzIyMWM1NDliYzJkNjU4NjU0YWRhZWNhNWZhNjVjMmRkMzMwMDZmMGZjYmEyNWU3'
    ),
    # 'Host': 'fanyi.baidu.com',
    # 'Origin': 'https://fanyi.baidu.com',
    # 'Referer': 'https://fanyi.baidu.com/',
    # 'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="101", "Google Chrome";v="101"',
    # 'sec-ch-ua-mobile': '?0',
    # 'sec-ch-ua-platform': '"Windows"',
    # 'Sec-Fetch-Dest': 'empty',
    # 'Sec-Fetch-Mode': 'cors',
    # 'Sec-Fetch-Site': 'same-origin',
    # 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36',
    # 'X-Requested-With': 'XMLHttpRequest'
}

# 3. Form parameters of the POST request.
# NOTE(review): 'sign' and 'token' were copied from a captured browser request;
# presumably they are tied to this query/session -- confirm before changing
# 'query'.
data = {
    'from': 'en',
    'to': 'zh',
    'query': 'love',
    'transtype': 'translang',
    'simple_means_flag': '3',
    'sign': '198772.518981',
    'token': '4f6dbf9201136c6f7280be67858fd77d',
    'domain': 'common',
}
# POST parameters must be URL-encoded and then converted to bytes with encode().
data = urllib.parse.urlencode(data).encode('utf-8')

# 4. Build the customised request object (passing data makes it a POST).
request = urllib.request.Request(url=url, data=data, headers=headers)

# 5. Send the request to the server as a browser would.
# 6. Read the response body; the with-block closes the connection afterwards.
with urllib.request.urlopen(request) as response:
    content = response.read().decode('utf-8')

# The endpoint answers with JSON; parse it and show the resulting object.
obj = json.loads(content)
print(obj)

執行效果