知乎登入以及改版後的知乎登入(小知識點)
1. 關於cookie和session
2. 英文驗證碼登入知乎(零碎知識點...喔喔...自己怕忘隨意整理一下, 有點亂)
---判斷驗證碼是否存在時, 請求的網址相對路徑為圖中:path(captcha?lang=en 請求的是英文的驗證碼)
---圖中的{"show_captcha": false} 表明本次登入不用輸入驗證碼
---captcha?lang=en : 驗證碼是英文的
---captcha?lang=cn : 驗證碼是中文的
---圖中的{"show_captcha": true} 表明本次登入需要輸入驗證碼
---圖中{"show_captcha": true}, 在判斷是否含有驗證碼時需要拿到---(判斷是否有驗證碼是get請求)
======================================================================
---圖中圈出的"img_base64", 在獲取驗證碼時需要拿到---(獲取驗證碼是put請求)
---(索取驗證碼圖片, 在保證有驗證碼的前提下才會發送put)
======================================================================
---圖中圈出的"success", 在驗證驗證碼是否正確時需要拿到---(驗證驗證碼是否正確是post請求)
---需要傳引數
====================================================================
---set-cookie: ... 的位置
=====================================================================
---登入時傳送的post請求,且需要傳引數:
---登入成功
---圖中圈出的相對路徑是在登入時請求的, 如:("https://www.zhihu.com/api/v3/oauth/sign_in")
====================================================================
登入成功之後---
session.get("https://www.zhihu.com/") 知乎首頁的網址
====================================================================
3. 中文驗證碼登入知乎
---首先請求的:path(/api/v3/oauth/captcha?lang=cn)
---其次,要把所有點的座標記錄下來(通過抓包工具抓取)
---get_captcha時判斷
---完成,大概過程就是這樣了,嘿嘿...
再來補充一下, 附上英文驗證碼的栗子~
# 英文驗證碼的登入方式
# 中文登入(點選倒立文字)
import requests,time,json
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Request headers passed to every zhihu.com call made by this script.
headers = {
    # Browser identity presented to the server.
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) "
        "Gecko/20100101 Firefox/60.0"
    ),
    "Referer": "https://www.zhihu.com/signup?next=%2F",
    "origin": "https://www.zhihu.com",
    # The token after "oauth" is the same value zhihu_login posts as client_id.
    "Authorization": "oauth c3cef7c66a1843f8b3a9e6a1e3160e20",
}
# 獲取的伺服器的Set-Cookie用session直接自動解析並儲存, 在後續的請求中, 會在請求頭中自動攜帶這些cookie
# LWPCookieJar: 對cookie進行自動操作, load() save()
from http.cookiejar import LWPCookieJar
"""建立session管理cookie"""
# One shared session for the whole script: it stores the cookies it receives
# and sends them back on later requests.
session = requests.Session()
# Back the session's cookie store with a file so cookies can be saved/reloaded.
session.cookies = LWPCookieJar(filename='zhihucookie.txt')

# Reload cookies written by an earlier run, keeping expired and discard-marked
# entries as well.
try:
    session.cookies.load(filename='zhihucookie.txt', ignore_expires=True, ignore_discard=True)
except Exception as exc:
    # Best-effort at script start-up: carry on with an empty jar, but show why
    # the load failed instead of discarding the reason.
    print('載入失敗', exc)

# Request the Zhihu home page once with the shared headers and print the
# response object.
res = session.get('https://www.zhihu.com/', headers=headers, verify=False)
print(res)
"""定義知乎登入函式"""
def zhihu_login(username="XXX", password="XXXXXX"):
    """Log in to Zhihu through the OAuth sign-in endpoint.

    Asks the server whether a captcha is required. If so, the captcha is
    fetched, typed in by the user and verified before the sign-in form is
    posted. On HTTP 201 the session cookies are saved to disk and the response
    body is printed; any other status prints a failure message.

    Args:
        username: Account name sent in the form. Defaults to the placeholder
            value the script was written with.
        password: Account password sent in the form. Defaults to the
            placeholder value the script was written with.

    Returns:
        None. Progress and failures are reported on stdout.
    """
    # The form always carries a "captcha" field, so the name has to be bound
    # even when the server does not ask for a captcha.
    captcha = ''
    if is_captcha():
        captcha = get_captcha()
        # The typed captcha must be verified on its own before signing in.
        if not check_captcha(captcha):
            return

    login_url = "https://www.zhihu.com/api/v3/oauth/sign_in"
    # NOTE(review): "timestamp" and "signature" are fixed literal values;
    # presumably they were captured from one browser session and have to be
    # regenerated for the server to accept them -- confirm.
    post_params = {
        "client_id": "c3cef7c66a1843f8b3a9e6a1e3160e20",
        "grant_type": "password",
        "timestamp": "1530194590142",
        "source": "com.zhihu.web",
        "signature": "9c16a7c48a9a74c3a5747a56125d9ab08a79f070",
        "username": username,
        "password": password,
        "captcha": captcha,
        "lang": "cn",
        "ref_source": "other_",
        "utm_source": "baidu",
    }
    try:
        response = session.post(login_url, data=post_params, headers=headers, verify=False)
        if response.status_code == 201:
            # Persist the logged-in cookies, including expired/discard ones.
            session.cookies.save(ignore_discard=True, ignore_expires=True)
            print(response.text)
        else:
            print('登入失敗')
    except Exception as exc:
        # Best-effort script: report the failure and return.
        print('請求失敗', exc)
"""判斷是否有英文驗證碼"""
def is_captcha():
    """Ask Zhihu whether the next sign-in needs an English captcha.

    Sends a GET to the captcha endpoint and reads the "show_captcha" flag from
    the JSON reply.

    Returns:
        bool: True when the reply says a captcha must be entered. False when
        it says none is needed, when the status is not 200, or when the
        request or the parsing of the reply fails.
    """
    captcha_url = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'
    try:
        response = session.get(url=captcha_url, headers=headers, verify=False)
        if response.status_code != 200:
            print('判斷驗證碼失敗', response.status_code)
            return False
        show_captcha = json.loads(response.text)['show_captcha']
    except Exception as exc:
        # Best-effort: any failure is treated as "no captcha", but the reason
        # is reported rather than hidden behind an empty line.
        print('判斷驗證碼失敗', exc)
        return False

    if show_captcha:
        print('有驗證碼')
        return True
    print('沒有驗證碼')
    return False
import base64
from PIL import Image
from io import BytesIO
"""獲取驗證碼"""
def get_captcha():
    """Fetch the English captcha image, display it and read the user's answer.

    Returns:
        str | None: The text typed by the user, or None when the server did
        not answer with status 202 or when any step (request, decoding,
        display, input) failed.
    """
    captcha_url = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'
    try:
        # PUT requests the captcha picture; it is only sent once a captcha is
        # known to be required.
        response = session.put(url=captcha_url, headers=headers, verify=False)
        if response.status_code != 202:  # this endpoint answers 202, not 200
            print('獲取驗證碼失敗', response.status_code)
            return None
        # The JSON reply carries the picture as base64 text.
        img_base64 = json.loads(response.text)['img_base64']
        image = Image.open(BytesIO(base64.b64decode(img_base64)))
        image.show()
        return input('請輸入驗證碼:')
    except Exception as exc:
        # Best-effort: report the reason instead of printing an empty line.
        print('獲取驗證碼失敗', exc)
        return None
"""驗證驗證碼是否輸入正確(需要傳入引數captcha)"""
def check_captcha(captcha):
    """Submit the typed captcha to Zhihu and report whether it was accepted.

    Args:
        captcha: Text the user entered for the captcha image.

    Returns:
        bool: True when the JSON reply contains a 'success' key, else False.
    """
    # verify=False switches off certificate verification for the HTTPS request.
    reply = session.post(
        url='https://www.zhihu.com/api/v3/oauth/captcha?lang=en',
        data={'input_text': captcha},
        headers=headers,
        verify=False,
    )
    accepted = 'success' in json.loads(reply.text)
    print('輸入驗證碼正確' if accepted else '輸入驗證碼不正確')
    return accepted
# Start the login flow only when this file is executed as a script.
if __name__ == "__main__":
    zhihu_login()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
改版後的知乎登入:
原來form表單裡的資料是直接呈現出來的, 改版之後就沒有了, 經過使用花瓶(Charles)抓包以及多次的嘗試發現:
其他資料沒啥太大變化, 但是signature是用hmac搭配sha1(HMAC-SHA1)計算後生成的一串簽名, 所以需要做出以下改變
下面附上程式碼:
# 英文驗證碼的登入方式
# 中文登入(點選倒立文字)
import requests,time,json
from hashlib import sha1
import hmac
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Request headers passed to every zhihu.com call made by this script.
headers = {
    # Browser identity presented to the server.
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) "
        "Gecko/20100101 Firefox/60.0"
    ),
    "Referer": "https://www.zhihu.com/signup?next=%2F",
    "origin": "https://www.zhihu.com",
    # The token after "oauth" is the same value zhihu_login posts as client_id.
    "Authorization": "oauth c3cef7c66a1843f8b3a9e6a1e3160e20",
    "Host": "www.zhihu.com",
}
# cookies的自動化管理。
# 獲取的伺服器的Set-Cookie用session直接自動解析並儲存,在後續的請求中,會在請求頭中自動攜帶這些cookie
# LWPCookieJar:對cookie進行自動操作,load() save()
from http.cookiejar import LWPCookieJar
# One shared session for the whole script: it stores the cookies it receives
# and sends them back on later requests.
session = requests.Session()
# Back the session's cookie store with a file so cookies can be saved/reloaded.
session.cookies = LWPCookieJar(filename='zhihucookie.txt')

# Reload cookies written by an earlier run, keeping expired and discard-marked
# entries as well.
try:
    session.cookies.load(filename='zhihucookie.txt', ignore_expires=True, ignore_discard=True)
except FileNotFoundError:
    # First run: no cookie file has been saved yet.
    print('暫時沒有Cookie')
except Exception as exc:
    # Any other load failure: continue with an empty jar, but show the reason
    # instead of reporting it as a plain "no cookie yet".
    print('暫時沒有Cookie', exc)
# res = session.get('https://www.zhihu.com/', headers=headers, verify=False)
# print(res)
def _make_signature(timestamp, grant_type='password',
                    client_id='c3cef7c66a1843f8b3a9e6a1e3160e20',
                    source='com.zhihu.web'):
    """Return the hex HMAC-SHA1 signature for the sign-in form."""
    # Keyed hash object; the parts to sign are fed in afterwards.
    mac = hmac.new(str.encode('d1b964811afb40118a12068ff74a12f4'), msg=None, digestmod=sha1)
    # The order in which the parts are added matters.
    for part in (grant_type, client_id, source, timestamp):
        mac.update(str.encode(part))
    return mac.hexdigest()


def zhihu_login(username="***", password="***"):
    """Log in to Zhihu through the OAuth sign-in endpoint.

    Asks the server whether a captcha is required. If so, the captcha is
    fetched, typed in by the user and verified before the sign-in form is
    posted. The form carries a millisecond timestamp and an HMAC-SHA1
    signature computed from it. On HTTP 201 the session cookies are saved to
    disk; the response body is printed in either case.

    Args:
        username: Account name sent in the form. Defaults to the placeholder
            value the script was written with.
        password: Account password sent in the form. Defaults to the
            placeholder value the script was written with.

    Returns:
        None. Progress and failures are reported on stdout.
    """
    # Stays empty when the server does not ask for a captcha.
    captcha = ''
    if is_captcha():
        captcha = get_captcha()
        # The typed captcha must be verified on its own before signing in.
        if not check_captcha(captcha):
            return

    login_url = "https://www.zhihu.com/api/v3/oauth/sign_in"

    # Current time in whole milliseconds, as a string; it is both signed and
    # sent in the form.
    tm = str(int(time.time() * 1000))
    print('tm = ', tm)
    signature = _make_signature(tm)
    print('signature = ', signature)

    post_params = {
        "client_id": "c3cef7c66a1843f8b3a9e6a1e3160e20",
        "grant_type": "password",
        "timestamp": tm,
        "source": "com.zhihu.web",
        "signature": signature,
        "username": username,
        "password": password,
        "captcha": captcha,
        "lang": "cn",
        "ref_source": "homepage",
        "utm_source": "",
    }
    try:
        response = session.post(login_url, data=post_params, headers=headers, verify=False)
        if response.status_code == 201:
            print('登入成功')
            # Persist the logged-in cookies, including expired/discard ones.
            session.cookies.save(ignore_discard=True, ignore_expires=True)
            print(response.text)
        else:
            print('登入失敗')
            print(response.text)
    except Exception as exc:
        # Best-effort script: report the failure and return.
        print('請求失敗', exc)
def is_captcha():
    """Ask Zhihu whether the next sign-in needs an English captcha.

    Sends a GET to the captcha endpoint and reads the "show_captcha" flag from
    the JSON reply.

    Returns:
        bool: True when the reply says a captcha must be entered. False when
        it says none is needed, when the status is not 200, or when the
        request or the parsing of the reply fails.
    """
    captcha_url = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'
    try:
        response = session.get(url=captcha_url, headers=headers, verify=False)
        if response.status_code != 200:
            print('判斷驗證碼失敗', response.status_code)
            return False
        show_captcha = json.loads(response.text)['show_captcha']
    except Exception as exc:
        # Best-effort: any failure is treated as "no captcha", but the reason
        # is reported rather than hidden behind an empty line.
        print('判斷驗證碼失敗', exc)
        return False

    if show_captcha:
        print('有驗證碼')
        return True
    print('沒有驗證碼')
    return False
import base64
from PIL import Image
from io import BytesIO
def get_captcha():
    """Fetch the English captcha image, display it and read the user's answer.

    Returns:
        str | None: The text typed by the user, or None when the server did
        not answer with status 202 or when any step (request, decoding,
        display, input) failed.
    """
    captcha_url = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'
    # NOTE(review): a captured response for this endpoint carried a Set-Cookie
    # header for "capsion_ticket" (Domain=zhihu.com, httponly, Path=/);
    # presumably the session keeps it for the later captcha check -- confirm.
    try:
        # PUT requests the captcha picture; it is only sent once a captcha is
        # known to be required.
        response = session.put(url=captcha_url, headers=headers, verify=False)
        if response.status_code != 202:
            print('獲取驗證碼失敗', response.status_code)
            return None
        # The JSON reply carries the picture as base64 text.
        img_base64 = json.loads(response.text)['img_base64']
        image = Image.open(BytesIO(base64.b64decode(img_base64)))
        image.show()
        return input('請輸入驗證碼:')
    except Exception as exc:
        # Best-effort: report the reason instead of printing an empty line.
        print('獲取驗證碼失敗', exc)
        return None
def check_captcha(captcha):
    """Submit the typed captcha to Zhihu and report whether it was accepted.

    Args:
        captcha: Text the user entered for the captcha image.

    Returns:
        bool: True when the JSON reply contains a 'success' key, else False.
    """
    # verify=False switches off certificate verification for the HTTPS request.
    reply = session.post(
        url='https://www.zhihu.com/api/v3/oauth/captcha?lang=en',
        data={'input_text': captcha},
        headers=headers,
        verify=False,
    )
    accepted = 'success' in json.loads(reply.text)
    print('輸入驗證碼正確' if accepted else '輸入驗證碼不正確')
    return accepted
# Start the login flow only when this file is executed as a script.
if __name__ == "__main__":
    zhihu_login()
# res = session.get('https://www.zhihu.com/', headers=headers, verify=False).text
# print(res)
# [SSL: CERTIFICATE_VERIFY_FAILED]: 在requests傳送https請求時,出現的證書認證失敗,解決辦法:verify=False
# InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
# InsecureRequestWarning)