1. 程式人生 > >知乎登入以及改版後的知乎登入(小知識點)

知乎登入以及改版後的知乎登入(小知識點)

1. 關於cookie和session

2. 英文驗證碼登入知乎(零碎知識點...喔喔...自己怕忘隨意整理一下, 有點亂)

---判斷驗證碼是否存在時, 請求的網址相對路徑為圖中:path(captcha?lang=en 請求的是英文的驗證碼)

---圖中的{"show_captcha": false} 表明本次登入不用輸入驗證碼

---captcha?lang=en : 驗證碼是英文的

---captcha?lang=cn : 驗證碼是中文的

---圖中的{"show_captcha": true} 表明本次登入需要輸入驗證碼

---圖中{"show_captcha": true}, 在判斷是否含有驗證碼時需要拿到---(判斷是否有驗證碼是get請求)

======================================================================

---圖中圈出的"img_base64", 在獲取驗證碼時需要拿到---(獲取驗證碼是put請求)

---(索取驗證碼圖片, 在保證有驗證碼的前提下才會發送put)

======================================================================

---圖中圈出的"success", 在驗證驗證碼是否正確時需要拿到---(驗證驗證碼是否正確是post請求)

---需要傳引數

====================================================================

---set-cookie: ... 的位置

=====================================================================

---登入時傳送的post請求,且需要傳引數:

---登入成功

---圖中圈出的相對路徑是在登入時請求的, 如:("https://www.zhihu.com/api/v3/oauth/sign_in")

====================================================================

登入成功之後---

session.get("https://www.zhihu.com/") 知乎首頁的網址

====================================================================

3. 中文驗證碼登入知乎

---首先請求的:path(/api/v3/oauth/captcha?lang=cn)

---其次,要把所有點的座標記錄下來(通過抓包工具抓取)

---get_captcha時判斷

---完成,大概過程就是這樣了,嘿嘿...

再來補充一下, 附上英文驗證碼的栗子~

# 英文驗證碼的登入方式
# 中文登入(點選倒立文字)
import requests,time,json

from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

# Headers attached to every request this script sends to zhihu.com.
headers = {
    # Browser identity string presented to the server.
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) "
        "Gecko/20100101 Firefox/60.0"
    ),
    "Referer": "https://www.zhihu.com/signup?next=%2F",
    "origin": "https://www.zhihu.com",
    # The token after "oauth" is the same value the sign-in form posts as client_id.
    "Authorization": "oauth c3cef7c66a1843f8b3a9e6a1e3160e20",
}
# The session parses and stores the server's Set-Cookie values automatically and
# sends those cookies back in the request headers of every later request.
# LWPCookieJar: automates cookie handling through load() and save().

from http.cookiejar import LWPCookieJar

# Create the session that manages cookies; the jar is bound to 'zhihucookie.txt'.
# (The bare triple-quoted strings in this section are no-op notes left by the author.)
"""建立session管理cookie"""
session = requests.Session()
session.cookies = LWPCookieJar(filename='zhihucookie.txt')

# Try to load cookies from 'zhihucookie.txt'.
"""載入cookie"""
try:
    session.cookies.load(filename='zhihucookie.txt', ignore_expires=True, ignore_discard=True)
except Exception as e:
    # Best effort: any load failure is only reported and the script carries on.
    # NOTE(review): presumably this fires on the first run, before the file exists - confirm.
    print('載入失敗')

# Request the Zhihu home page once at import time and print the response object.
# verify=False turns off certificate verification for the HTTPS request.
"""請求知乎登入介面"""
res = session.get('https://www.zhihu.com/', headers=headers, verify=False)
print(res)

"""定義知乎登入函式"""
def zhihu_login():
    """Log in to Zhihu, solving an English captcha first when one is required.

    The server is asked whether a captcha is needed. If so, the captcha is
    fetched, typed in by the user and verified on its own; the login is
    abandoned when that verification fails. The sign-in form is then posted
    and, on HTTP 201, the session cookies are saved to the cookie file.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    if is_captcha():
        captcha = get_captcha()
        # The typed captcha has to be validated separately before the
        # login form is submitted.
        if not check_captcha(captcha):
            return
    else:
        # No captcha requested: the form still carries the field, left empty.
        # The sign-in request below must run in this case as well, so it
        # lives outside the captcha branch.
        captcha = ''

    login_url = "https://www.zhihu.com/api/v3/oauth/sign_in"
    # Form fields expected by the sign-in endpoint.
    # NOTE(review): "timestamp" and "signature" are fixed values; presumably
    # they were copied from a captured request and may be rejected once
    # stale - confirm against the live API.
    post_params = {
        "client_id": "c3cef7c66a1843f8b3a9e6a1e3160e20",
        "grant_type": "password",
        "timestamp": "1530194590142",
        "source": "com.zhihu.web",
        "signature": "9c16a7c48a9a74c3a5747a56125d9ab08a79f070",
        "username": "XXX",
        "password": "XXXXXX",
        "captcha": captcha,
        "lang": "cn",
        "ref_source": "other_",
        "utm_source": "baidu",
    }

    try:
        response = session.post(login_url, data=post_params, headers=headers, verify=False)
        if response.status_code == 201:
            # Persist the cookies so a later run can load them again.
            session.cookies.save(ignore_discard=True, ignore_expires=True)
            print(response.text)
        else:
            print('登入失敗')
    except Exception as e:
        print('請求失敗', e)



"""判斷是否有英文驗證碼"""
def is_captcha():
    """Ask Zhihu whether this login attempt requires an English captcha.

    Sends a GET request to the captcha endpoint and reads the
    ``show_captcha`` flag from the JSON reply.

    Returns:
        bool: True when the reply's ``show_captcha`` flag is truthy. False
        when it is falsy, or when no answer could be obtained (status code
        other than 200, request failure, malformed reply); those failures
        are printed instead of being silently dropped.
    """
    captcha_url = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'
    try:
        response = session.get(url=captcha_url, headers=headers, verify=False)
        if response.status_code != 200:
            print('驗證碼查詢失敗, 狀態碼:', response.status_code)
            return False
        show_captcha = json.loads(response.text)['show_captcha']
    except Exception as e:
        # Best effort, as before, but say what went wrong.
        print('驗證碼查詢失敗', e)
        return False

    if show_captcha:
        print('有驗證碼')
        return True
    print('沒有驗證碼')
    return False

import base64
from PIL import Image
from io import BytesIO

"""獲取驗證碼"""
def get_captcha():
    """Fetch the English captcha image, display it and ask the user to type it.

    The image is requested with PUT, decoded from the base64 text in the
    reply's ``img_base64`` field and opened with PIL so the user can read it.

    Returns:
        str | None: the text typed by the user, or None when the image could
        not be obtained (status code other than 202, request failure,
        undecodable reply). Failures are printed.
    """
    captcha_url = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'
    try:
        # Ask for the captcha image; PUT is only sent once a captcha is known to be required.
        response = session.put(url=captcha_url, headers=headers, verify=False)
        if response.status_code != 202:  # this endpoint answers 202, not 200
            print('獲取驗證碼失敗, 狀態碼:', response.status_code)
            return None

        img_base64 = json.loads(response.text)['img_base64']

        # Decode the picture and show it to the user.
        image = Image.open(BytesIO(base64.b64decode(img_base64)))
        image.show()

        return input('請輸入驗證碼:')
    except Exception as e:
        # Best effort, as before, but say what went wrong.
        print('獲取驗證碼失敗', e)
        return None


"""驗證驗證碼是否輸入正確(需要傳入引數captcha)"""
def check_captcha(captcha):
    """Submit the typed captcha to Zhihu and report whether it was accepted.

    Args:
        captcha: the text the user entered for the captcha image.

    Returns:
        bool: True when the JSON reply contains a ``success`` key, otherwise
        False. The outcome is also printed.
    """
    # verify=False: turn off certificate verification for this HTTPS request.
    response = session.post(
        url='https://www.zhihu.com/api/v3/oauth/captcha?lang=en',
        data={'input_text': captcha},
        headers=headers,
        verify=False,
    )
    accepted = 'success' in json.loads(response.text)
    print('輸入驗證碼正確' if accepted else '輸入驗證碼不正確')
    return accepted


# Start the login flow only when this file is executed as a script.
if __name__ == '__main__':
    zhihu_login()

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

改版後的知乎登入:

原來form表單裡的資料是直接呈現出來的, 改版之後就沒有了, 經過使用花瓶(Charles)抓包以及多次的嘗試發現:

其他資料沒啥太大變化, 但是signature是經過HMAC-SHA1雜湊運算後生成的一串資料, 所以需要做出一些改變

 下面附上程式碼:

# 英文驗證碼的登入方式

# 中文登入(點選倒立文字)

import requests,time,json
from hashlib import sha1
import hmac

from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

# Headers attached to every request this script sends to zhihu.com.
headers = {
    # Browser identity string presented to the server.
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) "
        "Gecko/20100101 Firefox/60.0"
    ),
    "Referer": "https://www.zhihu.com/signup?next=%2F",
    "origin": "https://www.zhihu.com",
    # The token after "oauth" is the same value the sign-in form posts as client_id.
    "Authorization": "oauth c3cef7c66a1843f8b3a9e6a1e3160e20",
    "Host": "www.zhihu.com",
}

# Automated cookie management.
# The session parses and stores the server's Set-Cookie values automatically and
# sends those cookies back in the request headers of every later request.
# LWPCookieJar: automates cookie handling through load() and save().
from http.cookiejar import LWPCookieJar

# Session shared by every request below; its cookie jar is bound to 'zhihucookie.txt'.
session = requests.Session()
session.cookies = LWPCookieJar(filename='zhihucookie.txt')

# Try to load cookies from 'zhihucookie.txt'.
try:
    session.cookies.load(filename='zhihucookie.txt', ignore_expires=True, ignore_discard=True)
except Exception as e:
    # Best effort: any load failure is only reported ("no cookie yet") and the script carries on.
    print('暫時沒有Cookie')

# Disabled: initial request to the home page, kept from the earlier version of the script.
# res = session.get('https://www.zhihu.com/', headers=headers, verify=False)
# print(res)

def _login_signature(grant_type, client_id, source, timestamp):
    """Return the hex HMAC-SHA1 signature posted with the sign-in form.

    The four strings are hashed in exactly this order (grant type, client id,
    source, timestamp) under a fixed key; changing the order changes the
    signature.
    """
    message = grant_type + client_id + source + timestamp
    return hmac.new(
        b'd1b964811afb40118a12068ff74a12f4',
        message.encode(),
        sha1,
    ).hexdigest()


def zhihu_login():
    """Log in to Zhihu, solving an English captcha first when one is required.

    The server is asked whether a captcha is needed. If so, the captcha is
    fetched, typed in by the user and verified on its own; the login is
    abandoned when that verification fails. A millisecond timestamp and its
    HMAC-SHA1 signature are then generated, the sign-in form is posted and,
    on HTTP 201, the session cookies are saved to the cookie file.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    if is_captcha():
        captcha = get_captcha()
        # The typed captcha has to be validated separately before the
        # login form is submitted.
        if not check_captcha(captcha):
            return
    else:
        captcha = ''

    login_url = "https://www.zhihu.com/api/v3/oauth/sign_in"
    client_id = "c3cef7c66a1843f8b3a9e6a1e3160e20"
    grant_type = "password"
    source = "com.zhihu.web"

    # Current time in whole milliseconds, sent as a string and also signed.
    tm = str(int(time.time() * 1000))
    print('tm = ',tm)

    res = _login_signature(grant_type, client_id, source, tm)
    print('signature = ',res)

    post_params = {
        "client_id": client_id,
        "grant_type": grant_type,
        "timestamp": tm,
        "source": source,
        "signature": res,
        "username": "***",
        "password": "***",
        "captcha": captcha,
        "lang": "cn",
        "ref_source": "homepage",
        "utm_source": "",
    }

    try:
        response = session.post(login_url, data=post_params, headers=headers, verify=False)
        if response.status_code == 201:
            print('登入成功')
            # Persist the cookies so a later run can load them again.
            session.cookies.save(ignore_discard=True, ignore_expires=True)
            print(response.text)
        else:
            print('登入失敗')
            print(response.text)
    except Exception as e:
        print('請求失敗',e)

def is_captcha():
    """Ask Zhihu whether this login attempt requires an English captcha.

    Sends a GET request to the captcha endpoint and reads the
    ``show_captcha`` flag from the JSON reply.

    Returns:
        bool: True when the reply's ``show_captcha`` flag is truthy. False
        when it is falsy, or when no answer could be obtained (status code
        other than 200, request failure, malformed reply); those failures
        are printed instead of being silently dropped.
    """
    captcha_url = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'
    try:
        response = session.get(url=captcha_url, headers=headers, verify=False)
        if response.status_code != 200:
            print('驗證碼查詢失敗, 狀態碼:', response.status_code)
            return False
        show_captcha = json.loads(response.text)['show_captcha']
    except Exception as e:
        # Best effort, as before, but say what went wrong.
        print('驗證碼查詢失敗', e)
        return False

    if show_captcha:
        print('有驗證碼')
        return True
    print('沒有驗證碼')
    return False

import base64
from PIL import Image
from io import BytesIO


def get_captcha():
    """Fetch the English captcha image, display it and ask the user to type it.

    The image is requested with PUT, decoded from the base64 text in the
    reply's ``img_base64`` field and opened with PIL so the user can read it.

    Returns:
        str | None: the text typed by the user, or None when the image could
        not be obtained (status code other than 202, request failure,
        undecodable reply). Failures are printed.
    """
    captcha_url = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'

    # Sample Set-Cookie header kept from the author's notes:
    # set-cookie: capsion_ticket="2|1:0|10:1528448404|14:capsion_ticket|44:MjIyMTdjMDNlNWQ0NDU4NDk3YWJiYTJhMGNhYzdhMGU=|27fc1b86cbb52d627f270fdc6ee72f58f88ae09b76483d30ff1026418d83bfce"; Domain=zhihu.com; expires=Sun, 08 Jul 2018 09:00:04 GMT; httponly; Path=/

    try:
        # Ask for the captcha image; PUT is only sent once a captcha is known to be required.
        response = session.put(url=captcha_url, headers=headers, verify=False)
        if response.status_code != 202:
            print('獲取驗證碼失敗, 狀態碼:', response.status_code)
            return None

        img_base64 = json.loads(response.text)['img_base64']

        # Decode the picture and show it to the user.
        image = Image.open(BytesIO(base64.b64decode(img_base64)))
        image.show()

        return input('請輸入驗證碼:')
    except Exception as e:
        # Best effort, as before, but say what went wrong.
        print('獲取驗證碼失敗', e)
        return None


def check_captcha(captcha):
    """Post the typed captcha to Zhihu and tell whether the server accepted it.

    Args:
        captcha: the text the user entered for the captcha image.

    Returns:
        bool: True when the JSON reply contains a ``success`` key, otherwise
        False. The outcome is also printed.
    """
    endpoint = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'
    form = {'input_text': captcha}

    # verify=False: turn off certificate verification for this HTTPS request.
    reply = session.post(url=endpoint, data=form, headers=headers, verify=False)
    payload = json.loads(reply.text)

    if 'success' not in payload:
        print('輸入驗證碼不正確')
        return False
    print('輸入驗證碼正確')
    return True


# Start the login flow only when this file is executed as a script.
if __name__ == '__main__':
    zhihu_login()
    # Disabled: fetch the home page after logging in and print its HTML.
    # res = session.get('https://www.zhihu.com/', headers=headers, verify=False).text
    # print(res)



# [SSL: CERTIFICATE_VERIFY_FAILED]: 在requests傳送https請求時,出現的證書認證失敗,解決辦法:verify=False
# InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
#   InsecureRequestWarning)