1. 程式人生 > 實用技巧 >爬蟲-模擬登入

爬蟲-模擬登入

模擬登入的程式碼實現:

#socket服務端
import socket
import json
import threading
# Address the demo server listens on: every interface, port 8002.
# Kept in one named constant so the comment and the bind call cannot drift
# apart (the previous comment claimed port 8000 while the code bound 8002).
LISTEN_ADDR = ("0.0.0.0", 8002)

server = socket.socket()
server.bind(LISTEN_ADDR)
server.listen()

# After a successful login the server hands the user a session id string
# (it has to be complex enough that nobody else can forge it).
user_info = {
    "sessionid": "bobby",
}
# The browser must then send this session id along with every request
# (for all URLs), which raises two questions:
# 1. How is the browser told about the session id?
# 2. How do we make sure the browser sends it on every request?

# Session vs. cookie:
# 1. A session is maintained and interpreted by the server, and is handed to
#    the browser through Set-Cookie.
# 2. A cookie is the browser-side mechanism; the browser sends the stored
#    values back on every subsequent request.

# Status line and headers of the demo response, terminated by the blank line
# that separates headers from the body. HTTP requires CRLF line endings.
# NOTE: the Expires date of the sessionid cookie is hard-coded; once that date
# is in the past a browser will discard the cookie instead of storing it.
_RESPONSE_HEAD = "\r\n".join([
    "HTTP/1.0 200 OK",
    "Content-type: text/html",
    "Set-Cookie: name=bobby",
    "Set-Cookie: course_id=78",
    "Set-Cookie: sessionid=abc123; Expires=Wed, 09 Jun 2021 10:18:14 GMT",
    "",
    "",
])

# Payload returned to every client, serialized as JSON.
_COURSES = [
    {
        "name": "django打造線上教育",
        "teacher": "bobby",
        "url": "https://coding.imooc.com/class/78.html",
    },
    {
        "name": "python高階程式設計",
        "teacher": "bobby",
        "url": "https://coding.imooc.com/class/200.html",
    },
    {
        "name": "scrapy分散式爬蟲",
        "teacher": "bobby",
        "url": "https://coding.imooc.com/class/92.html",
    },
    {
        "name": "django rest framework打造生鮮電商",
        "teacher": "bobby",
        "url": "https://coding.imooc.com/class/131.html",
    },
    {
        "name": "tornado從入門到精通",
        "teacher": "bobby",
        "url": "https://coding.imooc.com/class/290.html",
    },
]


def handle_sock(sock, addr):
    """Serve one client: read its request, reply with cookies and JSON, close.

    Args:
        sock: Connected client socket; it is always closed before returning,
            even if reading or writing raises.
        addr: Peer address as returned by ``accept()``; currently unused.
    """
    with sock:
        # recv() blocks until the client has sent something (or hung up).
        request = sock.recv(1024)
        if not request:
            # The peer closed the connection without sending a request.
            return
        print(request.decode("utf8", errors="replace"))

        body = json.dumps(_COURSES)
        # sendall() keeps writing until the whole response is out, whereas
        # send() may transmit only part of it.
        sock.sendall((_RESPONSE_HEAD + body).encode("utf8"))


# Accept client connections and hand each one to its own thread. Guarded so
# that importing this module does not block in accept(); running the file as
# a script behaves exactly as before.
if __name__ == "__main__":
    while True:
        # Blocks until the next client connects.
        sock, addr = server.accept()
        # One thread per connection keeps the accept loop responsive.
        client_thread = threading.Thread(target=handle_sock, args=(sock, addr))
        client_thread.start()

requests+session模擬登入豆瓣:

import json
import pickle

import requests


def login():
    """Log in to Douban with a requests session, then verify the saved cookies.

    Posts the credentials to the mobile login endpoint. On success the
    session's cookies are pickled to ``douban.cookie``. Afterwards the cookie
    file is loaded again and used for a plain ``requests.get`` of the home
    page to check whether the account marker appears in the HTML.

    Prints the outcome of each step; returns ``None``.
    """
    # NOTE(review): credentials are hard-coded for the demo; do not commit
    # real account data like this.
    username = "18782902568"
    password = "admin123"
    cookie_file = "douban.cookie"
    login_url = "https://accounts.douban.com/j/mobile/login/basic"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"
    }
    post_data = {
        "ck": "",
        "name": username,
        "password": password,
        "remember": "true",
        "ticket": "",
    }

    session = requests.session()
    # A timeout keeps the script from hanging forever on a stalled connection.
    res = session.post(login_url, data=post_data, headers=headers, timeout=10)
    res_json = json.loads(res.text)
    if res_json.get("status") == "success":
        print("登入成功")
        with open(cookie_file, "wb") as f:
            # session.cookies holds every cookie collected by the session,
            # while res.cookies only holds those set by this one response.
            pickle.dump(session.cookies, f)
    else:
        print("登入失敗")

    try:
        with open(cookie_file, "rb") as f:
            # pickle is acceptable only because this file is written by this
            # very function; never unpickle data from an untrusted source.
            cookies = pickle.load(f)
    except FileNotFoundError:
        # Login failed and no cookie file from an earlier run exists.
        print("未登入")
        return

    # Send the same browser-like User-Agent as the login request so both
    # requests look alike to the server.
    html = requests.get(
        "https://www.douban.com/", cookies=cookies, headers=headers, timeout=10
    ).text
    if "bobby_liyao" in html:
        print("已經登入")
    else:
        print("未登入")


if __name__ == "__main__":
    login()

注意:

1】使用requests.session()而不是直接使用requests,是為了讓登入後取得的cookie能在後續請求之間共享。

2】使用pickle,是為了儲存與獲取序列化資料更加的便捷。

3】當然,如果資料無需儲存到檔案裡面,也可以直接使用res.cookies,或者透過res.cookies.get_dict()轉換為字典,再把cookie傳遞給後續請求。

selenium模擬登入:

import time

import requests
from selenium import webdriver

url = "https://www.douban.com/"
# NOTE(review): `executable_path` is the Selenium 3 way of pointing at
# chromedriver; newer Selenium releases expect a Service object instead —
# confirm against the installed version.
browser = webdriver.Chrome(executable_path="E:/in32/chromedriver.exe")


def login():
    """Log in to Douban through a real browser, then replay its cookies.

    Drives the module-level ``browser`` through the account/password login
    form, copies the browser's cookies into a plain dict and uses them for a
    ``requests.get`` of the home page to check whether the account marker
    appears in the HTML. Prints the result; returns ``None``.
    """
    # Local import keeps this block self-contained. find_element(By...) is
    # used instead of the find_element_by_* helpers, which newer Selenium
    # releases no longer provide.
    from selenium.webdriver.common.by import By

    # NOTE(review): credentials are hard-coded for the demo; do not commit
    # real account data like this.
    username = "18782902568"
    password = "admin123"
    browser.get(url)
    time.sleep(3)

    # The login form lives inside an iframe, so switch into it first.
    browser.switch_to.frame(browser.find_element(By.TAG_NAME, "iframe"))

    # Select the account/password tab.
    login_ele = browser.find_element(By.XPATH, "//li[@class='account-tab-account']")
    login_ele.click()

    username_ele = browser.find_element(By.XPATH, "//input[@id='username']")
    password_ele = browser.find_element(By.XPATH, "//input[@id='password']")
    username_ele.send_keys(username)
    password_ele.send_keys(password)

    # Works around the submit button's attributes changing: it is located by
    # its full class string only after the fields have been filled in
    # (presumably the class gains `btn-active` at that point — verify).
    submit_btn = browser.find_element(By.XPATH, "//a[@class='btn btn-account btn-active']")
    submit_btn.click()
    time.sleep(10)

    # get_cookies() returns a list of dicts; flatten it to {name: value} so
    # it can be passed to requests.
    cookie_dict = {item["name"]: item["value"] for item in browser.get_cookies()}

    # Replay the cookies with the browser's own User-Agent so the request
    # matches the session that obtained them.
    headers = {"User-Agent": browser.execute_script("return navigator.userAgent")}
    res = requests.get(url, cookies=cookie_dict, headers=headers)
    if "bobby_liyao" in res.text:
        print("已經登入")
    else:
        print("未登入")


if __name__ == "__main__":
    try:
        login()
    finally:
        # Always shut down the browser and its chromedriver process.
        browser.quit()