爬蟲-模擬登入
阿新 • • 發佈:2020-07-20
模擬登入的程式碼實現:
# Socket server: a minimal HTTP endpoint that demonstrates Set-Cookie.
import socket
import json
import threading

# After a successful login the service hands the user a sessionid string
# (it should be complex enough that nobody can forge it).
user_info = {
    "sessionid": "bobby"
}

# The browser then attaches this sessionid to every request (all URLs).
# 1. How is the browser told about the sessionid?
# 2. How do we make sure the browser sends it on every request?
#
# Session vs. cookie:
# 1. A session is maintained and interpreted by the server, and is handed
#    to the browser through Set-Cookie.
# 2. A cookie is a browser facility; its values are sent along with every
#    subsequent request.

# Status line and headers use explicit CRLF line endings, and an empty line
# separates the headers from the body.
# NOTE: the fixed Expires date of the sessionid cookie lies in 2021; update it
# when experimenting, otherwise the cookie is already expired when it is set.
RESPONSE_TEMPLATE = (
    "HTTP/1.0 200 OK\r\n"
    "Content-type: text/html\r\n"
    "Set-Cookie: name=bobby\r\n"
    "Set-Cookie: course_id=78\r\n"
    "Set-Cookie: sessionid=abc123; Expires=Wed, 09 Jun 2021 10:18:14 GMT\r\n"
    "\r\n"
    "{}\n"
)

# Static payload returned (JSON-encoded) as the body of every response.
COURSES = [
    {
        "name": "django打造線上教育",
        "teacher": "bobby",
        "url": "https://coding.imooc.com/class/78.html",
    },
    {
        "name": "python高階程式設計",
        "teacher": "bobby",
        "url": "https://coding.imooc.com/class/200.html",
    },
    {
        "name": "scrapy分散式爬蟲",
        "teacher": "bobby",
        "url": "https://coding.imooc.com/class/92.html",
    },
    {
        "name": "django rest framework打造生鮮電商",
        "teacher": "bobby",
        "url": "https://coding.imooc.com/class/131.html",
    },
    {
        "name": "tornado從入門到精通",
        "teacher": "bobby",
        "url": "https://coding.imooc.com/class/290.html",
    },
]


def handle_sock(sock, addr):
    """Serve one client connection: read a request, reply, close.

    Reads up to 1024 bytes from *sock*, prints them, sends back the fixed
    HTTP response (Set-Cookie headers plus the JSON course list) and closes
    the socket.

    Args:
        sock: Connected client socket.
        addr: Client address as returned by ``accept()``; not used.
    """
    try:
        # recv() blocks until the client sends something or disconnects.
        tmp_data = sock.recv(1024)
        # errors="replace": the request is arbitrary client input, so bytes
        # that are not valid UTF-8 must not crash the handler thread.
        print(tmp_data.decode("utf8", errors="replace"))
        body = json.dumps(COURSES)
        # sendall() keeps writing until the whole response is out; a bare
        # send() may transmit only part of it.
        sock.sendall(RESPONSE_TEMPLATE.format(body).encode("utf8"))
    finally:
        # Close the connection even when recv/send raised.
        sock.close()


def main():
    """Listen on 0.0.0.0:8002 and handle each connection in its own thread."""
    with socket.socket() as server:
        # Bind to port 8002 on all interfaces.
        server.bind(('0.0.0.0', 8002))
        server.listen()
        # Accept client connections and start a thread to handle each one.
        while True:
            # Blocks until a client connects.
            sock, addr = server.accept()
            client_thread = threading.Thread(target=handle_sock, args=(sock, addr))
            client_thread.start()


if __name__ == "__main__":
    main()
requests+session模擬登入豆瓣:
import json
import pickle

import requests


def login(username="18782902568", password="admin123"):
    """Log in to Douban with requests, persist the cookies, then verify them.

    Posts the credentials to the Douban login endpoint. On success the
    session's cookie jar is pickled to ``douban.cookie``. Afterwards the
    cookie file is loaded again and used for a request to the home page to
    check whether the account marker appears in the returned HTML.

    Args:
        username: Account name. The default is the tutorial's sample value;
            real credentials should be passed in rather than kept in source.
        password: Account password; same remark as for ``username``.
    """
    session = requests.session()
    url = "https://accounts.douban.com/j/mobile/login/basic"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"
    }
    post_data = {
        "ck": "",
        "name": username,
        "password": password,
        "remember": "true",
        "ticket": ""
    }

    # A timeout keeps the script from hanging forever on a stalled connection.
    res = session.post(url, data=post_data, headers=headers, timeout=10)
    try:
        res_json = json.loads(res.text)
    except ValueError:
        # Non-JSON answer (e.g. an error page): treat it as a failed login.
        res_json = {}

    if isinstance(res_json, dict) and res_json.get("status") == "success":
        print("登入成功")
        with open("douban.cookie", "wb") as f:
            # Persist the session's jar: it holds every cookie gathered by the
            # session, not only those set by this single response.
            pickle.dump(session.cookies, f)
    else:
        print("登入失敗")

    try:
        # The file is only ever written by this script; never unpickle a
        # cookie file that comes from an untrusted source.
        with open("douban.cookie", "rb") as f:
            cookies = pickle.load(f)
    except FileNotFoundError:
        # Login failed and no cookies were saved by an earlier run.
        print("未登入")
        return

    # Send the same User-Agent that was used for the login request.
    html = requests.get(
        "https://www.douban.com/", cookies=cookies, headers=headers, timeout=10
    ).text
    if "bobby_liyao" in html:
        print("已經登入")
    else:
        print("未登入")


if __name__ == "__main__":
    login()
注意:
1】使用requests.session()而不是直接使用requests,是為了讓登入後取得的cookie能夠在後續的請求中共享。
2】使用pickle,是為了儲存與獲取序列化資料更加的便捷。
3】當然,資料也可以不儲存到檔案裡面,而是直接使用res.cookies,或者用res.cookies.get_dict()轉換為字典,獲取cookie並傳遞。
selenium模擬登入:
import time

import requests
from selenium import webdriver
from selenium.webdriver.common.by import By

url = "https://www.douban.com/"
# NOTE(review): the executable_path keyword is not accepted by recent
# Selenium 4 releases — confirm the installed version before running.
browser = webdriver.Chrome(executable_path="E:/in32/chromedriver.exe")


def login(username="18782902568", password="admin123"):
    """Log in to Douban through Selenium and verify the cookies with requests.

    Drives the module-level ``browser`` through the login form embedded in
    the home page's iframe, copies the browser's cookies into a plain dict
    and uses them for a requests call to check whether the account marker
    appears in the returned HTML.

    Args:
        username: Account name. The default is the tutorial's sample value;
            real credentials should be passed in rather than kept in source.
        password: Account password; same remark as for ``username``.
    """
    browser.get(url)
    # Fixed pause to let the page (and its login iframe) finish loading.
    time.sleep(3)

    # The login form lives inside an iframe, so switch into it first.
    # find_element(By..., ...) is used because the find_element_by_* helpers
    # no longer exist in Selenium 4.
    browser.switch_to.frame(browser.find_element(By.TAG_NAME, "iframe"))
    login_ele = browser.find_element(By.XPATH, "//li[@class='account-tab-account']")
    login_ele.click()

    username_ele = browser.find_element(By.XPATH, "//input[@id='username']")
    password_ele = browser.find_element(By.XPATH, "//input[@id='password']")
    username_ele.send_keys(username)
    password_ele.send_keys(password)

    # Look the button up only after the fields are filled: its class
    # attribute changes, and this XPath matches the active state.
    submit_btn = browser.find_element(By.XPATH, "//a[@class='btn btn-account btn-active']")
    submit_btn.click()
    # Fixed pause to let the login request complete before reading cookies.
    time.sleep(10)

    # get_cookies() returns a list of dicts; flatten it into a
    # name -> value mapping that requests can send.
    cookie_dict = {item["name"]: item["value"] for item in browser.get_cookies()}

    # Reuse the browser's own User-Agent so the check is sent with the same
    # client identity that obtained the cookies.
    headers = {"User-Agent": browser.execute_script("return navigator.userAgent")}
    res = requests.get(url, cookies=cookie_dict, headers=headers, timeout=10)
    if "bobby_liyao" in res.text:
        print("已經登入")
    else:
        print("未登入")


if __name__ == "__main__":
    try:
        login()
    finally:
        # Shut down the browser and its driver process even when login fails.
        browser.quit()