spider---手動驗證碼的登入操作
阿新 • • 發佈:2018-11-07
# Log in to so.gushiwen.org when the captcha has to be typed in by a human:
# fetch the login page, save the captcha image to disk, ask the user for the
# captcha text and credentials, then submit the login form.
import getpass
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

LOGIN_URL = 'https://so.gushiwen.org/user/login.aspx?from=http://www.gushiwen.org/default.aspx'
POST_URL = 'https://so.gushiwen.org/user/login.aspx?from=http%3a%2f%2fso.gushiwen.org%2fuser%2fcollect.aspx'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'
}
CAPTCHA_FILE = 'code.png'
# requests never times out by default; bound every call so the script cannot hang.
REQUEST_TIMEOUT = 10  # seconds


def _required_attr(soup, selector, attr):
    """Return attribute *attr* of the first element matching *selector*.

    Args:
        soup: parsed login page.
        selector: CSS selector of the element to look up.
        attr: name of the attribute to read from that element.

    Raises:
        LookupError: if no element matches or the attribute is missing.
    """
    matches = soup.select(selector)
    value = matches[0].get(attr) if matches else None
    if value is None:
        raise LookupError(
            'login page has no {!r} element with a {!r} attribute'.format(selector, attr)
        )
    return value


def main():
    """Run the interactive login and print the body of the login response."""
    # A single session is used for every request so that cookies set while
    # loading the page and the captcha are sent back with the login POST.
    with requests.Session() as session:
        page = session.get(url=LOGIN_URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        page.raise_for_status()
        soup = BeautifulSoup(page.text, 'lxml')

        # Download the captcha image referenced by the page and save it so the
        # user can open it. urljoin copes with both relative and absolute src.
        image_src = urljoin(page.url, _required_attr(soup, '#imgCode', 'src'))
        image = session.get(url=image_src, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        image.raise_for_status()
        with open(CAPTCHA_FILE, 'wb') as fp:
            fp.write(image.content)

        # Hidden form fields that must be echoed back with the POST.
        views = _required_attr(soup, '#__VIEWSTATE', 'value')
        viewg = _required_attr(soup, '#__VIEWSTATEGENERATOR', 'value')

        # Ask the user for the captcha text and the credentials. Each prompt
        # names the value it actually wants, and the password is not echoed.
        code = input('請輸入驗證碼--')
        user_email = input('請輸入郵箱--')
        user_password = getpass.getpass('請輸入密碼--')

        form_data = {
            "__VIEWSTATE": views,
            "__VIEWSTATEGENERATOR": viewg,
            "from": "http://www.gushiwen.org/default.aspx",
            "email": user_email,
            "pwd": user_password,
            "code": code,
            # NOTE(review): submit-button value kept exactly as in the original
            # script; confirm it matches what the live form sends.
            "denglu": "登入",
        }

        # Send the login POST and show whatever the server answered.
        r_post = session.post(
            url=POST_URL, headers=HEADERS, data=form_data, timeout=REQUEST_TIMEOUT
        )
        print(r_post.text)


if __name__ == '__main__':
    main()