NJU實驗室安全題庫爬蟲
阿新 • • 發佈:2019-01-31
關鍵是抓包分析請求。這裡模擬登陸有點難,請求中需要包含很多資訊;也可以先手動登陸,再直接使用瀏覽器中的 cookies。抓取到的頁面資料用 BS4(BeautifulSoup 4)解析。
"""Scraper for the NJU laboratory-safety question bank.

Logs in to the exercise site with a ``requests`` session, then requests the
exercise page once per question number and appends the question text found in
each page to ``problems.txt``.
"""
import time

import requests
from bs4 import BeautifulSoup as bfs


def login(username, password):
    """Post the login form and return the ``requests`` session used for it.

    Args:
        username: Account name, sent as the ``LoginID`` form field.
        password: Password, sent as the ``UserPwd`` form field.

    Returns:
        The ``requests`` session the login request was sent through. It is
        returned whether or not the server reported success.
    """
    # 'Content-Length' is intentionally not set here: requests computes it
    # from the encoded form body.
    # NOTE(review): the session cookie below is a fixed value captured from a
    # browser; it presumably has to match a live server session -- confirm.
    headers = {
        'Host': '219.219.115.160',
        'Accept': 'text/html, application/xhtml+xml, image/jxr, */*',
        'Connection': 'Keep-Alive',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-Hans-CN, zh-Hans; q=0.5',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'ASP.NET_SessionId=nux5h1mhokpcrelfcpqqr2w5',
        'Referer': 'http://219.219.115.160/pc/index.aspx',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
    }
    # NOTE(review): 'ButLogin' is already percent-encoded, and requests will
    # percent-encode it again when building the form body -- confirm the
    # server accepts this, or send the raw bytes instead.
    data = {
        '__VIEWSTATE': '/wEPDwUKMTg4ODA4NDE0Ng9kFgICAw9kFgICBw8PFgIeB0VuYWJsZWRnFgIeB29uY2xpY2sFrwJ2Y'
                       'XIgbGVmdD0oc2NyZWVuLndpZHRoLTU1MCkvMjt2YXIgdG9wPShzY3JlZW4uaGVpZ2h0LTYwLTQyMikvMjtOZXdXaW49d2'
                       'luZG93Lm9wZW4oJ1BlcnNvbkluZm8vUmVnaXN0VXNlci5hc3B4JywnUmVnaXN0VXNlcicsJ3RpdGxlYmFyPXllcyxtZW51YmFyPW5vLHRvb'
                       '2xiYXI9bm8sbG9jYXRpb249bm8sZGlyZWN0b3JpZXM9bm8sc3RhdHVzPXllcyxzY3JvbGxiYXJzPW5vLHJlc2l6YWJsZT1ubyxjb3B5aGlzdG9yeT1'
                       '5ZXMsdG9wPScrdG9wKycsbGVmdD0nK2xlZnQrJyx3aWR0aD01NTAsaGVpZ2h0PTUwMCcpO3JldHVybiBmYWxzZTtkZBwwcHXyGLjoZswfc5UGVTqx/I86h'
                       'm7z1UxpKqX/aXuT',
        '__VIEWSTATEGENERATOR': 'BBE0D82B',
        'ButLogin': '%B5%C7+%C2%BC',
        'LoginID': username,
        'UserPwd': password,
    }
    url = 'http://219.219.115.160/pc/index.aspx'
    spyder = requests.session()
    response = spyder.post(url, data=data, headers=headers)
    # response.ok only reflects the HTTP status (< 400); it does not prove
    # that the credentials were accepted.
    if response.ok:
        print("登陸成功")
    return spyder


def get_problems(spyder, number, subject=64):
    """Fetch exercise page ``number`` and return its question text.

    Args:
        spyder: ``requests`` session to send the request through.
        number: Value for the ``TestNum`` query parameter.
        subject: Value for the ``drpSubject`` form field (default 64, the
            value that used to be hard-coded).

    Returns:
        The stripped text of the first ``td[colspan]`` element in the page.

    Raises:
        IndexError: If the page contains no ``td[colspan]`` element.
        UnicodeDecodeError: If the response body is not valid GBK.
        requests.RequestException: If the HTTP request itself fails.
    """
    numurl = 'http://219.219.115.160/pc/PersonInfo/StartExercise_Mobile.aspx?TestNum=%s' % (str(number))
    headers = {
        'Host': '219.219.115.160',
        'Cache-Control': 'no-cache',
        'Accept': 'text/html, application/xhtml+xml, image/jxr, */*',
        'Connection': 'Keep-Alive',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-Hans-CN, zh-Hans; q=0.5',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'ASP.NET_SessionId=nux5h1mhokpcrelfcpqqr2w5',
        'Referer': 'http://219.219.115.160/pc/PersonInfo/StartExercise_Mobile.aspx?TestNum=1&SelTestNum=1&SelectTest=yes',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
    }
    # The '__EVENTTARGET' and '__LASTFOCUS' keys previously carried a stray
    # trailing ':'; the form field names have no colon.
    # NOTE(review): requests encodes 'TestTypeTitle1' as UTF-8 while the
    # response is decoded as GBK -- confirm the server does not care.
    postdata = {
        '__EVENTARGUMENT': '',
        '__EVENTTARGET': '',
        '__LASTFOCUS': '',
        '__VIEWSTATE': '/wEPDwUKMTQ1OTIyNzY3Nw9kFgJmD2QWBGYPEA8WBh4NRGF0YVRleHRGaWVsZAUITG9yZU5hbWUeDkRhdGFWYWx1ZUZpZWxkBQZMb3JlSUQeC18hRGF0YUJvdW5kZ2QQFQkUPT09PeaJgOacieS4k+mimD09PT0V6Ziy54Gr5a6J5YWo5LiO5L+d5a+GG+WMluWtpuWNsemZqeWTgeS9v+eUqOWuieWFqAznlKjnlLXlronlhagJ6YCa6K+G57G7DOW6lOaApeaVkeaPtAnovpDlsITnsbsV6K6h566X5py6572R57uc5a6J5YWoKuS7quWZqOiuvuWkh++8iOeJueenjeiuvuWkh++8ieS9v+eUqOWuieWFqBUJATACNTICNTMCNTkCNjACNjECNjICNjMCNjQUKwMJZ2dnZ2dnZ2dnFgFmZAIBDxBkEBUBFD09PT3lhajpg6jpopjlnos9PT09FQEBMBQrAwFnFgFmZGRML2mFcR03ZNPi08k41KsuIXSn028KtQLqEv8qLV7yCA==',
        '__VIEWSTATEGENERATOR': '08FA5156',
        'AutoJudge': '0',
        'drpQuestionType': '0',
        'drpSubject': str(subject),
        'FillAutoGrade': '0',
        'irow': '1',
        'PaperID': '0',
        'PassMark': '0',
        'SeeResult': '0',
        'select1': '1',
        'TestTypeTitle1': '判斷題',
        'timeminute': '0',
        'timesecond': '0',
        'UserScoreID': '0',
    }
    data = spyder.post(numurl, data=postdata, headers=headers).content.decode('gbk')
    problem_data = bfs(data, 'html.parser').select('td[colspan]')
    # Indexing deliberately raises IndexError when nothing matched; the
    # caller uses that as the "no more questions" signal.
    problem = problem_data[0].text.strip()
    return problem


def main():
    """Prompt for credentials, then scrape questions until none are left.

    Each question is appended as one line to ``problems.txt``. The loop stops
    at the first page without a question cell, or on a network/decoding
    error (which is reported before stopping).
    """
    username = input("輸入登陸賬號:")
    password = input("請輸入密碼:")
    spyder = login(username, password)

    # Open the output once; the context manager closes it on every exit path.
    with open('problems.txt', 'a+', encoding='utf-8') as f:
        i = 1
        while True:
            try:
                problem = get_problems(spyder, i)
            except IndexError:
                # No question cell in the page: the bank is exhausted.
                break
            except (requests.RequestException, UnicodeDecodeError) as exc:
                print("爬取中斷: %s" % exc)
                break
            f.write(problem + '\n')
            # Report progress only after the i-th question has been saved.
            if i % 20 == 0:
                print("已完成%d題爬取" % i)
            i += 1
            time.sleep(0.5)  # throttle requests to the server


if __name__ == '__main__':
    main()