古詩文網驗證碼識別
阿新 • • 發佈:2020-07-28
#!/usr/bin/python
# Download the login-page captcha image from gushiwen and hand it to a
# captcha-recognition helper, then print the recognised text.
import requests
from lxml import etree
from codeClass import YDMHTTP


# Wrapper around the captcha-recognition service.
def getCodeText(imgPath, codeType):
    """Recognise the captcha stored at *imgPath* and return its text.

    Args:
        imgPath: Path of the captcha image file on disk.
        codeType: Captcha type code passed through to the recognition
            platform (the script below passes 1004).

    Returns:
        Currently always ``None``: the body is an unimplemented placeholder.

    TODO(review): implement using the imported ``YDMHTTP`` client; its API
    is not visible in this file.
    """
    pass


# Request headers that present the script as a desktop browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2626.106 Safari/537.36'
}
url = 'https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx'

# Fetch the login page that embeds the captcha image.
page_text = requests.get(url=url, headers=headers).text

# Parse the page and read the ``src`` attribute of the captcha <img>.
# The parsed document must be bound to ``tree``: the original stored it in
# ``page_text`` but then queried an undefined ``tree``, raising NameError.
tree = etree.HTML(page_text)
# NOTE(review): the image host is ``so.gushiwen.org`` while the login page is
# served from ``so.gushiwen.cn`` -- confirm the prefix is still correct.
code_img_src = 'https://so.gushiwen.org' + tree.xpath('//*[@id="imgCode"]/@src')[0]
img_data = requests.get(url=code_img_src, headers=headers).content

# Save the captcha image locally so the recogniser can read it from disk.
with open('./code.jpg', 'wb') as fp:
    fp.write(img_data)

# Run the recognition helper on the saved image and show the result.
code_text = getCodeText('code.jpg', 1004)
print('識別結果為:', code_text)