python 驗證碼識別改進版
阿新 • • 發佈:2019-01-06
識別上圖所示的驗證碼
#!/usr/bin/python
# encoding: utf-8
"""Recognize the text of a CAPTCHA image after binarizing it.

The image is converted to grayscale, thresholded into a two-level image,
displayed, and then passed to pytesseract.image_to_string.

out.show() displays the image as it looks after the noise-reduction
(thresholding) step.
"""
import sys

# Python 2 only: reload() is not a builtin on Python 3 and
# sys.setdefaultencoding() does not exist there, so guard the hack instead
# of crashing with a NameError.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('utf-8')

import requests
from lxml import etree
import time
import datetime
import re
from switch import *
from PIL import Image
import pyocr
import pyocr.builders
import pytesseract

image_path = 'XXX.png'  # placeholder path of the CAPTCHA image to read
im = Image.open(image_path)

# Convert to 8-bit grayscale ('L') so every pixel is a single 0-255 value.
imgry = im.convert('L')

# Lookup table for the 256 possible gray levels: values below the threshold
# map to 0 and everything else maps to 1.
threshold = 140
table = [0 if i < threshold else 1 for i in range(256)]

# Apply the table and convert the result to a 1-bit ('1') image.
out = imgry.point(table, '1')
out.show()

# Read the string out of the cleaned-up image.
cap_str = pytesseract.image_to_string(out)
# Parenthesized form prints the same on Python 2 and also runs on Python 3.
print(cap_str)
然後使用 image_to_string 識別，準確率就提高很多了。
圖片降噪後的處理結果
<img src="https://img-blog.csdn.net/20160604175525790?watermark/2/text/aHR0cDovL2Jsb2cuY3Nkbi5uZXQv/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70/gravity/Center" alt="" />