1. 程式人生 > >RNN入門(二)識別驗證碼

RNN入門(二)識別驗證碼

介紹

  作為RNN的第二個demo,筆者將會介紹RNN模型在識別驗證碼方面的應用。   我們的驗證碼及樣本資料集來自於部落格: CNN大戰驗證碼,在這篇部落格中,我們已經準備好了所需的樣本資料集,不需要再辛辛苦苦地弄一遍,直接呼叫data.csv就可以進行建模了。

RNN模型

  用TensorFlow搭建簡單RNN模型,因為是多分類問題,所以在最後的輸出部分再加一softmax層,損失函式採用對數損失函式,optimizer選擇RMSPropOptimizer。以下是RNN模型的完整Python程式碼(TensorFlow_RNN.py):

# -*- coding: utf-8 -*-
import tensorflow as tf
import logging

# Configure logging
logging.basicConfig(level = logging.INFO, format='%(asctime)s - %(levelname)s: %(message)s')
logger = logging.getLogger(__name__)


class RNN:
    """Single-layer RNN classifier built with the TensorFlow 1.x graph API.

    The graph feeds a sequence of ``time_steps`` vectors of ``element_size``
    values through a ``BasicRNNCell``, takes the output of the last time step
    and maps it to ``num_classes`` logits with one linear layer. The loss is
    the mean softmax cross-entropy and is minimised with RMSProp.
    """

    def __init__(self, element_size, time_steps, num_classes, batch_size, hidden_layer_size=150,
                 epoch=1000, learning_rate=0.001, save_model_path=r'./logs/RNN_train.ckpt'):
        """Build the graph.

        Args:
            element_size: size of the vector fed at each time step.
            time_steps: sequence length.
            num_classes: number of target classes.
            batch_size: number of images; stored on the instance but not
                used when building the graph (placeholders have a free
                batch dimension).
            hidden_layer_size: number of units in the RNN cell.
            epoch: number of training iterations.
            learning_rate: learning rate passed to RMSProp.
            save_model_path: checkpoint path used by ``train`` and ``predict``.
        """
        self.epoch = epoch
        self.learning_rate = learning_rate
        self.save_model_path = save_model_path

        # RNN structure
        self.element_size = element_size
        self.time_steps = time_steps
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.hidden_layer_size = hidden_layer_size

        # Input and target placeholders. The target placeholder gets its own
        # name instead of reusing 'inputs' a second time.
        self._inputs = tf.placeholder(tf.float32,
                                      shape=[None, self.time_steps, self.element_size],
                                      name='inputs')
        self.y = tf.placeholder(tf.float32, shape=[None, self.num_classes], name='labels')

        # Basic RNN module built from BasicRNNCell and dynamic_rnn
        rnn_cell = tf.contrib.rnn.BasicRNNCell(self.hidden_layer_size)
        outputs, _ = tf.nn.dynamic_rnn(rnn_cell, self._inputs, dtype=tf.float32)

        Wl = tf.Variable(tf.truncated_normal([self.hidden_layer_size, self.num_classes],
                                             mean=0, stddev=.01))
        bl = tf.Variable(tf.truncated_normal([self.num_classes], mean=0, stddev=.01))

        def get_linear_layer(vector):
            return tf.matmul(vector, Wl) + bl

        # Only the output of the last time step is used for classification
        last_rnn_output = outputs[:, -1, :]
        self.final_output = get_linear_layer(last_rnn_output)

        # Loss (mean softmax cross-entropy over the batch) and RMSProp optimiser
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.final_output,
                                                                   labels=self.y)
        self.cross_entropy = tf.reduce_mean(per_example_loss)
        self.train_model = tf.train.RMSPropOptimizer(self.learning_rate, 0.9).minimize(self.cross_entropy)

        self.saver = tf.train.Saver()
        logger.info('Initialize RNN model...')

    def train(self, x_data, y_data):
        """Train on the full data set for ``epoch + 1`` steps and save a checkpoint.

        Args:
            x_data: inputs fed to the ``[None, time_steps, element_size]`` placeholder.
            y_data: one-hot targets fed to the ``[None, num_classes]`` placeholder.
        """
        logger.info('Training RNN model...')
        # Report roughly 50 times per run; clamp to 1 so that epoch < 50 does
        # not end up as a modulo-by-zero.
        report_every = max(1, self.epoch // 50)
        with tf.Session() as sess:
            # Initialise all variables
            sess.run(tf.global_variables_initializer())
            # Every step uses the whole data set (no mini-batching)
            feed_dict = {self._inputs: x_data, self.y: y_data}
            for i in range(self.epoch + 1):
                sess.run(self.train_model, feed_dict=feed_dict)
                if i % report_every == 0:
                    # Show the loss after this step
                    print('已訓練%d次, loss: %s.'
                          % (i, sess.run(self.cross_entropy, feed_dict=feed_dict)))
            # Save the trained model
            logger.info('Saving RNN model...')
            self.saver.save(sess, self.save_model_path)

    def predict(self, data):
        """Restore the saved checkpoint and return the logits for ``data``.

        Args:
            data: inputs fed to the ``[None, time_steps, element_size]`` placeholder.

        Returns:
            The evaluated ``final_output`` tensor: one row of ``num_classes``
            unnormalised scores per input sample.
        """
        with tf.Session() as sess:
            logger.info('Restoring RNN model...')
            self.saver.restore(sess, self.save_model_path)
            predict = sess.run(self.final_output, feed_dict={self._inputs: data})
        return predict

模型訓練

  對樣本資料集data.csv進行RNN建模,將資料集分為訓練集和測試集,各佔70%和30%.因為圖片的大小為16*20,所以在將圖片看成序列時,序列的長度為20,每一時刻的向量含有16個元素,共有31個目標類,取隱藏層大小為300,總共訓練1000次。 完整的Python程式碼如下:

# -*- coding: utf-8 -*-
"""
數字字母識別
利用RNN對驗證碼的資料集進行多分類
"""
from TensorFlow_RNN import RNN
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelBinarizer

CSV_FILE_PATH = 'F://驗證碼識別/data.csv'          # path of the CSV sample file
df = pd.read_csv(CSV_FILE_PATH)                   # load the sample set

# Feature columns v1..v320: one column per pixel of a 16*20 image
features = ['v'+str(i+1) for i in range(16*20)]

# One-hot encode the true labels
lb = LabelBinarizer()
lb.fit(df['label'])
# Column k of the one-hot encoding corresponds to lb.classes_[k] (the sorted
# class list). Predictions must be decoded with this array; the order of
# appearance in the CSV (df['label'].unique()) is not guaranteed to match it.
labels = lb.classes_
y_true = pd.DataFrame(lb.transform(df['label']), index=df.index)
y_true.columns = ['y'+str(i) for i in range(y_true.shape[1])]
y_bin_columns = list(y_true.columns)

for col in y_bin_columns:
    df[col] = y_true[col]

# Split into training set (70%) and test set (30%)
x_train, x_test, y_train, y_test = train_test_split(df[features], df[y_bin_columns],
                                                    train_size=0.7, test_size=0.3, random_state=123)

# Build the RNN
# Checkpoint path
MODEL_SAVE_PATH = 'F:///驗證碼識別/logs/RNN_train.ckpt'
# RNN hyper-parameters: each image is read as 20 time steps of 16 values
element_size = 16
time_steps = 20
num_classes = len(y_bin_columns)    # derived from the encoding instead of hard-coding 31
hidden_layer_size = 300
batch_size = 960

new_x_train = np.array(x_train).reshape((-1, time_steps, element_size))
new_x_test = np.array(x_test).reshape((-1, time_steps, element_size))

rnn = RNN(element_size=element_size,
          time_steps=time_steps,
          num_classes=num_classes,
          batch_size=batch_size,
          hidden_layer_size=hidden_layer_size,
          epoch=1000,
          save_model_path=MODEL_SAVE_PATH,
          )

# Train the RNN
rnn.train(new_x_train, np.array(y_train))
# Predict the test set; y_pred holds one row of class scores per sample
y_pred = rnn.predict(new_x_test)

# Decode each row to the class with the highest score
prediction = [labels[int(np.argmax(pred))] for pred in y_pred]

# Compute the accuracy. Work on a copy so the new columns are not written
# onto a slice of df.
x_test = x_test.copy()
x_test['prediction'] = prediction
x_test['label'] = df.loc[y_test.index, 'label']
print(x_test.head())
accuracy = accuracy_score(x_test['label'], x_test['prediction'])
# NOTE(review): the message says "CNN" although the model is an RNN; the text
# is left unchanged so that it still matches previously recorded output.
print('CNN的預測準確率為%.2f%%.'%(accuracy*100))

以下是模型訓練的結果:

2018-09-26 11:18:12,339 - INFO: Initialize RNN model...
2018-09-26 11:18:12,340 - INFO: Training RNN model...
已訓練0次, loss: 3.43417.
已訓練20次, loss: 3.42695.
已訓練40次, loss: 3.40638.
已訓練60次, loss: 3.33286.
已訓練80次, loss: 2.78305.
已訓練100次, loss: 2.33391.
已訓練120次, loss: 1.15871.
已訓練140次, loss: 0.659932.
已訓練160次, loss: 0.566225.
已訓練180次, loss: 0.397372.
已訓練200次, loss: 0.317218.
已訓練220次, loss: 0.346782.
已訓練240次, loss: 0.639625.
已訓練260次, loss: 0.0575929.
已訓練280次, loss: 0.100429.
已訓練300次, loss: 0.0347529.
已訓練320次, loss: 0.0189503.
已訓練340次, loss: 0.0265893.
已訓練360次, loss: 0.0151181.
已訓練380次, loss: 1.18662.
已訓練400次, loss: 0.0164317.
已訓練420次, loss: 0.00819814.
已訓練440次, loss: 0.0041992.
已訓練460次, loss: 0.0206414.
已訓練480次, loss: 0.00826409.
已訓練500次, loss: 0.00398952.
已訓練520次, loss: 0.00214751.
已訓練540次, loss: 0.0365587.
已訓練560次, loss: 0.00738376.
已訓練580次, loss: 0.00302118.
已訓練600次, loss: 0.00161713.
已訓練620次, loss: 0.000885372.
已訓練640次, loss: 1.24874.
已訓練660次, loss: 0.00601175.
已訓練680次, loss: 0.0023275.
已訓練700次, loss: 0.00121995.
已訓練720次, loss: 0.000705643.
已訓練740次, loss: 0.000407971.
已訓練760次, loss: 0.000219642.
已訓練780次, loss: 0.0889083.
已訓練800次, loss: 0.00395974.
已訓練820次, loss: 0.00131215.
已訓練840次, loss: 0.000631665.
已訓練860次, loss: 0.000342329.
已訓練880次, loss: 0.000191806.
已訓練900次, loss: 0.000108547.
已訓練920次, loss: 6.29806e-05.
已訓練940次, loss: 3.99281e-05.
已訓練960次, loss: 0.0124334.
已訓練980次, loss: 0.00142853.
2018-09-26 11:26:08,302 - INFO: Saving RNN model...
已訓練1000次, loss: 0.000571731.
2018-09-26 11:26:08,761 - INFO: Restoring RNN model...
INFO:tensorflow:Restoring parameters from F:///驗證碼識別/logs/RNN_train.ckpt
2018-09-26 11:26:08,761 - INFO: Restoring parameters from F:///驗證碼識別/logs/RNN_train.ckpt
      v1  v2  v3  v4  v5  v6  v7  v8  v9  v10  ...    v313  v314  v315  v316  \
657    1   1   1   1   1   1   1   1   1    1  ...       1     1     1     1   
18     1   1   1   1   1   1   1   1   1    1  ...       1     1     1     1   
700    1   1   1   1   1   1   1   1   1    1  ...       1     1     1     1   
221    1   1   1   1   1   1   1   1   1    1  ...       1     1     1     1   
1219   1   1   1   1   1   1   1   1   1    1  ...       1     1     1     1   

      v317  v318  v319  v320  prediction  label  
657      1     1     1     1           G      G  
18       1     1     1     1           1      1  
700      1     1     1     1           H      H  
221      1     1     1     1           5      5  
1219     1     1     1     1           V      V  

[5 rows x 322 columns]
CNN的預測準確率為93.69%.

總共的訓練時間為8分鐘,在測試集上的準確率為93.69%.與CNN相比,測試集上的準確率略高,訓練時間卻明顯減少,因為CNN訓練1000次的時間為75分鐘。總的來說,該RNN模型在這個資料集的表現優於之前的CNN模型。

模型預測

  接著,我們利用剛才訓練好的RNN模型,對新驗證碼進行識別,看看模型的識別效果。   筆者採集了50張新驗證碼,如下:

新驗證碼

  完整的預測新驗證碼的Python指令碼如下:

# -*- coding: utf-8 -*-

"""
利用訓練好的RNN模型對驗證碼進行識別
(共訓練960條資料,訓練1000次測試集上的準確率為95.15%.)
"""
import os
import cv2
import pandas as pd
import numpy as np
from TensorFlow_RNN import RNN

def split_picture(imagepath):
    """Cut a captcha image into single-character images.

    The image is read in grayscale, its one-pixel border is whitened, it is
    median-filtered and binarised, and every contour whose bounding box does
    not touch the left/top edge and covers at least 100 pixels is cropped.
    The crops are written, ordered left to right, to ``F://chars/1.jpg``,
    ``F://chars/2.jpg``, ...

    Args:
        imagepath: path of the captcha image.

    Raises:
        IOError: if the image cannot be read.
    """
    # Read the picture in grayscale mode
    gray = cv2.imread(imagepath, 0)
    if gray is None:
        # cv2.imread signals failure by returning None instead of raising
        raise IOError('Cannot read image: %s' % imagepath)

    # Turn the one-pixel border of the picture white
    gray[0, :] = 255
    gray[-1, :] = 255
    gray[:, 0] = 255
    gray[:, -1] = 255

    # Median filter with a 3*3 kernel
    blur = cv2.medianBlur(gray, 3)

    # Binarise
    _, thresh1 = cv2.threshold(blur, 200, 255, cv2.THRESH_BINARY)

    # Extract the bounding box of every character.
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on other major versions; the contours are the
    # second-to-last item either way. A copy is passed because some older
    # releases modify the input image, and thresh1 is cropped below.
    contours = cv2.findContours(thresh1.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2]
    chars_list = []
    for cnt in contours:
        # Upright bounding rectangle of the contour
        x, y, w, h = cv2.boundingRect(cnt)
        if x != 0 and y != 0 and w*h >= 100:
            chars_list.append((x, y, w, h))

    # Write the crops from left to right
    sorted_chars_list = sorted(chars_list, key=lambda box: box[0])
    for i, (x, y, w, h) in enumerate(sorted_chars_list):
        cv2.imwrite('F://chars/%d.jpg'%(i+1), thresh1[y:y+h, x:x+w])

def remove_edge_picture(imagepath):
    """Delete the image file when at least three of its four corners are dark.

    A corner counts as dark when its grayscale value is below 127.
    """
    img = cv2.imread(imagepath, 0)
    rows, cols = img.shape
    last_row, last_col = rows - 1, cols - 1

    corners = ((0, 0), (last_row, 0), (0, last_col), (last_row, last_col))
    dark_corners = sum(1 for r, c in corners if img[r, c] < 127)

    if dark_corners < 3:
        return
    os.remove(imagepath)

def resplit_with_parts(imagepath, parts):
    """Replace an image by ``parts`` vertical strips of equal width.

    The source file is removed and the strips are written to
    ``F://chars/<name>-<i>.jpg`` for i = 0 .. parts-1, where ``<name>`` is the
    source file name up to its first dot. Each strip is ``width // parts``
    pixels wide.
    """
    picture = cv2.imread(imagepath, 0)
    os.remove(imagepath)
    _, total_width = picture.shape

    base_name = imagepath.split('/')[-1].split(r'.')[0]
    strip_width = total_width // parts
    for index in range(parts):
        left = index * strip_width
        target = 'F://chars/%s.jpg' % (base_name + '-' + str(index))
        cv2.imwrite(target, picture[:, left:left + strip_width])

def resplit(imagepath):
    """Split an image that is too wide for a single character.

    Width >= 64 is split into 4 parts, >= 48 into 3, >= 26 into 2; narrower
    images are left untouched.
    """
    width = cv2.imread(imagepath, 0).shape[1]

    # (minimum width, number of parts), widest first; the first match wins
    for min_width, parts in ((64, 4), (48, 3), (26, 2)):
        if width >= min_width:
            resplit_with_parts(imagepath, parts)
            break

# Binarise an image and resize it to 16*20, overwriting the file in place
def convert(dir, file):
    """Threshold ``dir/file`` at 127, resize it to 16x20 and save it back."""
    target = '%s/%s' % (dir, file)
    # Read the picture in grayscale mode
    source = cv2.imread(target, 0)
    # Binarise, then shrink/enlarge to the fixed size
    binary = cv2.threshold(source, 127, 255, cv2.THRESH_BINARY)[1]
    resized = cv2.resize(binary, (16, 20), interpolation=cv2.INTER_AREA)
    # Overwrite the original file
    cv2.imwrite(target, resized)

# Read the pixels of an image and convert them to 0/1 values
def Read_Data(dir, file):
    """Return the flattened pixels of ``dir/file`` as a list of 0/1 ints.

    The image is read in grayscale and thresholded at 127; a pixel maps to 1
    when the thresholded value is 255 and to 0 otherwise.
    """
    gray = cv2.imread(dir + '/' + file, 0)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    return [int(value == 255) for value in binary.ravel()]

def _char_sort_key(filename):
    """Return the numeric parts of '<n>.jpg' or '<n>-<k>.jpg' as a tuple of ints."""
    stem = filename.split('.')[0]
    return tuple(int(part) for part in stem.split('-'))


def predict(rnn, VerifyCodePath, time_steps, element_size):
    """Recognise the characters of one captcha image.

    The working directory ``F://chars`` is emptied, the captcha is split into
    character images there, noise fragments are removed, over-wide fragments
    are re-split, every fragment is normalised to 16*20 and the resulting
    0/1 vectors are classified by ``rnn``.

    Args:
        rnn: model object whose ``predict`` method returns one row of class
            scores per input sample.
        VerifyCodePath: path of the captcha image; the file name up to its
            first dot is returned as the true label.
        time_steps: sequence length used to reshape each 320-value vector.
        element_size: vector size per time step used for the reshape.

    Returns:
        Tuple ``(true_label, predicted_text)``. ``predicted_text`` is an empty
        string when no character image was produced.
    """
    chars_dir = 'F://chars'

    # Remove the fragments left over from a previous run
    for name in os.listdir(chars_dir):
        os.remove(chars_dir + '/' + name)

    split_picture(VerifyCodePath)

    # Defined up front so the return statement also works when the split
    # produced no fragments.
    prediction = []

    files = os.listdir(chars_dir)
    if not files:
        print('檢視的資料夾為空!')
    else:
        # Drop noise fragments
        for name in files:
            remove_edge_picture(chars_dir + '/' + name)

        # Re-split fragments that contain several touching characters
        for name in os.listdir(chars_dir):
            resplit(chars_dir + '/' + name)

        # Normalise every fragment to 16*20
        for name in os.listdir(chars_dir):
            convert(chars_dir, name)

        # Order the fragments left to right by their numeric file name
        # ('2.jpg' < '2-0.jpg' < '2-1.jpg' < '10.jpg'). Sorting by the first
        # character alone would leave ties to the order of os.listdir.
        files = sorted(os.listdir(chars_dir), key=_char_sort_key)
        table = [Read_Data(chars_dir, name) for name in files]

        test_data = pd.DataFrame(table, columns=['v%d' % i for i in range(1, 321)])
        new_test_data = np.array(test_data).reshape((-1, time_steps, element_size))

        y_pred = rnn.predict(new_test_data)

        # Map each row of scores to the class with the highest score
        labels = '123456789ABCDEFGHJKLNPQRSTUVXYZ'
        prediction = [labels[int(np.argmax(pred))] for pred in y_pred]

    TRUE_LABEL = VerifyCodePath.split('/')[-1].split(r'.')[0]

    return TRUE_LABEL, ''.join(prediction)

def main():

    # 建立RNN預測模型
    # 模型儲存地址
    MODEL_SAVE_PATH = 'F:///驗證碼識別/logs/RNN_train.ckpt'
    # RNN初始化
    element_size = 16
    time_steps = 20
    num_classes = 31
    batch_size = 4
    hidden_layer_size = 300
    rnn = RNN(element_size=element_size,
              time_steps=time_steps,
              num_classes=num_classes,
              batch_size=batch_size,
              hidden_layer_size=hidden_layer_size,
              epoch=1000,
              save_model_path=MODEL_SAVE_PATH,
              )

    # 預測驗證碼
    pred_list = []
    dir = 'F://VerifyCode/'
    for file in os.listdir(dir):
        VerifyCodePath = dir+file
        label, prediction = predict(rnn, VerifyCodePath, time_steps, element_size)
        pred_list.append((label, prediction))
        # print('真實值為:%s, 預測結果為: %s.'%(label, prediction))

    # 統計預測正確的驗證碼的數量及準確率
    total_images = len(pred_list)
    correct_pred