1. 程式人生 > >Keras初探(二)——識別驗證碼

Keras初探(二)——識別驗證碼

訪問本站觀看效果更佳。繼上篇對於Keras的初步探討之後,我將給出一個例子,講解如何利用Keras處理影象分類問題。今天我們先探討一下識別驗證碼的問題。

一、探討內容

1、資料來源
2、模型搭建
3、優化問題

二、資料來源

在本文中,我打算對驗證碼進行識別,有一個python包——captcha,利用它可生成驗證碼。當然使用前需要先匯入相關packages。

sudo pip3 install captcha

import cv2
import numpy as np
from captcha.image import ImageCaptcha

這裡可以設定驗證碼的大小為28*28,字型大小24。比如下面兩張圖片,第一張是5,第二張是6。干擾相對較大。 image1_5.jpg

image2_6.jpg 下面給出完整程式碼

import cv2
import numpy as np
from captcha.image import ImageCaptcha

def generate_captcha(text):
    """Render ``text`` as a captcha image and return it as a uint8 array.

    The generator is configured for a 28x28 canvas with a single font size
    of 24.

    Args:
        text: The string to draw on the captcha.

    Returns:
        The generated image converted to a ``numpy`` array of dtype uint8.
    """
    generator = ImageCaptcha(width=28, height=28, font_sizes=[24])
    return np.array(generator.generate_image(text), dtype=np.uint8)

if __name__ == '__main__':
    # Local import: this script's import header does not bring in ``os``.
    import os

    output_dir = './datasets/images/'
    # cv2.imwrite does not create missing directories, so make sure the
    # target folder exists before writing any image.
    os.makedirs(output_dir, exist_ok=True)

    for i in range(5000):
        # One random digit per image; the digit is stored after the '_' in
        # the file name so the training script can recover the label.
        label = np.random.randint(0, 10)
        image = generate_captcha(str(label))
        image_name = 'image{}_{}.jpg'.format(i + 1, label)
        output_path = output_dir + image_name
        # imwrite reports failure through its return value instead of raising.
        if not cv2.imwrite(output_path, image):
            raise IOError('failed to write {}'.format(output_path))

儲存檔案為gendata.py,執行檔案後生成5000張驗證碼圖片。這裡只是實驗性質,所以驗證碼圖片數量較少,大家自己做實驗的時候可以適當增加一些圖片數量。

三、模型搭建

一開始我們可以搭建一個非常簡單的LeNet來進行驗證和測試。儲存下列檔案命名為lenet.py

# import the necessary packages
from keras.models import Sequential
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dense
from keras import backend as K
 
class LeNet:
    """Factory for a small LeNet-style convolutional classifier."""

    @staticmethod
    def build(width, height, depth, classes):
        """Assemble the (uncompiled) network.

        Args:
            width: Input image width in pixels.
            height: Input image height in pixels.
            depth: Number of input channels.
            classes: Size of the final softmax layer.

        Returns:
            A ``Sequential`` model: two CONV => RELU => POOL stages, one
            500-unit fully connected RELU layer and a softmax classifier.
        """
        # Put the channel axis wherever the backend's image data format
        # expects it: first for "channels_first", last otherwise.
        if K.image_data_format() == "channels_first":
            input_shape = (depth, height, width)
        else:
            input_shape = (height, width, depth)

        layers = [
            # first CONV => RELU => POOL stage
            Conv2D(20, (5, 5), padding="same", input_shape=input_shape),
            Activation("relu"),
            MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
            # second CONV => RELU => POOL stage
            Conv2D(50, (5, 5), padding="same"),
            Activation("relu"),
            MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
            # single FC => RELU stage
            Flatten(),
            Dense(500),
            Activation("relu"),
            # softmax classifier
            Dense(classes),
            Activation("softmax"),
        ]

        network = Sequential()
        for layer in layers:
            network.add(layer)
        return network

接著我們載入資料,每張圖片對應的數字放在檔名中最後一個 '_' 之後(例如 image1_5.jpg 的標籤為 5)。

def get_data(images_path):
    """Load the captcha images in ``images_path`` and split them into train/test sets.

    The label of an image is the integer found between the last ``_`` of its
    file name and the following ``.`` (``image12_7.jpg`` -> ``7``).

    Args:
        images_path: Directory that contains the ``*.jpg`` images.

    Returns:
        ``(trainX, trainY, testX, testY)``. Images are converted from BGR to
        RGB and resized to ``norm_size`` x ``norm_size``; labels are one-hot
        encoded over ``CLASS_NUM`` classes. 25% of the samples are held out
        for testing with a fixed random seed.

    Raises:
        ValueError: If ``images_path`` does not exist or contains no
            readable ``.jpg`` image.
    """
    if not os.path.exists(images_path):
        raise ValueError('images_path is not exist.')

    images = []
    labels = []
    pattern = os.path.join(images_path, '*.jpg')
    # glob returns matches in arbitrary order; sorting makes the seeded
    # train/test split below reproducible across runs and machines.
    for image_file in sorted(glob.glob(pattern)):
        image = cv2.imread(image_file)
        if image is None:
            # cv2.imread returns None for unreadable/corrupt files instead of
            # raising; skip them rather than crash in cvtColor.
            print('Skip unreadable image {}'.format(image_file))
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (norm_size, norm_size))
        # Parse the label from the file name only, so underscores in the
        # directory part of the path cannot be mistaken for the separator.
        file_name = os.path.basename(image_file)
        label = int(file_name.split('_')[-1].split('.')[0])
        images.append(image)
        labels.append(label)
        if len(images) % 100 == 0:
            print('Load{} images .'.format(len(images)))

    if not images:
        raise ValueError(
            'no readable .jpg images found in {}'.format(images_path))

    images = np.array(images)
    labels = np.array(labels)

    (trainX, testX, trainY, testY) = train_test_split(
        images, labels, test_size=0.25, random_state=42)

    # convert the labels from integers to one-hot vectors
    trainY = to_categorical(trainY, num_classes=CLASS_NUM)
    testY = to_categorical(testY, num_classes=CLASS_NUM)
    return trainX, trainY, testX, testY

經過處理我們得到訓練集和測試集。我們先放出來完整程式碼train.py,然後我們在程式碼基礎上加以修改。執行命令如下

python3 train.py -d images/ -m my.model

其中images/ 為驗證碼存放目錄(前面的gendata.py預設把圖片輸出到 ./datasets/images/,請依實際路徑調整),my.model為模型儲存位置。

import matplotlib
matplotlib.use("Agg")
 
# import the necessary packages
import glob
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import img_to_array
from keras.utils import to_categorical
#from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import cv2
import os
import sys
sys.path.append('..')
from lenet import LeNet



def args_parse():
    """Parse the command-line options of the training script.

    Returns:
        A dict with the keys ``dataset`` and ``model`` (both required) and
        ``plot`` (defaults to ``"plot.png"``).
    """
    parser = argparse.ArgumentParser()
    # (flags, keyword options) for every supported option, in help order.
    option_specs = (
        (("-d", "--dataset"),
         dict(required=True, help="path to input dataset")),
        (("-m", "--model"),
         dict(required=True, help="path to output model")),
        (("-p", "--plot"),
         dict(type=str, default="plot.png",
              help="path to output accuracy/loss plot")),
    )
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)
    return vars(parser.parse_args())


# NOTE(review): this parses the command line at import time. The __main__
# block calls args_parse() again and train() receives args as a parameter,
# so this module-level call looks redundant -- confirm before removing.
args = args_parse()

# Training configuration shared by get_data() and train().
EPOCHS = 200        # number of training epochs
INIT_LR = 1e-2      # initial learning rate handed to the optimizer
BS = 128            # batch size
CLASS_NUM = 10      # number of target classes (one-hot width)
norm_size = 32      # images are resized to norm_size x norm_size
# Data loading is implemented in get_data() below.

def get_data(images_path):
    """Load the captcha images in ``images_path`` and split them into train/test sets.

    The label of an image is the integer found between the last ``_`` of its
    file name and the following ``.`` (``image12_7.jpg`` -> ``7``).

    Args:
        images_path: Directory that contains the ``*.jpg`` images.

    Returns:
        ``(trainX, trainY, testX, testY)``. Images are converted from BGR to
        RGB and resized to ``norm_size`` x ``norm_size``; labels are one-hot
        encoded over ``CLASS_NUM`` classes. 25% of the samples are held out
        for testing with a fixed random seed.

    Raises:
        ValueError: If ``images_path`` does not exist or contains no
            readable ``.jpg`` image.
    """
    if not os.path.exists(images_path):
        raise ValueError('images_path is not exist.')

    images = []
    labels = []
    pattern = os.path.join(images_path, '*.jpg')
    # glob returns matches in arbitrary order; sorting makes the seeded
    # train/test split below reproducible across runs and machines.
    for image_file in sorted(glob.glob(pattern)):
        image = cv2.imread(image_file)
        if image is None:
            # cv2.imread returns None for unreadable/corrupt files instead of
            # raising; skip them rather than crash in cvtColor.
            print('Skip unreadable image {}'.format(image_file))
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (norm_size, norm_size))
        # Parse the label from the file name only, so underscores in the
        # directory part of the path cannot be mistaken for the separator.
        file_name = os.path.basename(image_file)
        label = int(file_name.split('_')[-1].split('.')[0])
        images.append(image)
        labels.append(label)
        if len(images) % 100 == 0:
            print('Load{} images .'.format(len(images)))

    if not images:
        raise ValueError(
            'no readable .jpg images found in {}'.format(images_path))

    images = np.array(images)
    labels = np.array(labels)

    (trainX, testX, trainY, testY) = train_test_split(
        images, labels, test_size=0.25, random_state=42)

    # convert the labels from integers to one-hot vectors
    trainY = to_categorical(trainY, num_classes=CLASS_NUM)
    testY = to_categorical(testY, num_classes=CLASS_NUM)
    return trainX, trainY, testX, testY

def train(aug, trainX, trainY, testX, testY, args):
    """Train LeNet on the given split, save the model and plot the curves.

    Args:
        aug: Data generator whose ``flow(x, y, batch_size=...)`` yields
            augmented training batches.
        trainX: Training images.
        trainY: One-hot training labels.
        testX: Validation images.
        testY: One-hot validation labels.
        args: Mapping with the keys ``"model"`` (output model path) and
            ``"plot"`` (output figure path).
    """
    # initialize the model
    print("[INFO] compiling model...")
    model = LeNet.build(width=norm_size, height=norm_size, depth=3,
                        classes=CLASS_NUM)
    opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
    model.compile(loss="categorical_crossentropy", optimizer=opt,
                  metrics=["accuracy"])

    # train the network
    print("[INFO] training network...")
    # Guard against a training set smaller than one batch, which would
    # otherwise request zero steps per epoch.
    steps_per_epoch = max(1, len(trainX) // BS)
    H = model.fit_generator(aug.flow(trainX, trainY, batch_size=BS),
                            validation_data=(testX, testY),
                            steps_per_epoch=steps_per_epoch,
                            epochs=EPOCHS, verbose=1)

    # save the model to disk
    print("[INFO] serializing network...")
    model.save(args["model"])

    # plot the training loss and accuracy
    history = H.history
    # Keras releases disagree on the accuracy key ("acc" vs "accuracy");
    # pick whichever one this run actually recorded.
    acc_key = "acc" if "acc" in history else "accuracy"
    # Use the recorded length so the x axis always matches the data.
    epochs_axis = np.arange(0, len(history["loss"]))

    plt.style.use("ggplot")
    plt.figure()
    plt.plot(epochs_axis, history["loss"], label="train_loss")
    plt.plot(epochs_axis, history["val_loss"], label="val_loss")
    plt.plot(epochs_axis, history[acc_key], label="train_acc")
    plt.plot(epochs_axis, history["val_" + acc_key], label="val_acc")
    # The previous title ("Invoice classifier") was left over from another
    # project; this script trains the captcha digit classifier.
    plt.title("Training Loss and Accuracy on Captcha classifier")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend(loc="lower left")
    plt.savefig(args["plot"])
    # Release the figure once it has been written to disk.
    plt.close()
    
# Usage: python3 train.py -d images/ -m my.model
if __name__=='__main__':
    args = args_parse()
    file_path = args["dataset"]
    trainX,trainY,testX,testY = get_data(file_path)
    # Construct the image generator for data augmentation.
    # horizontal_flip is disabled: a mirrored digit is not a valid sample of
    # the same digit, so flipping would feed the network mislabeled images.
    aug = ImageDataGenerator(rotation_range=30, width_shift_range=0.1,
        height_shift_range=0.1, shear_range=0.2, zoom_range=0.2,
        horizontal_flip=False, fill_mode="nearest")
    train(aug,trainX,trainY,testX,testY,args)

四、優化模型

我們可以根據每次生成的圖片觀察訓練效果。這張圖是已經經過若干次修改後的結果,正確率大概為0.80。從下圖可以看到val_loss的抖動還是比較大,這是由於兩個原因:一是初始的學習率比較大;二是在本例中我採用了dropout,而dropout rate設定得太高了(0.25),所以我們需要修改。 plot.png

4.1 採用BatchNormalization

BatchNormalization()真是非常好用,把它放在卷積層和池化層之間能非常有效地提升效能。

# CONV => BN => RELU => POOL stage (excerpt; it belongs inside LeNet.build
# and needs BatchNormalization to be imported alongside the other layers).
model.add(Conv2D(30, (2, 2), padding="same"))
model.add(BatchNormalization())
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

4.2 學習率衰減策略

其實我們在一開始嘗試的時候完全沒必要設定學習率衰減策略。我們大可以嘗試使用或大或小的學習率觀察結果。隨後我們可以讓學習率隨輪數衰減,以達到微調的效果。

    # Adam with an initial learning rate of INIT_LR and a decay term of
    # INIT_LR / EPOCHS.
    opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
    # Alternative without the decay argument, kept for comparison:
#    opt = Adam(lr=INIT_LR)
    # Multi-class setup: categorical cross-entropy loss, accuracy as metric.
    model.compile(loss="categorical_crossentropy", optimizer=opt,
        metrics=["accuracy"])

4.3 dropout

使用了BatchNormalization之後再使用dropout,效果可能不太明顯。我們可以在最後的全連線層處使用dropout;在卷積層中間使用dropout會導致結果不可預測。

        # Fully connected layer followed by dropout.
        # NOTE(review): `droprate` and the Dropout import are not defined in
        # this excerpt -- they must be provided by the surrounding code.
        model.add(Dense(200))
        model.add(Dropout(droprate))

4.4 資料擴充

我們把圖片變形扭曲增加資料來源

# horizontal_flip is disabled: a mirrored digit is not a valid sample of the
# same digit, so flipping would feed the network mislabeled images.
aug = ImageDataGenerator(rotation_range=30, width_shift_range=0.1,
        height_shift_range=0.1, shear_range=0.2, zoom_range=0.2,
        horizontal_flip=False, fill_mode="nearest")

現在的結果如下圖所示。由於訓練輪數(200)不是特別多,所以效果還不是很好,正確率大概在85%。有興趣的朋友可以在此基礎上加以修改一下。 plot.png 完整程式碼參見 code