TensorFlow車牌識別完整版(含車牌資料集)
在之前釋出的一篇博文《MNIST資料集實現車牌識別--初步演示版》中,我們演示了如何使用TensorFlow進行車牌識別。但是,當時採用的資料集是MNIST數字手寫體,只能分類0-9共10個數字,無法分類省份簡稱和字母,侷限性較大,無實際意義。
經過影象定位分割處理,博主收集了相關省份簡稱和26個字母的圖片資料集,結合前述博文中貼出的python+TensorFlow程式碼,實現了完整的車牌識別功能。本著分享精神,在此送上全部程式碼和車牌資料集。
省份簡稱訓練+識別程式碼(儲存檔名為train-license-province.py)(拷貝程式碼請務必注意python文字縮排,只要有一處縮排錯誤,就無法得到正確結果,或者出現異常):
-
#!/usr/bin/python3.5
-
# -*- coding: utf-8 -*-
-
import sys
-
import os
-
import time
-
import random
-
import numpy as np
-
import tensorflow as tf
-
from PIL import Image
-
# Geometry of one sample: a WIDTH x HEIGHT image flattened to SIZE values.
SIZE = 1280
WIDTH = 32
HEIGHT = 40

# Number of province classes and the maximum number of training epochs.
NUM_CLASSES = 6
iterations = 300

# Directory the checkpoint is written to / restored from.
SAVER_DIR = "train-saver/province/"

# Class index -> province abbreviation shown in the prediction output.
PROVINCES = ("京","閩","粵","蘇","滬","浙")

nProvinceIndex = 0

time_begin = time.time()

# Input nodes: the flattened pixel matrix of each image and its one-hot label.
x = tf.placeholder(tf.float32, shape=[None, SIZE])
y_ = tf.placeholder(tf.float32, shape=[None, NUM_CLASSES])

# The loaders below store pixel (w, h) at flat index w + h * width, i.e.
# row-major with rows of WIDTH pixels. tf.nn.conv2d's default NHWC layout is
# [batch, height, width, channels], so the height axis must come first.
# NOTE: checkpoints trained with the previous [-1, WIDTH, HEIGHT, 1] layout
# must be retrained after this change.
x_image = tf.reshape(x, [-1, HEIGHT, WIDTH, 1])
-
# Convolution building block
def conv_layer(inputs, W, b, conv_strides, kernel_size, pool_strides, padding):
    """Apply conv2d + bias + ReLU to ``inputs`` and max-pool the result.

    ``W``/``b`` are the filter and bias tensors, ``conv_strides`` and
    ``padding`` are forwarded to ``tf.nn.conv2d``; ``kernel_size`` and
    ``pool_strides`` are forwarded to ``tf.nn.max_pool``, which always uses
    'SAME' padding. Returns the pooled tensor.
    """
    activated = tf.nn.relu(
        tf.nn.conv2d(inputs, W, strides=conv_strides, padding=padding) + b
    )
    return tf.nn.max_pool(
        activated, ksize=kernel_size, strides=pool_strides, padding='SAME'
    )
-
# Fully connected building block
def full_connect(inputs, W, b):
    """Return ``relu(inputs @ W + b)`` for a dense layer."""
    pre_activation = tf.matmul(inputs, W) + b
    return tf.nn.relu(pre_activation)
-
def iter_batch_bounds(sample_count, batch_size):
    """Yield ``(start, stop)`` index pairs that cover every sample exactly once.

    Every batch holds ``batch_size`` samples except possibly the last one,
    which holds the remainder. Nothing is yielded when ``sample_count`` is 0.
    """
    for start in range(0, sample_count, batch_size):
        yield start, min(start + batch_size, sample_count)


def load_image_set(dir_pattern, class_ids, sample_size, num_classes, label_offset=0):
    """Read every image below ``dir_pattern % class_id`` into binarized arrays.

    Args:
        dir_pattern: ``%``-format string producing one directory per class id.
        class_ids: iterable of integer class ids (also the directory names).
        sample_size: length of one flattened image row.
        num_classes: width of the one-hot label rows.
        label_offset: subtracted from the class id to get the label column.

    Returns:
        ``(images, labels)`` integer arrays of shape ``(n, sample_size)`` and
        ``(n, num_classes)``; ``n`` is 0 when no files are found.
    """
    samples = []
    for class_id in class_ids:
        for root, _subdirs, files in os.walk(dir_pattern % class_id):
            # Join with the directory os.walk is currently visiting so files
            # in nested sub-directories resolve to the right path.
            samples.extend(
                (os.path.join(root, name), class_id - label_offset) for name in files
            )

    images = np.zeros((len(samples), sample_size), dtype=int)
    labels = np.zeros((len(samples), num_classes), dtype=int)
    for row, (path, label) in enumerate(samples):
        with Image.open(path) as img:
            width, height = img.size
            for h in range(height):
                for w in range(width):
                    # Only pixels at or below 230 are kept as stroke (1); the
                    # high threshold thins the strokes, which the original
                    # author found helps accuracy.
                    # NOTE(review): assumes getpixel returns a single int
                    # (single-channel image) - confirm for your data set.
                    if img.getpixel((w, h)) <= 230:
                        images[row][w + h * width] = 1
        labels[row][label] = 1
    return images, labels


if __name__ == '__main__' and len(sys.argv) > 1 and sys.argv[1] == 'train':
    # Directory names are the class labels; change the patterns to point at
    # your own image folders.
    input_images, input_labels = load_image_set(
        './train_images/training-set/chinese-characters/%s/',
        range(NUM_CLASSES), SIZE, NUM_CLASSES)
    input_count = len(input_images)

    val_images, val_labels = load_image_set(
        './train_images/validation-set/chinese-characters/%s/',
        range(NUM_CLASSES), SIZE, NUM_CLASSES)

    with tf.Session() as sess:
        # First convolutional layer (2x2 max-pool halves both spatial axes).
        W_conv1 = tf.Variable(tf.truncated_normal([8, 8, 1, 16], stddev=0.1), name="W_conv1")
        b_conv1 = tf.Variable(tf.constant(0.1, shape=[16]), name="b_conv1")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 2, 2, 1]
        pool_strides = [1, 2, 2, 1]
        L1_pool = conv_layer(x_image, W_conv1, b_conv1, conv_strides, kernel_size, pool_strides, padding='SAME')

        # Second convolutional layer (1x1 pool with stride 1 keeps the size).
        W_conv2 = tf.Variable(tf.truncated_normal([5, 5, 16, 32], stddev=0.1), name="W_conv2")
        b_conv2 = tf.Variable(tf.constant(0.1, shape=[32]), name="b_conv2")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 1, 1, 1]
        pool_strides = [1, 1, 1, 1]
        L2_pool = conv_layer(L1_pool, W_conv2, b_conv2, conv_strides, kernel_size, pool_strides, padding='SAME')

        # Fully connected layer over the flattened 16 * 20 * 32 feature map.
        W_fc1 = tf.Variable(tf.truncated_normal([16 * 20 * 32, 512], stddev=0.1), name="W_fc1")
        b_fc1 = tf.Variable(tf.constant(0.1, shape=[512]), name="b_fc1")
        h_pool2_flat = tf.reshape(L2_pool, [-1, 16 * 20 * 32])
        h_fc1 = full_connect(h_pool2_flat, W_fc1, b_fc1)

        # Dropout
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

        # Readout layer
        W_fc2 = tf.Variable(tf.truncated_normal([512, NUM_CLASSES], stddev=0.1), name="W_fc2")
        b_fc2 = tf.Variable(tf.constant(0.1, shape=[NUM_CLASSES]), name="b_fc2")

        # Loss, optimizer and accuracy ops
        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
        cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
        train_step = tf.train.AdamOptimizer((1e-4)).minimize(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Create the saver
        saver = tf.train.Saver()

        sess.run(tf.global_variables_initializer())

        time_elapsed = time.time() - time_begin
        print("讀取圖片檔案耗費時間:%d秒" % time_elapsed)
        time_begin = time.time()

        print ("一共讀取了 %s 個訓練影象, %s 個標籤" % (input_count, input_count))

        # Mini-batch size. Any total is supported: e.g. 150 images with a
        # batch size of 60 are fed as 60 + 60 + 30.
        batch_size = 60
        batches_count = input_count // batch_size
        remainder = input_count % batch_size
        print ("訓練資料集分成 %s 批, 前面每批 %s 個數據,最後一批 %s 個數據" % (batches_count+1, batch_size, remainder))

        # Training epochs
        for it in range(iterations):
            # iter_batch_bounds also covers the short final batch in full; the
            # previous slice [start:input_count-1] silently dropped the last
            # sample and fed an empty batch when the remainder was 1.
            for start, stop in iter_batch_bounds(input_count, batch_size):
                train_step.run(feed_dict={x: input_images[start:stop], y_: input_labels[start:stop], keep_prob: 0.5})

            # Every fifth epoch, measure validation accuracy; stop early once
            # it reaches 100% and at least 150 epochs have run.
            if it % 5 == 0:
                iterate_accuracy = accuracy.eval(feed_dict={x: val_images, y_: val_labels, keep_prob: 1.0})
                print ('第 %d 次訓練迭代: 準確率 %0.5f%%' % (it, iterate_accuracy*100))
                if iterate_accuracy >= 0.9999 and it >= 150:
                    break

        print ('完成訓練!')
        time_elapsed = time.time() - time_begin
        print ("訓練耗費時間:%d秒" % time_elapsed)
        time_begin = time.time()

        # Save the trained variables
        if not os.path.exists(SAVER_DIR):
            print ('不存在訓練資料儲存目錄,現在建立儲存目錄')
            os.makedirs(SAVER_DIR)
        saver_path = saver.save(sess, "%smodel.ckpt"%(SAVER_DIR))
-
def top_three(probabilities):
    """Return the three most probable classes as ``(index, probability)`` pairs.

    Pairs are ordered best first; ties keep the lower index first. Fewer than
    three pairs are returned when fewer than three probabilities are given.
    """
    scores = np.asarray(probabilities)
    # mergesort is stable, so equal scores keep their original index order.
    order = np.argsort(-scores, kind='mergesort')[:3]
    return [(int(i), float(scores[i])) for i in order]


if __name__ == '__main__' and len(sys.argv) > 1 and sys.argv[1] == 'predict':
    saver = tf.train.import_meta_graph("%smodel.ckpt.meta"%(SAVER_DIR))
    with tf.Session() as sess:
        model_file = tf.train.latest_checkpoint(SAVER_DIR)
        saver.restore(sess, model_file)

        # First convolutional layer, rebuilt on the restored weights.
        W_conv1 = sess.graph.get_tensor_by_name("W_conv1:0")
        b_conv1 = sess.graph.get_tensor_by_name("b_conv1:0")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 2, 2, 1]
        pool_strides = [1, 2, 2, 1]
        L1_pool = conv_layer(x_image, W_conv1, b_conv1, conv_strides, kernel_size, pool_strides, padding='SAME')

        # Second convolutional layer
        W_conv2 = sess.graph.get_tensor_by_name("W_conv2:0")
        b_conv2 = sess.graph.get_tensor_by_name("b_conv2:0")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 1, 1, 1]
        pool_strides = [1, 1, 1, 1]
        L2_pool = conv_layer(L1_pool, W_conv2, b_conv2, conv_strides, kernel_size, pool_strides, padding='SAME')

        # Fully connected layer
        W_fc1 = sess.graph.get_tensor_by_name("W_fc1:0")
        b_fc1 = sess.graph.get_tensor_by_name("b_fc1:0")
        h_pool2_flat = tf.reshape(L2_pool, [-1, 16 * 20 * 32])
        h_fc1 = full_connect(h_pool2_flat, W_fc1, b_fc1)

        # Dropout (fed with keep_prob 1.0 below, i.e. disabled for inference)
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

        # Readout layer
        W_fc2 = sess.graph.get_tensor_by_name("W_fc2:0")
        b_fc2 = sess.graph.get_tensor_by_name("b_fc2:0")

        # Class probabilities
        conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

        for n in range(1,2):
            path = "test_images/%s.bmp" % (n)
            img_data = [[0] * SIZE]
            with Image.open(path) as img:
                width, height = img.size
                for h in range(height):
                    for w in range(width):
                        # NOTE(review): inference binarizes at 190 while
                        # training uses 230 - confirm this is intentional.
                        if img.getpixel((w, h)) < 190:
                            img_data[0][w + h * width] = 1

            result = sess.run(conv, feed_dict = {x: np.array(img_data), keep_prob: 1.0})

            # The previous hand-rolled scan never demoted the old maximum, so
            # the 2nd and 3rd candidates it reported were wrong.
            (max1_index, max1), (max2_index, max2), (max3_index, max3) = top_three(result[0])

            nProvinceIndex = max1_index
            print ("概率: [%s %0.2f%%] [%s %0.2f%%] [%s %0.2f%%]" % (PROVINCES[max1_index],max1*100, PROVINCES[max2_index],max2*100, PROVINCES[max3_index],max3*100))

        print ("省份簡稱是: %s" % PROVINCES[nProvinceIndex])
城市代號訓練+識別程式碼(儲存檔名為train-license-letters.py):
-
#!/usr/bin/python3.5
-
# -*- coding: utf-8 -*-
-
import sys
-
import os
-
import time
-
import random
-
import numpy as np
-
import tensorflow as tf
-
from PIL import Image
-
# Geometry of one sample: a WIDTH x HEIGHT image flattened to SIZE values.
SIZE = 1280
WIDTH = 32
HEIGHT = 40

# Number of letter classes and the number of training epochs.
NUM_CLASSES = 26
iterations = 500

# Directory the checkpoint is written to / restored from.
SAVER_DIR = "train-saver/letters/"

# Class index -> letter; the training code maps image directory i to class
# index i - 10.
LETTERS_DIGITS = ("A","B","C","D","E","F","G","H","J","K","L","M","N","P","Q","R","S","T","U","V","W","X","Y","Z","I","O")

license_num = ""

time_begin = time.time()

# Input nodes: the flattened pixel matrix of each image and its one-hot label.
x = tf.placeholder(tf.float32, shape=[None, SIZE])
y_ = tf.placeholder(tf.float32, shape=[None, NUM_CLASSES])

# The loaders below store pixel (w, h) at flat index w + h * width, i.e.
# row-major with rows of WIDTH pixels. tf.nn.conv2d's default NHWC layout is
# [batch, height, width, channels], so the height axis must come first.
# NOTE: checkpoints trained with the previous [-1, WIDTH, HEIGHT, 1] layout
# must be retrained after this change.
x_image = tf.reshape(x, [-1, HEIGHT, WIDTH, 1])
-
# Convolution building block
def conv_layer(inputs, W, b, conv_strides, kernel_size, pool_strides, padding):
    """Apply conv2d + bias + ReLU to ``inputs`` and max-pool the result.

    ``W``/``b`` are the filter and bias tensors, ``conv_strides`` and
    ``padding`` are forwarded to ``tf.nn.conv2d``; ``kernel_size`` and
    ``pool_strides`` are forwarded to ``tf.nn.max_pool``, which always uses
    'SAME' padding. Returns the pooled tensor.
    """
    activated = tf.nn.relu(
        tf.nn.conv2d(inputs, W, strides=conv_strides, padding=padding) + b
    )
    return tf.nn.max_pool(
        activated, ksize=kernel_size, strides=pool_strides, padding='SAME'
    )
-
# Fully connected building block
def full_connect(inputs, W, b):
    """Return ``relu(inputs @ W + b)`` for a dense layer."""
    pre_activation = tf.matmul(inputs, W) + b
    return tf.nn.relu(pre_activation)
-
def iter_batch_bounds(sample_count, batch_size):
    """Yield ``(start, stop)`` index pairs that cover every sample exactly once.

    Every batch holds ``batch_size`` samples except possibly the last one,
    which holds the remainder. Nothing is yielded when ``sample_count`` is 0.
    """
    for start in range(0, sample_count, batch_size):
        yield start, min(start + batch_size, sample_count)


def load_image_set(dir_pattern, class_ids, sample_size, num_classes, label_offset=0):
    """Read every image below ``dir_pattern % class_id`` into binarized arrays.

    Args:
        dir_pattern: ``%``-format string producing one directory per class id.
        class_ids: iterable of integer class ids (also the directory names).
        sample_size: length of one flattened image row.
        num_classes: width of the one-hot label rows.
        label_offset: subtracted from the class id to get the label column.

    Returns:
        ``(images, labels)`` integer arrays of shape ``(n, sample_size)`` and
        ``(n, num_classes)``; ``n`` is 0 when no files are found.
    """
    samples = []
    for class_id in class_ids:
        for root, _subdirs, files in os.walk(dir_pattern % class_id):
            # Join with the directory os.walk is currently visiting so files
            # in nested sub-directories resolve to the right path.
            samples.extend(
                (os.path.join(root, name), class_id - label_offset) for name in files
            )

    images = np.zeros((len(samples), sample_size), dtype=int)
    labels = np.zeros((len(samples), num_classes), dtype=int)
    for row, (path, label) in enumerate(samples):
        with Image.open(path) as img:
            width, height = img.size
            for h in range(height):
                for w in range(width):
                    # Only pixels at or below 230 are kept as stroke (1); the
                    # high threshold thins the strokes, which the original
                    # author found helps accuracy.
                    # NOTE(review): assumes getpixel returns a single int
                    # (single-channel image) - confirm for your data set.
                    if img.getpixel((w, h)) <= 230:
                        images[row][w + h * width] = 1
        labels[row][label] = 1
    return images, labels


if __name__ == '__main__' and len(sys.argv) > 1 and sys.argv[1] == 'train':
    # Letter directories are numbered 10..35; directory i becomes class i - 10.
    # Change the patterns to point at your own image folders.
    letter_dirs = range(10, NUM_CLASSES + 10)
    input_images, input_labels = load_image_set(
        './train_images/training-set/letters/%s/',
        letter_dirs, SIZE, NUM_CLASSES, label_offset=10)
    input_count = len(input_images)

    # NOTE(review): validation images are read from validation-set/<id>/
    # (no "letters" sub-directory, unlike the training set) - confirm this
    # matches the data set layout.
    val_images, val_labels = load_image_set(
        './train_images/validation-set/%s/',
        letter_dirs, SIZE, NUM_CLASSES, label_offset=10)

    with tf.Session() as sess:
        # First convolutional layer (2x2 max-pool halves both spatial axes).
        W_conv1 = tf.Variable(tf.truncated_normal([8, 8, 1, 16], stddev=0.1), name="W_conv1")
        b_conv1 = tf.Variable(tf.constant(0.1, shape=[16]), name="b_conv1")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 2, 2, 1]
        pool_strides = [1, 2, 2, 1]
        L1_pool = conv_layer(x_image, W_conv1, b_conv1, conv_strides, kernel_size, pool_strides, padding='SAME')

        # Second convolutional layer (1x1 pool with stride 1 keeps the size).
        W_conv2 = tf.Variable(tf.truncated_normal([5, 5, 16, 32], stddev=0.1), name="W_conv2")
        b_conv2 = tf.Variable(tf.constant(0.1, shape=[32]), name="b_conv2")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 1, 1, 1]
        pool_strides = [1, 1, 1, 1]
        L2_pool = conv_layer(L1_pool, W_conv2, b_conv2, conv_strides, kernel_size, pool_strides, padding='SAME')

        # Fully connected layer over the flattened 16 * 20 * 32 feature map.
        W_fc1 = tf.Variable(tf.truncated_normal([16 * 20 * 32, 512], stddev=0.1), name="W_fc1")
        b_fc1 = tf.Variable(tf.constant(0.1, shape=[512]), name="b_fc1")
        h_pool2_flat = tf.reshape(L2_pool, [-1, 16 * 20 * 32])
        h_fc1 = full_connect(h_pool2_flat, W_fc1, b_fc1)

        # Dropout
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

        # Readout layer
        W_fc2 = tf.Variable(tf.truncated_normal([512, NUM_CLASSES], stddev=0.1), name="W_fc2")
        b_fc2 = tf.Variable(tf.constant(0.1, shape=[NUM_CLASSES]), name="b_fc2")

        # Loss, optimizer and accuracy ops
        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
        cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
        train_step = tf.train.AdamOptimizer((1e-4)).minimize(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        sess.run(tf.global_variables_initializer())

        time_elapsed = time.time() - time_begin
        print("讀取圖片檔案耗費時間:%d秒" % time_elapsed)
        time_begin = time.time()

        print ("一共讀取了 %s 個訓練影象, %s 個標籤" % (input_count, input_count))

        # Mini-batch size. Any total is supported: e.g. 150 images with a
        # batch size of 60 are fed as 60 + 60 + 30.
        batch_size = 60
        batches_count = input_count // batch_size
        remainder = input_count % batch_size
        print ("訓練資料集分成 %s 批, 前面每批 %s 個數據,最後一批 %s 個數據" % (batches_count+1, batch_size, remainder))

        # Training epochs
        for it in range(iterations):
            # iter_batch_bounds also covers the short final batch in full; the
            # previous slice [start:input_count-1] silently dropped the last
            # sample and fed an empty batch when the remainder was 1.
            for start, stop in iter_batch_bounds(input_count, batch_size):
                train_step.run(feed_dict={x: input_images[start:stop], y_: input_labels[start:stop], keep_prob: 0.5})

            # Every fifth epoch, report validation accuracy.
            if it % 5 == 0:
                iterate_accuracy = accuracy.eval(feed_dict={x: val_images, y_: val_labels, keep_prob: 1.0})
                print ('第 %d 次訓練迭代: 準確率 %0.5f%%' % (it, iterate_accuracy*100))
                # NOTE(review): `it >= iterations` can never hold inside
                # range(iterations), so this early exit is effectively
                # disabled; the province script uses a fixed minimum of 150.
                # Behavior kept as-is - confirm the intended threshold.
                if iterate_accuracy >= 0.9999 and it >= iterations:
                    break

        print ('完成訓練!')
        time_elapsed = time.time() - time_begin
        print ("訓練耗費時間:%d秒" % time_elapsed)
        time_begin = time.time()

        # Save the trained variables
        if not os.path.exists(SAVER_DIR):
            print ('不存在訓練資料儲存目錄,現在建立儲存目錄')
            os.makedirs(SAVER_DIR)

        # Create the saver
        saver = tf.train.Saver()
        saver_path = saver.save(sess, "%smodel.ckpt"%(SAVER_DIR))
-
def top_three(probabilities):
    """Return the three most probable classes as ``(index, probability)`` pairs.

    Pairs are ordered best first; ties keep the lower index first. Fewer than
    three pairs are returned when fewer than three probabilities are given.
    """
    scores = np.asarray(probabilities)
    # mergesort is stable, so equal scores keep their original index order.
    order = np.argsort(-scores, kind='mergesort')[:3]
    return [(int(i), float(scores[i])) for i in order]


if __name__ == '__main__' and len(sys.argv) > 1 and sys.argv[1] == 'predict':
    saver = tf.train.import_meta_graph("%smodel.ckpt.meta"%(SAVER_DIR))
    with tf.Session() as sess:
        model_file = tf.train.latest_checkpoint(SAVER_DIR)
        saver.restore(sess, model_file)

        # First convolutional layer, rebuilt on the restored weights.
        W_conv1 = sess.graph.get_tensor_by_name("W_conv1:0")
        b_conv1 = sess.graph.get_tensor_by_name("b_conv1:0")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 2, 2, 1]
        pool_strides = [1, 2, 2, 1]
        L1_pool = conv_layer(x_image, W_conv1, b_conv1, conv_strides, kernel_size, pool_strides, padding='SAME')

        # Second convolutional layer
        W_conv2 = sess.graph.get_tensor_by_name("W_conv2:0")
        b_conv2 = sess.graph.get_tensor_by_name("b_conv2:0")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 1, 1, 1]
        pool_strides = [1, 1, 1, 1]
        L2_pool = conv_layer(L1_pool, W_conv2, b_conv2, conv_strides, kernel_size, pool_strides, padding='SAME')

        # Fully connected layer
        W_fc1 = sess.graph.get_tensor_by_name("W_fc1:0")
        b_fc1 = sess.graph.get_tensor_by_name("b_fc1:0")
        h_pool2_flat = tf.reshape(L2_pool, [-1, 16 * 20 * 32])
        h_fc1 = full_connect(h_pool2_flat, W_fc1, b_fc1)

        # Dropout (fed with keep_prob 1.0 below, i.e. disabled for inference)
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

        # Readout layer
        W_fc2 = sess.graph.get_tensor_by_name("W_fc2:0")
        b_fc2 = sess.graph.get_tensor_by_name("b_fc2:0")

        # Class probabilities
        conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

        for n in range(2,3):
            path = "test_images/%s.bmp" % (n)
            img_data = [[0] * SIZE]
            with Image.open(path) as img:
                width, height = img.size
                for h in range(height):
                    for w in range(width):
                        # NOTE(review): inference binarizes at 190 while
                        # training uses 230 - confirm this is intentional.
                        if img.getpixel((w, h)) < 190:
                            img_data[0][w + h * width] = 1

            result = sess.run(conv, feed_dict = {x: np.array(img_data), keep_prob: 1.0})

            # The previous hand-rolled scan never demoted the old maximum, so
            # the 2nd and 3rd candidates it reported were wrong.
            (max1_index, max1), (max2_index, max2), (max3_index, max3) = top_three(result[0])

            # NOTE(review): n only takes the value 2 in range(2, 3), so this
            # separator is never appended here.
            if n == 3:
                license_num += "-"
            license_num = license_num + LETTERS_DIGITS[max1_index]
            print ("概率: [%s %0.2f%%] [%s %0.2f%%] [%s %0.2f%%]" % (LETTERS_DIGITS[max1_index],max1*100, LETTERS_DIGITS[max2_index],max2*100, LETTERS_DIGITS[max3_index],max3*100))

        print ("城市代號是: 【%s】" % license_num)
車牌編號訓練+識別程式碼(儲存檔名為train-license-digits.py):
-
#!/usr/bin/python3.5
-
# -*- coding: utf-8 -*-
-
import sys
-
import os
-
import time
-
import random
-
import numpy as np
-
import tensorflow as tf
-
from PIL import Image
-
# Geometry of one sample: a WIDTH x HEIGHT image flattened to SIZE values.
SIZE = 1280
WIDTH = 32
HEIGHT = 40

# Number of classes (10 digits + 24 letters) and the number of training epochs.
NUM_CLASSES = 34
iterations = 1000

# Directory the checkpoint is written to / restored from.
SAVER_DIR = "train-saver/digits/"

# Class index -> character shown in the prediction output.
LETTERS_DIGITS = ("0","1","2","3","4","5","6","7","8","9","A","B","C","D","E","F","G","H","J","K","L","M","N","P","Q","R","S","T","U","V","W","X","Y","Z")

license_num = ""

time_begin = time.time()

# Input nodes: the flattened pixel matrix of each image and its one-hot label.
x = tf.placeholder(tf.float32, shape=[None, SIZE])
y_ = tf.placeholder(tf.float32, shape=[None, NUM_CLASSES])

# The loaders below store pixel (w, h) at flat index w + h * width, i.e.
# row-major with rows of WIDTH pixels. tf.nn.conv2d's default NHWC layout is
# [batch, height, width, channels], so the height axis must come first.
# NOTE: checkpoints trained with the previous [-1, WIDTH, HEIGHT, 1] layout
# must be retrained after this change.
x_image = tf.reshape(x, [-1, HEIGHT, WIDTH, 1])
-
# Convolution building block
def conv_layer(inputs, W, b, conv_strides, kernel_size, pool_strides, padding):
    """Apply conv2d + bias + ReLU to ``inputs`` and max-pool the result.

    ``W``/``b`` are the filter and bias tensors, ``conv_strides`` and
    ``padding`` are forwarded to ``tf.nn.conv2d``; ``kernel_size`` and
    ``pool_strides`` are forwarded to ``tf.nn.max_pool``, which always uses
    'SAME' padding. Returns the pooled tensor.
    """
    activated = tf.nn.relu(
        tf.nn.conv2d(inputs, W, strides=conv_strides, padding=padding) + b
    )
    return tf.nn.max_pool(
        activated, ksize=kernel_size, strides=pool_strides, padding='SAME'
    )
-
# Fully connected building block
def full_connect(inputs, W, b):
    """Return ``relu(inputs @ W + b)`` for a dense layer."""
    pre_activation = tf.matmul(inputs, W) + b
    return tf.nn.relu(pre_activation)
-
def iter_batch_bounds(sample_count, batch_size):
    """Yield ``(start, stop)`` index pairs that cover every sample exactly once.

    Every batch holds ``batch_size`` samples except possibly the last one,
    which holds the remainder. Nothing is yielded when ``sample_count`` is 0.
    """
    for start in range(0, sample_count, batch_size):
        yield start, min(start + batch_size, sample_count)


def load_image_set(dir_pattern, class_ids, sample_size, num_classes, label_offset=0):
    """Read every image below ``dir_pattern % class_id`` into binarized arrays.

    Args:
        dir_pattern: ``%``-format string producing one directory per class id.
        class_ids: iterable of integer class ids (also the directory names).
        sample_size: length of one flattened image row.
        num_classes: width of the one-hot label rows.
        label_offset: subtracted from the class id to get the label column.

    Returns:
        ``(images, labels)`` integer arrays of shape ``(n, sample_size)`` and
        ``(n, num_classes)``; ``n`` is 0 when no files are found.
    """
    samples = []
    for class_id in class_ids:
        for root, _subdirs, files in os.walk(dir_pattern % class_id):
            # Join with the directory os.walk is currently visiting so files
            # in nested sub-directories resolve to the right path.
            samples.extend(
                (os.path.join(root, name), class_id - label_offset) for name in files
            )

    images = np.zeros((len(samples), sample_size), dtype=int)
    labels = np.zeros((len(samples), num_classes), dtype=int)
    for row, (path, label) in enumerate(samples):
        with Image.open(path) as img:
            width, height = img.size
            for h in range(height):
                for w in range(width):
                    # Only pixels at or below 230 are kept as stroke (1); the
                    # high threshold thins the strokes, which the original
                    # author found helps accuracy.
                    # NOTE(review): assumes getpixel returns a single int
                    # (single-channel image) - confirm for your data set.
                    if img.getpixel((w, h)) <= 230:
                        images[row][w + h * width] = 1
        labels[row][label] = 1
    return images, labels


if __name__ == '__main__' and len(sys.argv) > 1 and sys.argv[1] == 'train':
    # Directory names are the class labels; change the patterns to point at
    # your own image folders.
    input_images, input_labels = load_image_set(
        './train_images/training-set/%s/',
        range(NUM_CLASSES), SIZE, NUM_CLASSES)
    input_count = len(input_images)

    val_images, val_labels = load_image_set(
        './train_images/validation-set/%s/',
        range(NUM_CLASSES), SIZE, NUM_CLASSES)

    with tf.Session() as sess:
        # First convolutional layer (2x2 max-pool halves both spatial axes).
        W_conv1 = tf.Variable(tf.truncated_normal([8, 8, 1, 16], stddev=0.1), name="W_conv1")
        b_conv1 = tf.Variable(tf.constant(0.1, shape=[16]), name="b_conv1")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 2, 2, 1]
        pool_strides = [1, 2, 2, 1]
        L1_pool = conv_layer(x_image, W_conv1, b_conv1, conv_strides, kernel_size, pool_strides, padding='SAME')

        # Second convolutional layer (1x1 pool with stride 1 keeps the size).
        W_conv2 = tf.Variable(tf.truncated_normal([5, 5, 16, 32], stddev=0.1), name="W_conv2")
        b_conv2 = tf.Variable(tf.constant(0.1, shape=[32]), name="b_conv2")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 1, 1, 1]
        pool_strides = [1, 1, 1, 1]
        L2_pool = conv_layer(L1_pool, W_conv2, b_conv2, conv_strides, kernel_size, pool_strides, padding='SAME')

        # Fully connected layer over the flattened 16 * 20 * 32 feature map.
        W_fc1 = tf.Variable(tf.truncated_normal([16 * 20 * 32, 512], stddev=0.1), name="W_fc1")
        b_fc1 = tf.Variable(tf.constant(0.1, shape=[512]), name="b_fc1")
        h_pool2_flat = tf.reshape(L2_pool, [-1, 16 * 20 * 32])
        h_fc1 = full_connect(h_pool2_flat, W_fc1, b_fc1)

        # Dropout
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

        # Readout layer
        W_fc2 = tf.Variable(tf.truncated_normal([512, NUM_CLASSES], stddev=0.1), name="W_fc2")
        b_fc2 = tf.Variable(tf.constant(0.1, shape=[NUM_CLASSES]), name="b_fc2")

        # Loss, optimizer and accuracy ops
        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
        cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
        train_step = tf.train.AdamOptimizer((1e-4)).minimize(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        sess.run(tf.global_variables_initializer())

        time_elapsed = time.time() - time_begin
        print("讀取圖片檔案耗費時間:%d秒" % time_elapsed)
        time_begin = time.time()

        print ("一共讀取了 %s 個訓練影象, %s 個標籤" % (input_count, input_count))

        # Mini-batch size. Any total is supported: e.g. 150 images with a
        # batch size of 60 are fed as 60 + 60 + 30.
        batch_size = 60
        batches_count = input_count // batch_size
        remainder = input_count % batch_size
        print ("訓練資料集分成 %s 批, 前面每批 %s 個數據,最後一批 %s 個數據" % (batches_count+1, batch_size, remainder))

        # Training epochs
        for it in range(iterations):
            # iter_batch_bounds also covers the short final batch in full; the
            # previous slice [start:input_count-1] silently dropped the last
            # sample and fed an empty batch when the remainder was 1.
            for start, stop in iter_batch_bounds(input_count, batch_size):
                train_step.run(feed_dict={x: input_images[start:stop], y_: input_labels[start:stop], keep_prob: 0.5})

            # Every fifth epoch, report validation accuracy.
            if it % 5 == 0:
                iterate_accuracy = accuracy.eval(feed_dict={x: val_images, y_: val_labels, keep_prob: 1.0})
                print ('第 %d 次訓練迭代: 準確率 %0.5f%%' % (it, iterate_accuracy*100))
                # NOTE(review): `it >= iterations` can never hold inside
                # range(iterations), so this early exit is effectively
                # disabled; the province script uses a fixed minimum of 150.
                # Behavior kept as-is - confirm the intended threshold.
                if iterate_accuracy >= 0.9999 and it >= iterations:
                    break

        print ('完成訓練!')
        time_elapsed = time.time() - time_begin
        print ("訓練耗費時間:%d秒" % time_elapsed)
        time_begin = time.time()

        # Save the trained variables
        if not os.path.exists(SAVER_DIR):
            print ('不存在訓練資料儲存目錄,現在建立儲存目錄')
            os.makedirs(SAVER_DIR)

        # Create the saver
        saver = tf.train.Saver()
        saver_path = saver.save(sess, "%smodel.ckpt"%(SAVER_DIR))
-
def top_three(probabilities):
    """Return the three most probable classes as ``(index, probability)`` pairs.

    Pairs are ordered best first; ties keep the lower index first. Fewer than
    three pairs are returned when fewer than three probabilities are given.
    """
    scores = np.asarray(probabilities)
    # mergesort is stable, so equal scores keep their original index order.
    order = np.argsort(-scores, kind='mergesort')[:3]
    return [(int(i), float(scores[i])) for i in order]


if __name__ == '__main__' and len(sys.argv) > 1 and sys.argv[1] == 'predict':
    saver = tf.train.import_meta_graph("%smodel.ckpt.meta"%(SAVER_DIR))
    with tf.Session() as sess:
        model_file = tf.train.latest_checkpoint(SAVER_DIR)
        saver.restore(sess, model_file)

        # First convolutional layer, rebuilt on the restored weights.
        W_conv1 = sess.graph.get_tensor_by_name("W_conv1:0")
        b_conv1 = sess.graph.get_tensor_by_name("b_conv1:0")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 2, 2, 1]
        pool_strides = [1, 2, 2, 1]
        L1_pool = conv_layer(x_image, W_conv1, b_conv1, conv_strides, kernel_size, pool_strides, padding='SAME')

        # Second convolutional layer
        W_conv2 = sess.graph.get_tensor_by_name("W_conv2:0")
        b_conv2 = sess.graph.get_tensor_by_name("b_conv2:0")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 1, 1, 1]
        pool_strides = [1, 1, 1, 1]
        L2_pool = conv_layer(L1_pool, W_conv2, b_conv2, conv_strides, kernel_size, pool_strides, padding='SAME')

        # Fully connected layer
        W_fc1 = sess.graph.get_tensor_by_name("W_fc1:0")
        b_fc1 = sess.graph.get_tensor_by_name("b_fc1:0")
        h_pool2_flat = tf.reshape(L2_pool, [-1, 16 * 20 * 32])
        h_fc1 = full_connect(h_pool2_flat, W_fc1, b_fc1)

        # Dropout (fed with keep_prob 1.0 below, i.e. disabled for inference)
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

        # Readout layer
        W_fc2 = sess.graph.get_tensor_by_name("W_fc2:0")
        b_fc2 = sess.graph.get_tensor_by_name("b_fc2:0")

        # Class probabilities
        conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

        # Test images 3.bmp .. 7.bmp hold the five plate-number characters.
        for n in range(3,8):
            path = "test_images/%s.bmp" % (n)
            img_data = [[0] * SIZE]
            with Image.open(path) as img:
                width, height = img.size
                for h in range(height):
                    for w in range(width):
                        # NOTE(review): inference binarizes at 190 while
                        # training uses 230 - confirm this is intentional.
                        if img.getpixel((w, h)) < 190:
                            img_data[0][w + h * width] = 1

            result = sess.run(conv, feed_dict = {x: np.array(img_data), keep_prob: 1.0})

            # The previous hand-rolled scan never demoted the old maximum, so
            # the 2nd and 3rd candidates it reported were wrong.
            (max1_index, max1), (max2_index, max2), (max3_index, max3) = top_three(result[0])

            license_num = license_num + LETTERS_DIGITS[max1_index]
            print ("概率: [%s %0.2f%%] [%s %0.2f%%] [%s %0.2f%%]" % (LETTERS_DIGITS[max1_index],max1*100, LETTERS_DIGITS[max2_index],max2*100, LETTERS_DIGITS[max3_index],max3*100))

        print ("車牌編號是: 【%s】" % license_num)
儲存好上面三個python指令碼後,我們首先進行省份簡稱訓練。在執行程式碼之前,需要先把資料集解壓到訓練指令碼所在目錄。然後,在命令列中進入指令碼所在目錄,輸入執行如下命令:
python train-license-province.py train
訓練結果如下:
然後進行省份簡稱識別,在命令列輸入執行如下命令:
python train-license-province.py predict
執行城市代號訓練(相當於訓練26個字母):
python train-license-letters.py train
識別城市代號:
python train-license-letters.py predict
執行車牌編號訓練(相當於訓練24個字母+10個數字,我國交通法規規定車牌編號中不包含字母I和O):
python train-license-digits.py train
識別車牌編號:
python train-license-digits.py predict
可以看到,在測試圖片上,識別準確率很高。識別結果是閩O-1672Q。
下圖是測試圖片的車牌原圖: