使用兩層卷積神經網路來實現手寫數字集(面向物件)
阿新 • • 發佈:2018-11-05
介紹使用卷積神經網路來實現手寫數字集的識別
主要採用面向物件的程式設計方法實現,程式碼可以直接執行,分別有訓練模型、儲存模型,和運用儲存好的模型測試單張圖片
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import cv2
class Model:
    """Two-layer CNN (conv -> pool, conv -> pool, two FC layers) for MNIST
    handwritten-digit classification, written against the TensorFlow 1.x
    graph API.

    Offers plain training (`train`), training with checkpointing
    (`train_and_save`), and single-image inference from a saved checkpoint
    (`test_picture`).
    """

    def __init__(self, learning_rate, batch_size, iterations, classes):
        """
        :param learning_rate: learning rate for the Adam optimizer
        :param batch_size: number of samples per training batch
        :param iterations: number of training iterations
        :param classes: number of output classes
        """
        # Bug fix: attribute was misspelled `learning_rage`; the old name is
        # kept as a deprecated alias so external readers still work.
        self.learning_rate = learning_rate
        self.learning_rage = learning_rate  # deprecated alias (old typo)
        self.batch_size = batch_size
        self.iterations = iterations
        self.classes = classes
        self.loss = []      # [iteration, loss] pairs, sampled every 20 steps
        self.accuracy = []  # [iteration, accuracy] pairs, sampled every 20 steps

    def weight_variable(self, shape, name):
        """Return a weight Variable initialized from a truncated normal
        distribution (stddev 0.1).

        :param shape: shape of the weight tensor
        :param name: name for the Variable
        """
        # Bug fix: `name` used to be attached to the initializer op, leaving
        # the Variable itself anonymous; attach it to the Variable instead so
        # it shows up under that name in checkpoints.
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial, name=name)

    def bias_variable(self, shape, name):
        """Return a bias Variable initialized to zero.

        :param shape: shape of the bias tensor
        :param name: name for the Variable
        """
        initial = tf.constant(0., shape=shape)
        return tf.Variable(initial, name=name)

    def conv2d(self, x, W):
        """2-D convolution with stride 1 and SAME padding."""
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")

    def max_pool_2x2(self, x):
        """2x2 max pooling with stride 2 and SAME padding (halves H and W)."""
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding="SAME")

    def build_model(self, x, keep_prob):
        """Build the network graph and return the (pre-softmax) logits.

        :param x: placeholder of flattened images, shape [None, 784]
        :param keep_prob: dropout keep-probability placeholder
        :return: logits tensor of shape [None, self.classes]
        """
        # Reshape to NHWC; -1 lets the batch dimension match automatically,
        # and MNIST images are 28x28 with a single (grayscale) channel.
        x_image = tf.reshape(x, [-1, 28, 28, 1])

        # First conv layer: 32 filters of 3x3x1, then 2x2 max-pool (-> 14x14x32).
        W_conv1 = self.weight_variable([3, 3, 1, 32], name="W_conv1")
        b_conv1 = self.bias_variable([32], name="b_conv1")
        h_conv1 = tf.nn.relu(self.conv2d(x_image, W_conv1) + b_conv1)
        h_pool1 = self.max_pool_2x2(h_conv1)

        # Second conv layer: 64 filters of 3x3x32, then 2x2 max-pool (-> 7x7x64).
        W_conv2 = self.weight_variable([3, 3, 32, 64], name="W_conv2")
        b_conv2 = self.bias_variable([64], name="b_conv2")
        h_conv2 = tf.nn.relu(self.conv2d(h_pool1, W_conv2) + b_conv2)
        h_pool2 = self.max_pool_2x2(h_conv2)

        # First fully connected layer (7*7*64 -> 1024) with dropout.
        W_fc1 = self.weight_variable([7 * 7 * 64, 1024], name="W_fc1")
        b_fc1 = self.bias_variable([1024], name="b_fc1")
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

        # Output layer (1024 -> classes); returns raw logits, softmax is
        # applied inside the loss function.
        W_fc2 = self.weight_variable([1024, self.classes], name="W_fc2")
        b_fc2 = self.bias_variable([self.classes], name="b_fc2")
        return tf.matmul(h_fc1_drop, W_fc2) + b_fc2

    # Backward-compatible alias for the original misspelled method name.
    buile_model = build_model

    def _build_training_graph(self):
        """Create placeholders, loss, train op and accuracy; load MNIST.

        :return: (x, y, keep_prob, y_prediction, cross_entropy,
                  train_step, accuracy)
        """
        x = tf.placeholder(tf.float32, shape=[None, 784], name="x")
        # Consistency fix: label width follows self.classes instead of a
        # hard-coded 10.
        y = tf.placeholder(tf.float32, shape=[None, self.classes])
        keep_prob = tf.placeholder(tf.float32)
        self.mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
        # Bug fix: the original `train()` called buile_model(x, y, keep_prob),
        # which raised a TypeError (the method takes only x and keep_prob).
        y_prediction = self.build_model(x, keep_prob)
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=y,
                                                    logits=y_prediction))
        train_step = tf.train.AdamOptimizer(
            self.learning_rate).minimize(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(y_prediction, 1),
                                      tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        return x, y, keep_prob, y_prediction, cross_entropy, train_step, accuracy

    def _train_loop(self, sess, x, y, keep_prob,
                    cross_entropy, train_step, accuracy):
        """Run the mini-batch training loop and record loss/accuracy.

        Metrics are sampled every 20 iterations into self.loss/self.accuracy;
        the final test-set accuracy is printed at the end.
        """
        for i in range(self.iterations):
            batch = self.mnist.train.next_batch(self.batch_size)
            if i % 20 == 0:
                # Evaluate with dropout disabled (keep_prob = 1).
                feed = {x: batch[0], y: batch[1], keep_prob: 1}
                train_accuracy = accuracy.eval(feed_dict=feed)
                loss = cross_entropy.eval(feed_dict=feed)
                print("step %d, loss is %.4f, training accuracy %.4f"
                      % (i, loss, train_accuracy))
                self.accuracy.append([i, train_accuracy])
                self.loss.append([i, loss])
            train_step.run(feed_dict={x: batch[0], y: batch[1], keep_prob: 0.5})
        # Bug fix: dropout must be OFF when measuring test accuracy
        # (the original fed keep_prob: 0.7, perturbing the evaluation).
        print("Testing Accuracy:",
              sess.run(accuracy, feed_dict={x: self.mnist.test.images,
                                            y: self.mnist.test.labels,
                                            keep_prob: 1.0}))

    def train(self):
        """Train the network and report test-set accuracy (no checkpointing)."""
        (x, y, keep_prob, _y_prediction,
         cross_entropy, train_step, accuracy) = self._build_training_graph()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            self._train_loop(sess, x, y, keep_prob,
                             cross_entropy, train_step, accuracy)

    def train_and_save(self):
        """Train the network, then save a checkpoint to ./model/save_net.ckpt."""
        (x, y, keep_prob, y_prediction,
         cross_entropy, train_step, accuracy) = self._build_training_graph()
        saver = tf.train.Saver()
        # Bug fix: register the prediction tensor BEFORE saving so it is part
        # of the exported meta graph (the original called add_to_collection
        # after saver.save, where it had no effect on the saved file).
        tf.add_to_collection("y_prediction", y_prediction)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            self._train_loop(sess, x, y, keep_prob,
                             cross_entropy, train_step, accuracy)
            saver.save(sess, "./model/save_net.ckpt")

    def get_accuracy(self):
        """Return the recorded [iteration, accuracy] pairs."""
        return self.accuracy

    def get_loss(self):
        """Return the recorded [iteration, loss] pairs."""
        return self.loss

    def test_picture(self):
        """Classify one random prepared test image using the saved checkpoint."""
        # Pick one of the prepared test images at random.  (The original then
        # overwrote this with a machine-specific absolute debug path, making
        # the random choice dead code; that leftover has been removed.)
        picture_path = ("pictures/mnist_test_"
                        + str(np.random.randint(0, 50)) + ".jpg")
        x = tf.placeholder('float', [None, 784])
        keep_prob = tf.placeholder(tf.float32)
        y_predict = self.build_model(x, keep_prob)

        # Load as grayscale and resize to the 28x28 network input.
        im = cv2.imread(picture_path, cv2.IMREAD_GRAYSCALE).astype(np.float32)
        im = cv2.resize(im, (28, 28), interpolation=cv2.INTER_CUBIC)
        # Scale pixel values from 0..255 to -0.5..0.5.
        img_gray = (im - (255 / 2.0)) / 255
        x_img = np.reshape(img_gray, [-1, 784])

        saver = tf.train.Saver()
        with tf.Session() as sess:
            saver.restore(sess, "./model/save_net.ckpt")
            # Bug fix: inference must run with dropout disabled; the original
            # fed keep_prob: 0.5, which randomized the prediction.
            output = sess.run(y_predict, feed_dict={x: x_img, keep_prob: 1.0})
        predic_result = np.argmax(output)
        print('the predict is : ', predic_result)
        cv2.imshow('out', img_gray)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
執行模型
# Build the model and launch training on MNIST.
model = Model(
    learning_rate=1e-4,
    batch_size=32,
    iterations=5000,
    classes=10,
)
model.train()
加粗樣式