基於tensorflow的深度學習框架優化
以下3個模型均能完成對多個類別的影象識別的過程。
Part 1:
Tensorflow簡單框架的搭建,
第一步:對圖片進行預處理,包括灰度化、尺寸改變等,並批量讀取圖片檔案、建立對應的類別標籤。圖片存於training_images目錄,其下有3個類別的批量圖片,對應3個類別的labels如下圖所示:
# -*- coding:utf-8 -*-
import tensorflow as tf
import cv2
import numpy as np
import os
import random
import sys
from sklearn.model_selection import train_test_split

size = h = w = 28  # every image is resized to size x size pixels
imgs = []  # grayscale images collected by readData
labs = []  # class index of each entry in imgs
# Root folder of the training images
file_dir = './training_images'


def readData(path, dirNum, h=size, w=size):
    """Append every readable .jpg in `path` to `imgs`, labelled `dirNum`.

    Each image is converted to grayscale and resized to `h` rows by `w`
    columns. The resized image is also written to ./other/<dirNum + 1>.jpg.

    Args:
        path: folder whose .jpg files are loaded.
        dirNum: class index stored in `labs` for every image of this folder.
        h: target image height in pixels.
        w: target image width in pixels.
    """
    for filename in os.listdir(path):
        if not filename.endswith('.jpg'):
            continue
        filename = path + '/' + filename
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread reports an unreadable file by returning None;
            # skip it instead of crashing inside cvtColor.
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # cv2.resize takes the target size as (width, height).
        img = cv2.resize(img, (w, h))
        # NOTE: the file name depends only on the class index, so every image
        # of a class overwrites the previous one.
        cv2.imwrite('./other' + '/' + str(dirNum + 1) + '.jpg', img)
        imgs.append(img)
        labs.append(dirNum)


def file_name(file_dir):
    """Load the images of every class folder directly under `file_dir`.

    Sub-folders are visited in sorted order and numbered 0, 1, 2, ... so the
    class indices are reproducible; plain files in `file_dir` are ignored.

    Returns:
        The number of class folders found.
    """
    dirNum = 0
    for lists in sorted(os.listdir(file_dir)):
        sub_path = file_dir + '/' + lists
        print(sub_path)
        if not os.path.isdir(sub_path):
            # Only folders are classes; listing a plain file would raise.
            continue
        readData(sub_path, dirNum)
        dirNum = dirNum + 1
    return dirNum


dirNum = file_name(file_dir)  # loads all images, returns the number of classes
# Convert images and labels to arrays
imgs = np.array(imgs)
# Row i of the identity matrix is the one-hot vector of class i.
labs = np.eye(dirNum, dtype=int)[np.asarray(labs, dtype=int)]
第二步:使用sklearn裡的庫以0.05的比例隨機生成訓練集和測試集,然後改變資料集形狀,標準化
# Hold out 5% of the samples as the test set; the split seed is redrawn on
# every run.
train_x, test_x, train_y, test_y = train_test_split(
    imgs,
    labs,
    test_size=0.05,
    random_state=random.randint(0, 100),
)
# Flatten every image to a 784-element row, then divide the pixel values
# by 255.
train_x = train_x.reshape((train_x.shape[0], 784)).astype('float32') / 255.0
test_x = test_x.reshape((test_x.shape[0], 784)).astype('float32') / 255.0
# Graph inputs: flattened images and their one-hot labels
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, dirNum])
設計Tensorflow簡單框架,包括sess、W、b初始化,softmax函式預測分類情況,並繼續求loss,使用固定學習率0.5優化,使用MBGD小批量梯度下降法,設定batch_size進行小批量樣本迭代訓練,最後求準確率
sess = tf.InteractiveSession()  # registers itself as the default session
# Single linear layer; weights and biases start at zero
W = tf.Variable(tf.zeros([784, dirNum]))
b = tf.Variable(tf.zeros([dirNum]))
logits = tf.matmul(x, W) + b
y = tf.nn.softmax(logits)  # class probabilities, used for prediction
# The loss is computed from the logits: taking tf.log of the softmax output
# produces -inf/NaN as soon as one probability underflows to 0.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
# Plain gradient descent with a fixed learning rate of 0.5
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
tf.global_variables_initializer().run()
# Mini-batches of 20 images
batch_size = 20
num_batch = len(train_x) // batch_size  # full batches per pass
for i in range(500):
    for n in range(num_batch):
        batch_x = train_x[n * batch_size: (n + 1) * batch_size]
        batch_y = train_y[n * batch_size: (n + 1) * batch_size]
        train_step.run({x: batch_x, y_: batch_y})
# A prediction is correct when the most probable class matches the label
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(accuracy.eval({x: test_x, y_: test_y}))
Part 2 :
與上對比優化的地方在於:
1)權重初始化不再全為0,而改用截斷的正態分佈,標準差設為0.1。可用的方式有:tf.random_normal(正態分佈)、tf.random_uniform(均勻分佈)、tf.truncated_normal(截斷的正態分佈,取值不會偏離平均值超過2個標準差)以及tf.random_shuffle等幾種。
2)加入隱含層hidden1,使用Relu函式啟用,啟用函式還有sigmoid、tanh等,CNN一般用Relu防止梯度彌散。
3 ) 使用dropout層防止過擬合,即在訓練中讓某些節點置為0,不參與運算。引數keep_prob取值在(0,1)之間,表示節點不被置為0的比率;但在最後預測時一般需設為1,因為預測需要用全部特徵。此外還可以使用正則化或lrn層等方式。
4)使用自適應的學習率演算法Adagrad,此外還有Adam、Adadelta等演算法,可以減少引數除錯負擔。
# -*- coding:utf-8 -*-
import tensorflow as tf
import cv2
import numpy as np
import os
import random
import sys
from sklearn.model_selection import train_test_split
# Edge length of the resized images (28 * 28 = 784 pixels)
size = 28
h = size
w = size
# Accumulators filled while the image folders are read
imgs, labs = [], []
# Root folder of the training images
file_dir = './training_images'
# Read the images of one class folder and record their class index
def readData(path, dirNum, h=size, w=size):
    """Append every readable .jpg in `path` to `imgs`, labelled `dirNum`.

    Each image is converted to grayscale and resized to `h` rows by `w`
    columns. The resized image is also written to ./other/<dirNum + 1>.jpg.

    Args:
        path: folder whose .jpg files are loaded.
        dirNum: class index stored in `labs` for every image of this folder.
        h: target image height in pixels.
        w: target image width in pixels.
    """
    for filename in os.listdir(path):
        if not filename.endswith('.jpg'):
            continue
        filename = path + '/' + filename
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread reports an unreadable file by returning None;
            # skip it instead of crashing inside cvtColor.
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # cv2.resize takes the target size as (width, height).
        img = cv2.resize(img, (w, h))
        # NOTE: the file name depends only on the class index, so every image
        # of a class overwrites the previous one.
        cv2.imwrite('./other' + '/' + str(dirNum + 1) + '.jpg', img)
        imgs.append(img)
        labs.append(dirNum)
# Walk the class folders, number them and load their images
def file_name(file_dir):
    """Load the images of every class folder directly under `file_dir`.

    Sub-folders are visited in sorted order and numbered 0, 1, 2, ... so the
    class indices are reproducible; plain files in `file_dir` are ignored.

    Returns:
        The number of class folders found.
    """
    dirNum = 0
    for lists in sorted(os.listdir(file_dir)):
        sub_path = file_dir + '/' + lists
        print(sub_path)
        if not os.path.isdir(sub_path):
            # Only folders are classes; listing a plain file would raise.
            continue
        readData(sub_path, dirNum)
        dirNum = dirNum + 1
    return dirNum
dirNum = file_name(file_dir)  # loads all images, returns the number of classes
# Convert images and labels to arrays
imgs = np.array(imgs)
# Row i of the identity matrix is the one-hot vector of class i; indexing it
# with the label array builds all rows at once without shadowing `list`.
labs = np.eye(dirNum, dtype=int)[np.asarray(labs, dtype=int)]
# Hold out 5% of the samples as the test set; the split seed is redrawn on
# every run.
train_x, test_x, train_y, test_y = train_test_split(
    imgs,
    labs,
    test_size=0.05,
    random_state=random.randint(0, 100),
)
# Flatten every image to a 784-element row, then divide the pixel values
# by 255.
train_x = train_x.reshape((train_x.shape[0], 784)).astype('float32') / 255.0
test_x = test_x.reshape((test_x.shape[0], 784)).astype('float32') / 255.0
# Graph inputs: flattened images and their one-hot labels
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, dirNum])
sess = tf.InteractiveSession()  # registers itself as the default session
in_units = 784
h1_units = 300
# Hidden-layer weights come from a truncated normal with stddev 0.1
W1 = tf.Variable(tf.truncated_normal([in_units, h1_units], stddev=0.1))
b1 = tf.Variable(tf.zeros([h1_units]))
W2 = tf.Variable(tf.zeros([h1_units, dirNum]))
b2 = tf.Variable(tf.zeros([dirNum]))
# Fraction of hidden units kept by dropout; fed as 0.75 while training and
# 1.0 for evaluation
keep_prob = tf.placeholder(tf.float32)
hidden1 = tf.nn.relu(tf.matmul(x, W1) + b1)
hidden1_drop = tf.nn.dropout(hidden1, keep_prob)
logits = tf.matmul(hidden1_drop, W2) + b2
y = tf.nn.softmax(logits)  # class probabilities, used for prediction
# Define loss and optimizer. The loss is computed from the logits: taking
# tf.log of the softmax output produces -inf/NaN as soon as one probability
# underflows to 0.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
train_step = tf.train.AdagradOptimizer(0.3).minimize(cross_entropy)
# Train
tf.global_variables_initializer().run()
batch_size = 20
num_batch = len(train_x) // batch_size  # full batches per pass
for i in range(500):
    for n in range(num_batch):
        batch_x = train_x[n * batch_size: (n + 1) * batch_size]
        batch_y = train_y[n * batch_size: (n + 1) * batch_size]
        train_step.run({x: batch_x, y_: batch_y, keep_prob: 0.75})
# Test trained model
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(accuracy.eval({x: test_x, y_: test_y, keep_prob: 1.0}))
Part 3 :
此處為完整的CNN卷積神經網路的框架搭建。
經歷兩層卷積啟用、池化層、一層全連線、一層dropout、一層輸出層,各層的具體實現及說明已在程式碼中備註,上面的1)-4)優化在該模型中均已用到,只不過改用了Adam學習率自適應函式。在程式碼最後的寫法上面也有一些改善,比如增加了100次顯示一次準確率的輸出,當達到很高值時說明已經是一個很好的模型了,直接結束。下面程式碼可以作為基於tf的標準的CNN深度學習框架模型。
# -*- coding:utf-8 -*-
import tensorflow as tf
import cv2
import numpy as np
import os
import random
import sys
from sklearn.model_selection import train_test_split
# Edge length of the resized images (28 * 28 = 784 pixels)
size = 28
h = size
w = size
# Accumulators filled while the image folders are read
imgs, labs = [], []
# Root folder of the training images
file_dir = './training_images'
# Read the images of one class folder and record their class index
def readData(path, dirNum, h=size, w=size):
    """Append every readable .jpg in `path` to `imgs`, labelled `dirNum`.

    Each image is converted to grayscale and resized to `h` rows by `w`
    columns. The resized image is also written to ./other/<dirNum + 1>.jpg.

    Args:
        path: folder whose .jpg files are loaded.
        dirNum: class index stored in `labs` for every image of this folder.
        h: target image height in pixels.
        w: target image width in pixels.
    """
    for filename in os.listdir(path):
        if not filename.endswith('.jpg'):
            continue
        filename = path + '/' + filename
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread reports an unreadable file by returning None;
            # skip it instead of crashing inside cvtColor.
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # cv2.resize takes the target size as (width, height).
        img = cv2.resize(img, (w, h))
        # NOTE: the file name depends only on the class index, so every image
        # of a class overwrites the previous one.
        cv2.imwrite('./other' + '/' + str(dirNum + 1) + '.jpg', img)
        imgs.append(img)
        labs.append(dirNum)
def file_name(file_dir):
    """Load the images of every class folder directly under `file_dir`.

    Sub-folders are visited in sorted order and numbered 0, 1, 2, ... so the
    class indices are reproducible; plain files in `file_dir` are ignored.

    Returns:
        The number of class folders found.
    """
    dirNum = 0
    for lists in sorted(os.listdir(file_dir)):
        sub_path = file_dir + '/' + lists
        print(sub_path)
        if not os.path.isdir(sub_path):
            # Only folders are classes; listing a plain file would raise.
            continue
        readData(sub_path, dirNum)
        dirNum = dirNum + 1
    return dirNum
dirNum = file_name(file_dir)  # loads all images, returns the number of classes
# Convert images and labels to arrays
imgs = np.array(imgs)
# Row i of the identity matrix is the one-hot vector of class i; indexing it
# with the label array builds all rows at once without shadowing `list`.
labs = np.eye(dirNum, dtype=int)[np.asarray(labs, dtype=int)]
# Hold out 5% of the samples as the test set; the split seed is redrawn on
# every run.
train_x, test_x, train_y, test_y = train_test_split(
    imgs,
    labs,
    test_size=0.05,
    random_state=random.randint(0, 100),
)
print(train_x.shape)
# Reshape to (number of images, height, width, channels) with one channel
# for grayscale, then divide the pixel values by 255.
train_x = train_x.reshape((train_x.shape[0], size, size, 1)).astype('float32') / 255.0
test_x = test_x.reshape((test_x.shape[0], size, size, 1)).astype('float32') / 255.0
# Registers itself as the default session, so the .run() / .eval() calls
# further down are made without passing a session explicitly.
sess = tf.InteractiveSession()
def weight_variable(shape):
    """Return a variable of `shape` drawn from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Return a variable of `shape` with every element set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W):
    """Convolve `x` with kernel `W` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    convolved = tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
    return convolved
def max_pool_2x2(x):
    """Max-pool `x` over 2x2 windows with stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    pooled = tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
    return pooled
# Graph inputs: single-channel images and their one-hot labels
x = tf.placeholder(tf.float32, [None, size, size, 1])
y_ = tf.placeholder(tf.float32, [None, dirNum])
# First convolution + ReLU + pooling: 5x5 kernels, 1 input channel,
# 32 output channels
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
# Second convolution + ReLU + pooling: 5x5 kernels, 32 -> 64 channels
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
# Fully connected layer; two 2x2 poolings shrink 28x28 to 7x7
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
# Dropout layer; keep_prob is the fraction of units kept
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
# Output layer
W_fc2 = weight_variable([1024, dirNum])
b_fc2 = bias_variable([dirNum])
logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_conv = tf.nn.softmax(logits)  # class probabilities, used for prediction
# The loss is computed from the logits: taking tf.log of the softmax output
# produces -inf/NaN as soon as one probability underflows to 0.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# A prediction is correct when the most probable class matches the label
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.global_variables_initializer().run()
# Mini-batches of 20 images
batch_size = 20
num_batch = len(train_x) // batch_size  # full batches per pass
stop_early = False
for i in range(500):
    for n in range(num_batch):
        batch_x = train_x[n * batch_size: (n + 1) * batch_size]
        batch_y = train_y[n * batch_size: (n + 1) * batch_size]
        step = i * num_batch + n
        if step % 100 == 0:
            # Every 100 steps, report accuracy on the current batch with
            # dropout disabled.
            train_accuracy = accuracy.eval(feed_dict={
                x: batch_x, y_: batch_y, keep_prob: 1.0})
            print("step %d, training accuracy %g" % (step, train_accuracy))
            if train_accuracy > 0.99 and i > 2:
                # Stop training but fall through to the test evaluation;
                # exiting the process here would skip it.
                stop_early = True
                break
        train_step.run(feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.5})
    if stop_early:
        break
print("test accuracy %g" % accuracy.eval(feed_dict={
    x: test_x, y_: test_y, keep_prob: 1.0}))
最終使用我自己的3個類別圖片(小車、飛機、摩托車,共幾百張)進行測試,發現前兩種方法的準確率並不理想,第三種準確率最高達到了100%,也符合不斷優化的過程。
不過本文重點放在了優化的可能方式上,並沒有對某一種進一步優化。重點介紹了權重初始化方式、隱含層及啟用函式類別、防止過擬合方法、學習率自適應演算法,當然模型中還有一些可以調節的引數,比如batch_size(每批次樣本訓練數)、迭代次數、卷積層(卷積核大小、特徵數、層數)、池化層(池化大小及方式)等,此處因為沒有理論支援暫未涉及。通過本文你應該瞭解到了基於tensorflow建立CNN框架的模型,邁進了影象識別的一小步。