
Deep Learning Framework Optimization Based on TensorFlow

All three models below can complete multi-class image recognition.

Part 1:

Building a simple TensorFlow framework.

Step 1: Preprocess the images, including grayscale conversion and resizing, and build batch reading of the image files together with their corresponding class labels. The images are stored under the training_images directory, which contains one sub-folder of images per class (3 classes here); each folder's index is used as its class label.


# -*- coding:utf-8 -*-

import tensorflow as tf
import cv2
import numpy as np
import os
import random
import sys
from sklearn.model_selection import train_test_split

size = h = w = 28  # target image size (28x28)

imgs = []
labs = []

# directory containing the training images
file_dir = './training_images'

# Read the image data and tag each image with its class index
def readData(path, dirNum, h=size, w=size):
    for filename in os.listdir(path):  # list of file and folder names in the given directory
        if filename.endswith('.jpg'):  # keep only .jpg files
            filename = path + '/' + filename
            # print(filename)

            img = cv2.imread(filename)  # read the image file
            # convert to grayscale
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # cv2.imshow('image', img)  # optional preview (needs cv2.waitKey to refresh)
            img = cv2.resize(img, (h, w))  # resize the image
            # save a resized copy (one file per class, overwritten each time)
            cv2.imwrite('./other' + '/' + str(dirNum + 1) + '.jpg', img)

            imgs.append(img)
            labs.append(dirNum)

# Record which class (directory) each image comes from, and read the images
def file_name(file_dir):
    fileNum = 0
    dirNum = 0
    for lists in os.listdir(file_dir):
        # sub_path = os.path.join(file_dir, lists)
        sub_path = file_dir + '/' + lists
        print(sub_path)
        if os.path.isfile(sub_path):
            fileNum = fileNum + 1  # count plain files
        elif os.path.isdir(sub_path):
            readData(sub_path, dirNum)  # read every image in this class directory
            dirNum = dirNum + 1  # count class sub-directories
    return dirNum

dirNum = file_name(file_dir)  # read the images and labels; returns the number of classes

# Convert the image data and labels to arrays
imgs = np.array(imgs)  # image data
one_hot = []
for lab in labs:
    arr = [0] * dirNum
    arr[lab] = 1
    one_hot.append(arr)
labs = np.array(one_hot)  # one-hot class labels

Step 2: Use sklearn's train_test_split to randomly split the data into a training set and a test set with a test ratio of 0.05, then reshape the datasets and normalize them.



# Randomly split into training and test sets
train_x, test_x, train_y, test_y = train_test_split(imgs, labs, test_size=0.05, random_state=random.randint(0, 100))
# Reshape: flatten each 28x28 image into a 784-dimensional vector
train_x = train_x.reshape(train_x.shape[0], 784)
test_x = test_x.reshape(test_x.shape[0], 784)
# Scale pixel values into [0, 1]
train_x = train_x.astype('float32') / 255.0
test_x = test_x.astype('float32') / 255.0

# Placeholders for feeding data into the computation graph
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, dirNum])

Design the simple TensorFlow framework: register the session, initialize W and b, use softmax to predict the class probabilities, compute the cross-entropy loss, optimize it with a fixed learning rate of 0.5 using mini-batch gradient descent (MBGD) with a fixed batch_size, and finally compute the accuracy.


sess = tf.InteractiveSession()  # register the default session

# W and b shapes, initialized to all zeros
W = tf.Variable(tf.zeros([784, dirNum]))
b = tf.Variable(tf.zeros([dirNum]))

y = tf.nn.softmax(tf.matmul(x, W) + b)  # softmax(Wx + b)

cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))  # loss, averaged over the batch

train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)  # fixed learning rate of 0.5

tf.global_variables_initializer().run()  # initialize all global variables

# Mini-batches of 20 images each
batch_size = 20
num_batch = len(train_x) // batch_size  # number of batches per epoch
for i in range(500):
    for n in range(num_batch):
        batch_x = train_x[n * batch_size: (n + 1) * batch_size]
        batch_y = train_y[n * batch_size: (n + 1) * batch_size]
        train_step.run({x: batch_x, y_: batch_y})

correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))  # whether each prediction is correct

accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))  # accuracy

print(accuracy.eval({x: test_x, y_: test_y}))
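
Note: the hand-written cross-entropy above can return NaN when a predicted probability is exactly 0 (log(0)). A numerically safer alternative, shown here only as a sketch that reuses the x, W, b and y_ defined above, is to let TensorFlow compute softmax and cross-entropy together from the raw logits:

# Sketch of a numerically safer loss (assumes the same x, W, b, y_ as above)
logits = tf.matmul(x, W) + b  # raw class scores before softmax
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)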

Part 2:

Compared with the model above, the optimizations are:

1) Weights are no longer initialized to all zeros, but drawn from a truncated normal distribution with a standard deviation of 0.1. TensorFlow offers several initializers: tf.random_normal (normal distribution), tf.random_uniform (uniform distribution), tf.truncated_normal (truncated normal, never more than 2 standard deviations from the mean), tf.random_shuffle, and others; see the short sketch after this list.

2) A hidden layer hidden1 is added and activated with ReLU. Other activation functions include sigmoid and tanh; CNNs generally use ReLU to mitigate vanishing gradients.

3) A dropout layer is used to prevent overfitting: during training some nodes are randomly set to 0 and excluded from the computation, controlled by keep_prob in (0, 1). For the final prediction keep_prob is generally set to 1, because prediction should use all features. Other options include regularization or an LRN layer; a regularization sketch follows the Part 2 code below.

4) The adaptive learning-rate algorithm Adagrad is used; alternatives such as Adam and Adadelta likewise reduce the burden of tuning the learning rate.
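
As a quick side-by-side illustration of the initializers listed in 1) (a sketch only, not part of the model below; the shapes and the 0.1 scale are just example values):

import tensorflow as tf

# Hypothetical comparison of TF 1.x weight initializers
w_normal    = tf.Variable(tf.random_normal([784, 300], stddev=0.1))     # normal distribution
w_uniform   = tf.Variable(tf.random_uniform([784, 300], -0.1, 0.1))     # uniform distribution
w_truncated = tf.Variable(tf.truncated_normal([784, 300], stddev=0.1))  # normal, clipped at 2 standard deviations
idx_shuffled = tf.random_shuffle(tf.range(10))                          # random permutation along the first dimension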

# -*- coding:utf-8 -*-

import tensorflow as tf
import cv2
import numpy as np
import os
import random
import sys
from sklearn.model_selection import train_test_split

size = h = w = 28  # 28*28 = 784 pixels

imgs = []
labs = []

# directory containing the training images
file_dir = './training_images'

# Read the image data and tag each image with its class index
def readData(path, dirNum, h=size, w=size):
    for filename in os.listdir(path):  # list of file and folder names in the given directory
        if filename.endswith('.jpg'):  # keep only .jpg files
            filename = path + '/' + filename
            # print(filename)

            img = cv2.imread(filename)  # read the image file
            # convert to grayscale
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # cv2.imshow('image', img)  # optional preview (needs cv2.waitKey to refresh)
            img = cv2.resize(img, (h, w))  # resize the image
            # save a resized copy (one file per class, overwritten each time)
            cv2.imwrite('./other' + '/' + str(dirNum + 1) + '.jpg', img)

            imgs.append(img)
            labs.append(dirNum)

# Record which class (directory) each image comes from, and read the images
def file_name(file_dir):
    fileNum = 0
    dirNum = 0
    for lists in os.listdir(file_dir):
        # sub_path = os.path.join(file_dir, lists)
        sub_path = file_dir + '/' + lists
        print(sub_path)
        if os.path.isfile(sub_path):
            fileNum = fileNum + 1  # count plain files
        elif os.path.isdir(sub_path):
            readData(sub_path, dirNum)  # read every image in this class directory
            dirNum = dirNum + 1  # count class sub-directories
            # print(dirNum)
    return dirNum

dirNum = file_name(file_dir)  # read the images and labels; returns the number of classes

# Convert the image data and labels to arrays
imgs = np.array(imgs)  # image data
one_hot = []
for lab in labs:
    arr = [0] * dirNum
    arr[lab] = 1
    one_hot.append(arr)
labs = np.array(one_hot)  # one-hot class labels

# Randomly split into training and test sets
train_x, test_x, train_y, test_y = train_test_split(imgs, labs, test_size=0.05, random_state=random.randint(0, 100))
# Reshape: flatten each 28x28 image into a 784-dimensional vector
train_x = train_x.reshape(train_x.shape[0], 784)
test_x = test_x.reshape(test_x.shape[0], 784)
# Scale pixel values into [0, 1]
train_x = train_x.astype('float32') / 255.0
test_x = test_x.astype('float32') / 255.0

# Placeholders for feeding data into the computation graph
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, dirNum])

sess = tf.InteractiveSession()  # register the default session

in_units = 784
h1_units = 300
W1 = tf.Variable(tf.truncated_normal([in_units, h1_units], stddev=0.1))  # weights from a truncated normal, stddev 0.1
b1 = tf.Variable(tf.zeros([h1_units]))
W2 = tf.Variable(tf.zeros([h1_units, dirNum]))
b2 = tf.Variable(tf.zeros([dirNum]))

keep_prob = tf.placeholder(tf.float32)

hidden1 = tf.nn.relu(tf.matmul(x, W1) + b1)  # hidden layer with ReLU activation
hidden1_drop = tf.nn.dropout(hidden1, keep_prob)  # dropout: keep_prob is the fraction of nodes kept; < 1 when training, 1 when predicting
y = tf.nn.softmax(tf.matmul(hidden1_drop, W2) + b2)  # softmax classification

# Define loss and optimizer
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.AdagradOptimizer(0.3).minimize(cross_entropy)  # Adagrad adapts the learning rate per parameter

# Train
tf.global_variables_initializer().run()

batch_size = 20
num_batch = len(train_x) // batch_size  # number of batches per epoch
for i in range(500):
    for n in range(num_batch):
        batch_x = train_x[n * batch_size: (n + 1) * batch_size]
        batch_y = train_y[n * batch_size: (n + 1) * batch_size]
        train_step.run({x: batch_x, y_: batch_y, keep_prob: 0.75})

# Test trained model
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))  # whether each prediction is correct
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))  # accuracy
print(accuracy.eval({x: test_x, y_: test_y, keep_prob: 1.0}))
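
For the regularization mentioned in 3), one simple option is an L2 penalty on the weights added to the loss. This is only a sketch under the assumption that W1 and W2 are the weights defined above, and the 1e-4 coefficient is just an example value:

# Hypothetical L2-regularized loss for the model above
l2_penalty = tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2)   # sum of squared weights / 2
total_loss = cross_entropy + 1e-4 * l2_penalty       # example regularization strength
train_step = tf.train.AdagradOptimizer(0.3).minimize(total_loss)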

Part 3:

This part is the build of a complete CNN (convolutional neural network) framework.

The network goes through two convolution + activation + pooling stages, one fully connected layer, one dropout layer, and one output layer; the implementation and explanation of each layer are commented in the code. The optimizations 1)-4) above are all applied in this model, except that the Adam adaptive learning-rate optimizer is used instead of Adagrad. The training loop is also slightly improved: the training accuracy is printed every 100 steps, and once it reaches a very high value the model is considered good enough and training stops immediately. The code below can serve as a standard TensorFlow-based CNN deep learning framework.
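
As a rough shape check for the network described above (derived from the code below, assuming the 28x28 grayscale inputs):

# input            : [batch, 28, 28, 1]
# conv1 (5x5, SAME): [batch, 28, 28, 32]
# pool1 (2x2)      : [batch, 14, 14, 32]
# conv2 (5x5, SAME): [batch, 14, 14, 64]
# pool2 (2x2)      : [batch, 7, 7, 64]
# flatten          : [batch, 7*7*64 = 3136]
# fc1 + ReLU       : [batch, 1024]
# dropout + output : [batch, dirNum]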

# -*- coding:utf-8 -*-

import tensorflow as tf
import cv2
import numpy as np
import os
import random
import sys
from sklearn.model_selection import train_test_split

size = h = w = 28  # 28*28 = 784 pixels

imgs = []
labs = []

# directory containing the training images
file_dir = './training_images'

# Read the image data and tag each image with its class index
def readData(path, dirNum, h=size, w=size):
    for filename in os.listdir(path):  # list of file and folder names in the given directory
        if filename.endswith('.jpg'):  # keep only .jpg files
            filename = path + '/' + filename
            # print(filename)

            img = cv2.imread(filename)  # read the image file
            # convert to grayscale
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # cv2.imshow('image', img)  # optional preview (needs cv2.waitKey to refresh)
            img = cv2.resize(img, (h, w))  # resize the image
            # save a resized copy (one file per class, overwritten each time)
            cv2.imwrite('./other' + '/' + str(dirNum + 1) + '.jpg', img)

            imgs.append(img)
            labs.append(dirNum)

# Record which class (directory) each image comes from, and read the images
def file_name(file_dir):
    fileNum = 0
    dirNum = 0
    for lists in os.listdir(file_dir):
        # sub_path = os.path.join(file_dir, lists)
        sub_path = file_dir + '/' + lists
        print(sub_path)
        if os.path.isfile(sub_path):
            fileNum = fileNum + 1  # count plain files
        elif os.path.isdir(sub_path):
            readData(sub_path, dirNum)  # read every image in this class directory
            dirNum = dirNum + 1  # count class sub-directories
            # print(dirNum)
    return dirNum

dirNum = file_name(file_dir)  # read the images and labels; returns the number of classes

# Convert the image data and labels to arrays
imgs = np.array(imgs)  # image data
one_hot = []
for lab in labs:
    arr = [0] * dirNum
    arr[lab] = 1
    one_hot.append(arr)
labs = np.array(one_hot)  # one-hot class labels

# Randomly split into training and test sets
train_x, test_x, train_y, test_y = train_test_split(imgs, labs, test_size=0.05, random_state=random.randint(0, 100))
print(train_x.shape)
# Reshape to (number of images, height, width, channels); channels = 1 for grayscale, 3 for color
train_x = train_x.reshape(train_x.shape[0], size, size, 1)
test_x = test_x.reshape(test_x.shape[0], size, size, 1)
# Scale pixel values into [0, 1]
train_x = train_x.astype('float32') / 255.0
test_x = test_x.astype('float32') / 255.0

sess = tf.InteractiveSession()

def weight_variable(shape):
    # weights drawn from a truncated normal distribution, stddev 0.1
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    # biases initialized to a small positive constant
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    # 2-D convolution with stride 1 and SAME (zero) padding
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    # 2x2 max pooling with stride 2, halving height and width
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')

x = tf.placeholder(tf.float32, [None, size, size, 1])
y_ = tf.placeholder(tf.float32, [None, dirNum])

# First convolution + ReLU activation, then pooling
W_conv1 = weight_variable([5, 5, 1, 32])  # 5x5 kernels, 1 input channel, 32 output channels (32 kernels)
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)  # 2x2 max pooling

# Second convolution + ReLU activation, then pooling
W_conv2 = weight_variable([5, 5, 32, 64])  # 5x5 kernels, 32 input channels, 64 output channels
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Fully connected layer
W_fc1 = weight_variable([7 * 7 * 64, 1024])  # after two 2x2 poolings the 28x28 maps shrink to 7x7
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout layer
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Output layer
W_fc2 = weight_variable([1024, dirNum])
b_fc2 = bias_variable([dirNum])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))  # loss
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)  # Adam adapts the learning rate automatically

correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))  # whether each prediction is correct
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))  # accuracy
tf.global_variables_initializer().run()

# Take 20 images per batch
batch_size = 20
num_batch = len(train_x) // batch_size  # number of batches per epoch
for i in range(500):
    for n in range(num_batch):
        batch_x = train_x[n * batch_size: (n + 1) * batch_size]
        batch_y = train_y[n * batch_size: (n + 1) * batch_size]

        if (i * num_batch + n) % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict={
                x: batch_x, y_: batch_y, keep_prob: 1.0})
            print("step %d, training accuracy %g" % (i * num_batch + n, train_accuracy))
            if train_accuracy > 0.99 and i > 2:
                sys.exit(0)
        train_step.run(feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.5})

print("test accuracy %g" % accuracy.eval(feed_dict={
    x: test_x, y_: test_y, keep_prob: 1.0}))
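
Once trained, the same graph can also be reused for a single prediction. This is only a usage sketch: it assumes the session and variables above are still alive, and uses the first test image purely as a stand-in for any preprocessed 28x28 grayscale input:

# Hypothetical single-image prediction with the trained graph above
single_image = test_x[0].reshape(1, size, size, 1)   # one preprocessed image
predicted_class = tf.argmax(y_conv, 1)               # index of the most probable class
print(predicted_class.eval(feed_dict={x: single_image, keep_prob: 1.0}))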

In the end, I tested with my own images from 3 classes (a few hundred pictures of cars, airplanes, and motorcycles). The accuracy of the first two methods was not impressive, while the third reached up to 100%, which matches the expected effect of progressive optimization.

That said, this article focuses on possible directions for optimization rather than pushing any single one further. It mainly covered weight initialization schemes, hidden layers and activation function choices, ways to prevent overfitting, and adaptive learning-rate algorithms. The model of course has other tunable parameters, such as batch_size (samples per training batch), the number of iterations, the convolution layers (kernel size, number of feature maps, depth), and the pooling layers (pooling size and method); these are not explored here for lack of theoretical grounding. After reading this article you should have a picture of how to build a CNN framework model based on TensorFlow, a small step into image recognition.