基於tensorflow的深度學習框架優化

阿新 • • 發佈：2018-12-31

以下3個模型均能完成對多個類別的影象識別的過程。

Part 1:

Tensorflow簡單框架的搭建，

第一步：對圖片進行預處理，包括灰度化、尺寸改變等，並構建圖片檔案的批量讀取及對應的類別標籤。圖片存於training_images目錄，其下有3個類別的子資料夾，各放該類別的批量圖片，對應3個類別的labels如下圖所示：

# -*- coding:utf-8 -*-

import tensorflow as tf
import cv2
import numpy as np
import os
import random
import sys
from sklearn.model_selection import train_test_split

size = h = w = 28 # side length, in pixels, that every image is resized to

imgs = []  # filled by readData with the preprocessed images
labs = []  # filled by readData with the integer class index of each image

# Root directory of the training images
file_dir = './training_images'

# Load every .jpg of one class folder and tag it with the class index
def readData(path, dirNum, h=size, w=size):
    """Read, grayscale and resize all '.jpg' images found directly in *path*.

    Each processed image is appended to the module-level ``imgs`` list and
    *dirNum* is appended to the module-level ``labs`` list, so both lists stay
    aligned.

    Args:
        path: directory holding the images of a single class.
        dirNum: integer class index recorded for every image in *path*.
        h: target image height in pixels.
        w: target image width in pixels.
    """
    if not os.path.isdir(path):
        # os.listdir would raise on a plain file; there is nothing to read.
        return

    for filename in os.listdir(path):
        if not filename.endswith('.jpg'):
            continue
        filename = path + '/' + filename

        img = cv2.imread(filename)
        if img is None:
            # cv2.imread reports an unreadable/corrupt file by returning None;
            # skip it instead of crashing inside cvtColor.
            continue

        # Convert to a single-channel grayscale image.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # cv2.resize takes dsize as (width, height), so the result has
        # shape (h, w).
        img = cv2.resize(img, (w, h))
        # Save the processed image.
        # NOTE(review): the file name depends only on dirNum, so every image of
        # a class overwrites the same file -- confirm this is intended.
        cv2.imwrite('./other' +'/'+str(dirNum + 1)+'.jpg', img)

        imgs.append(img)
        labs.append(dirNum)

# Walk the class folders, load their images, and count the classes
def file_name(file_dir):
    """Load the images of every class sub-directory of *file_dir*.

    Each sub-directory is treated as one class. Its images are loaded through
    ``readData`` with the running class index (0, 1, 2, ...). Entries that are
    not directories are ignored.

    Args:
        file_dir: root directory whose sub-directories are the classes.

    Returns:
        The number of class sub-directories found.
    """
    dirNum = 0
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # class-index assignment stable from run to run.
    for lists in sorted(os.listdir(file_dir)):
        sub_path = file_dir + '/' + lists
        if not os.path.isdir(sub_path):
            # Stray files are not classes and must not be handed to readData.
            continue
        readData(sub_path, dirNum)
        print(sub_path)
        dirNum = dirNum + 1  # next class index / number of classes so far
    return dirNum

dirNum = file_name(file_dir) # load all images with their class index; returns the number of classes

# Convert the image list and the labels to arrays
imgs = np.array(imgs) # image data
# Row i of the identity matrix is the one-hot vector of class i, so indexing
# it with the integer labels yields one one-hot row per sample.
labs = np.eye(dirNum, dtype=int)[labs] # class labels, one-hot encoded

第二步：使用sklearn裡的庫以0.05的比例隨機生成訓練集和測試集，然後改變資料集形狀，標準化



# Randomly split the data, holding out 5% as the test set
train_x,test_x,train_y,test_y = train_test_split(imgs, labs, test_size=0.05, random_state=random.randint(0,100))
# Flatten every h x w image into one feature vector
n_features = h * w
train_x = train_x.reshape(train_x.shape[0], n_features)
test_x = test_x.reshape(test_x.shape[0], n_features)
# Scale the 0-255 pixel values into [0, 1]
train_x = train_x.astype('float32')/255.0
test_x = test_x.astype('float32')/255.0

# Graph inputs: flattened images and their one-hot labels
x = tf.placeholder(tf.float32, [None, n_features])
y_ = tf.placeholder(tf.float32, [None, dirNum])

設計Tensorflow簡單框架，包括sess、W、b初始化，softmax函式預測分類情況，並繼續求loss，使用固定學習率0.5優化，使用MBGD小批量梯度下降法，設定batch_size進行小批量樣本迭代訓練，最後求準確率


sess = tf.InteractiveSession() # register as the default session

# Softmax regression parameters, all initialised to zero
W = tf.Variable(tf.zeros([h * w, dirNum]))
b = tf.Variable(tf.zeros([dirNum]))

logits = tf.matmul(x, W) + b
y = tf.nn.softmax(logits)  # predicted class probabilities, softmax(xW + b)

# Mean cross-entropy over the batch. It is computed from the raw logits
# because log(softmax(...)) turns into NaN as soon as a probability
# underflows to 0.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))

train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) # plain SGD, fixed learning rate 0.5

tf.global_variables_initializer().run() # initialise all variables

# Mini-batch gradient descent, 20 images per step
batch_size = 20
# Round up so the final, shorter batch is trained on too (and so a training
# set smaller than batch_size still gets at least one step per epoch).
num_batch = (len(train_x) + batch_size - 1) // batch_size
for i in range(500):
    for n in range(num_batch):
        batch_x = train_x[n * batch_size: (n + 1) * batch_size]
        batch_y = train_y[n * batch_size: (n + 1) * batch_size]
        train_step.run({x: batch_x, y_: batch_y})

correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) # per-sample: is the predicted class right?

accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) # fraction of correct predictions

print(accuracy.eval({x: test_x, y_: test_y}))

Part 2 :

與上對比優化的地方在於：

1）權重初始化不再全為0，而是改用截斷的正態分佈，標準差設為0.1。可選的初始化方式有：tf.random_normal（正態分佈）、tf.random_uniform（均勻分佈）、tf.truncated_normal（截斷的正態分佈，取值偏離平均值不會超過2個標準差）、tf.random_shuffle等幾種。

2）加入隱含層hidden1，使用Relu函式啟用；啟用函式還有sigmoid、tanh等，CNN一般用Relu防止梯度彌散。

3）使用dropout層防止過擬合，即在訓練中讓某些節點置為0，不參與運算；引數keep_prob為節點不被置為0的比率，訓練時小於1，但在最後預測時一般需設為1，因為預測需要用全部特徵。此外還有正則化或lrn層等方式。

4）使用自適應的學習率演算法Adagrad，此外還有Adam、Adadelta等演算法，可減少引數除錯負擔。

# -*- coding:utf-8 -*-

import tensorflow as tf
import cv2
import numpy as np
import os
import random
import sys
from sklearn.model_selection import train_test_split

size = h = w = 28 # image side length in pixels; 28 * 28 = 784 values per image

imgs = []  # filled by readData with the preprocessed images
labs = []  # filled by readData with the integer class index of each image

# Root directory of the training images
file_dir = './training_images'

# Load every .jpg of one class folder and tag it with the class index
def readData(path, dirNum, h=size, w=size):
    """Read, grayscale and resize all '.jpg' images found directly in *path*.

    Each processed image is appended to the module-level ``imgs`` list and
    *dirNum* is appended to the module-level ``labs`` list, so both lists stay
    aligned.

    Args:
        path: directory holding the images of a single class.
        dirNum: integer class index recorded for every image in *path*.
        h: target image height in pixels.
        w: target image width in pixels.
    """
    if not os.path.isdir(path):
        # os.listdir would raise on a plain file; there is nothing to read.
        return

    for filename in os.listdir(path):
        if not filename.endswith('.jpg'):
            continue
        filename = path + '/' + filename

        img = cv2.imread(filename)
        if img is None:
            # cv2.imread reports an unreadable/corrupt file by returning None;
            # skip it instead of crashing inside cvtColor.
            continue

        # Convert to a single-channel grayscale image.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # cv2.resize takes dsize as (width, height), so the result has
        # shape (h, w).
        img = cv2.resize(img, (w, h))
        # Save the processed image.
        # NOTE(review): the file name depends only on dirNum, so every image of
        # a class overwrites the same file -- confirm this is intended.
        cv2.imwrite('./other' +'/'+str(dirNum + 1)+'.jpg', img)

        imgs.append(img)
        labs.append(dirNum)

# Walk the class folders, load their images, and count the classes
def file_name(file_dir):
    """Load the images of every class sub-directory of *file_dir*.

    Each sub-directory is treated as one class. Its images are loaded through
    ``readData`` with the running class index (0, 1, 2, ...). Entries that are
    not directories are ignored.

    Args:
        file_dir: root directory whose sub-directories are the classes.

    Returns:
        The number of class sub-directories found.
    """
    dirNum = 0
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # class-index assignment stable from run to run.
    for lists in sorted(os.listdir(file_dir)):
        sub_path = file_dir + '/' + lists
        if not os.path.isdir(sub_path):
            # Stray files are not classes and must not be handed to readData.
            continue
        readData(sub_path, dirNum)
        print(sub_path)
        dirNum = dirNum + 1  # next class index / number of classes so far
    return dirNum

dirNum = file_name(file_dir) # load all images with their class index; returns the number of classes

# Convert the image list and the labels to arrays
imgs = np.array(imgs)
# Row i of the identity matrix is the one-hot vector of class i, so indexing
# it with the integer labels yields one one-hot row per sample.
labs = np.eye(dirNum, dtype=int)[labs]

# Randomly split the data, holding out 5% as the test set
train_x,test_x,train_y,test_y = train_test_split(imgs, labs, test_size=0.05, random_state=random.randint(0,100))
# Flatten every h x w image into one feature vector
n_features = h * w
train_x = train_x.reshape(train_x.shape[0], n_features)
test_x = test_x.reshape(test_x.shape[0], n_features)
# Scale the 0-255 pixel values into [0, 1]
train_x = train_x.astype('float32')/255.0
test_x = test_x.astype('float32')/255.0

# Graph inputs: flattened images and their one-hot labels
x = tf.placeholder(tf.float32, [None, n_features])
y_ = tf.placeholder(tf.float32, [None, dirNum])

sess = tf.InteractiveSession()  # register as the default session

in_units = h * w  # one input per pixel of the flattened image
h1_units = 300
W1 = tf.Variable(tf.truncated_normal([in_units, h1_units], stddev=0.1)) # truncated-normal initial weights, stddev 0.1
b1 = tf.Variable(tf.zeros([h1_units]))
W2 = tf.Variable(tf.zeros([h1_units, dirNum]))
b2 = tf.Variable(tf.zeros([dirNum]))

keep_prob = tf.placeholder(tf.float32)

hidden1 = tf.nn.relu(tf.matmul(x, W1) + b1) # hidden layer with ReLU activation
hidden1_drop = tf.nn.dropout(hidden1, keep_prob) # dropout; keep_prob is the share of units kept: < 1 for training, 1 for prediction
logits = tf.matmul(hidden1_drop, W2) + b2
y = tf.nn.softmax(logits) # predicted class probabilities

# Define loss and optimizer
# The loss is computed from the raw logits because log(softmax(...)) turns
# into NaN as soon as a probability underflows to 0.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
train_step = tf.train.AdagradOptimizer(0.3).minimize(cross_entropy) # Adagrad adapts the learning rate per parameter

# Train
tf.global_variables_initializer().run()

batch_size = 20
# Round up so the final, shorter batch is trained on too (and so a training
# set smaller than batch_size still gets at least one step per epoch).
num_batch = (len(train_x) + batch_size - 1) // batch_size
for i in range(500):
    for n in range(num_batch):
        batch_x = train_x[n * batch_size: (n + 1) * batch_size]
        batch_y = train_y[n * batch_size: (n + 1) * batch_size]
        train_step.run({x: batch_x, y_: batch_y, keep_prob: 0.75})

# Test trained model
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) # per-sample: is the predicted class right?
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) # fraction of correct predictions
print(accuracy.eval({x: test_x, y_: test_y, keep_prob: 1.0}))

Part 3 :

此處為完整的CNN卷積神經網路的框架搭建。

經歷兩層卷積啟用、池化層、一層全連線、一層dropout、一層輸出層，各層的具體實現及說明已在程式碼中備註，上面的1）-4）優化在該模型中均已用到，只不過改用了Adam學習率自適應函式。在程式碼最後的寫法上面也有一些改善，比如增加了100次顯示一次準確率的輸出，當達到很高值時說明已經是一個很好的模型了，直接結束。下面程式碼可以作為基於tf的標準的CNN深度學習框架模型。

# -*- coding:utf-8 -*-

import tensorflow as tf
import cv2
import numpy as np
import os
import random
import sys
from sklearn.model_selection import train_test_split

size = h = w = 28  # image side length in pixels; 28 * 28 = 784 values per image

imgs = []  # filled by readData with the preprocessed images
labs = []  # filled by readData with the integer class index of each image

# Root directory of the training images
file_dir = './training_images'

# Load every .jpg of one class folder and tag it with the class index
def readData(path, dirNum, h=size, w=size):
    """Read, grayscale and resize all '.jpg' images found directly in *path*.

    Each processed image is appended to the module-level ``imgs`` list and
    *dirNum* is appended to the module-level ``labs`` list, so both lists stay
    aligned.

    Args:
        path: directory holding the images of a single class.
        dirNum: integer class index recorded for every image in *path*.
        h: target image height in pixels.
        w: target image width in pixels.
    """
    if not os.path.isdir(path):
        # os.listdir would raise on a plain file; there is nothing to read.
        return

    for filename in os.listdir(path):
        if not filename.endswith('.jpg'):
            continue
        filename = path + '/' + filename

        img = cv2.imread(filename)
        if img is None:
            # cv2.imread reports an unreadable/corrupt file by returning None;
            # skip it instead of crashing inside cvtColor.
            continue

        # Convert to a single-channel grayscale image.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # cv2.resize takes dsize as (width, height), so the result has
        # shape (h, w).
        img = cv2.resize(img, (w, h))
        # Save the processed image.
        # NOTE(review): the file name depends only on dirNum, so every image of
        # a class overwrites the same file -- confirm this is intended.
        cv2.imwrite('./other' + '/' + str(dirNum + 1) + '.jpg', img)

        imgs.append(img)
        labs.append(dirNum)

def file_name(file_dir):
    """Load the images of every class sub-directory of *file_dir*.

    Each sub-directory is treated as one class. Its images are loaded through
    ``readData`` with the running class index (0, 1, 2, ...). Entries that are
    not directories are ignored.

    Args:
        file_dir: root directory whose sub-directories are the classes.

    Returns:
        The number of class sub-directories found.
    """
    dirNum = 0
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # class-index assignment stable from run to run.
    for lists in sorted(os.listdir(file_dir)):
        sub_path = file_dir + '/' + lists
        if not os.path.isdir(sub_path):
            # Stray files are not classes and must not be handed to readData.
            continue
        readData(sub_path, dirNum)
        print(sub_path)
        dirNum = dirNum + 1  # next class index / number of classes so far
    return dirNum

dirNum = file_name(file_dir)  # load all images with their class index; returns the number of classes

# Convert the image list and the labels to arrays
imgs = np.array(imgs)
# Row i of the identity matrix is the one-hot vector of class i, so indexing
# it with the integer labels yields one one-hot row per sample.
labs = np.eye(dirNum, dtype=int)[labs]

# Hold out 5% of the samples for testing; the split seed is drawn at random
split_seed = random.randint(0, 100)
train_x, test_x, train_y, test_y = train_test_split(
    imgs, labs, test_size=0.05, random_state=split_seed)
print(train_x.shape)
# Give each set the layout (sample count, height, width, channels), with one
# channel for grayscale, then scale the 0-255 pixel values into [0, 1]
image_shape = (size, size, 1)
train_x = train_x.reshape((len(train_x),) + image_shape).astype('float32') / 255.0
test_x = test_x.reshape((len(test_x),) + image_shape).astype('float32') / 255.0

# Open the session used by the rest of the script.
# NOTE(review): the later .run()/.eval() calls pass no session, so they
# presumably rely on this one being installed as the default -- confirm.
sess = tf.InteractiveSession()

def weight_variable(shape):
    """Return a variable of *shape* initialised from a truncated normal.

    The initial values use a standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a variable of *shape* with every element initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    """Convolve *x* with the filters *W* using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    convolved = tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
    return convolved

def max_pool_2x2(x):
    """Max-pool *x* over 2x2 windows with stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    pooled = tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
    return pooled

# Graph inputs: grayscale images and their one-hot labels
x = tf.placeholder(tf.float32, [None, size, size, 1])
y_ = tf.placeholder(tf.float32, [None, dirNum])

# First convolution + ReLU + pooling stage
W_conv1 = weight_variable([5, 5, 1, 32])  # 5x5 kernels, 1 input channel, 32 output channels (32 filters)
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)  # 2x2 max pooling

# Second convolution + ReLU + pooling stage
W_conv2 = weight_variable([5, 5, 32, 64])  # 5x5 kernels, 32 -> 64 channels
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Fully connected layer
# The stride-1 SAME convolutions keep the spatial size and each SAME-padded
# stride-2 pool halves it rounding up, so two pools leave ceil(size / 4)
# pixels per side (28 -> 7).
pooled_side = (size + 3) // 4
flat_units = pooled_side * pooled_side * 64
W_fc1 = weight_variable([flat_units, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, flat_units])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout layer
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Output layer
W_fc2 = weight_variable([1024, dirNum])
b_fc2 = bias_variable([dirNum])
logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_conv = tf.nn.softmax(logits)  # predicted class probabilities

# Loss is computed from the raw logits because log(softmax(...)) turns into
# NaN as soon as a probability underflows to 0.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)  # Adam adapts the learning rate per parameter

correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))  # fraction of correct predictions
tf.global_variables_initializer().run()

# 20 images per training step
batch_size = 20
# Round up so the final, shorter batch is trained on too (and so a training
# set smaller than batch_size still gets at least one step per epoch).
num_batch = (len(train_x) + batch_size - 1) // batch_size
converged = False
for i in range(500):
    for n in range(num_batch):
        batch_x = train_x[n * batch_size: (n + 1) * batch_size]
        batch_y = train_y[n * batch_size: (n + 1) * batch_size]

        step = i * num_batch + n
        if step % 100 == 0:
            # Every 100 steps report the accuracy on the current batch, with
            # dropout disabled.
            train_accuracy = accuracy.eval(feed_dict={
                x: batch_x, y_: batch_y, keep_prob: 1.0})
            print("step %d, training accuracy %g" % (step, train_accuracy))
            if train_accuracy > 0.99 and i > 2:
                # Good enough: stop training, but fall through to the test
                # evaluation below instead of exiting the process.
                converged = True
                break
        train_step.run(feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.5})
    if converged:
        break

print("test accuracy %g" % accuracy.eval(feed_dict={
    x: test_x, y_: test_y, keep_prob: 1.0}))

最終使用我自己的3個類別圖片（小車、飛機、摩托車幾百張）發現前兩種方法的結果準確率並不明顯，第三種準確率最高達到了100%，也是符合不斷優化的一個過程。

不過本文重點放在了優化的可能方式上，並沒有對某一種進一步優化。重點介紹了權重初始化方式、隱含層及啟用函式類別、防止過擬合方法、學習率自適應演算法，當然模型中還有一些可以調節的引數，比如batch_size(每批次樣本訓練數)、迭代次數、卷積層（卷積核大小、特徵數、層數）、池化層（池化大小及方式）等，此處因為沒有理論支援暫未涉及。通過本文你應該瞭解到了基於tensorflow建立CNN框架的模型，邁進了影象識別的一小步。

基於tensorflow的深度學習框架優化

基於tensorflow的深度學習框架優化

TensorFlow 深度學習框架（2）-- 反向傳播優化神經網路

深度學習第一步：windows+Anaconda下安裝tensorflow深度學習框架

TensorFlow深度學習框架學習（二）：TensorFlow實現線性支援向量機（SVM）

基於Matconvnet深度學習框架的方言分類（2）

基於matconvnet深度學習框架的方言分類（3）

基於Matconvnet深度學習框架的方言分類（1）

TensorFlow 深度學習框架（9）-- 經典卷積網路模型 : LeNet-5 模型 & Inception-v3 模型

TensorFlow 深度學習框架（1）-- 神經網路與前向傳播

深度學習框架tensorflow學習與應用7（改變模型和優化器提升準確率）

深度學習框架tensorflow學習與應用6（優化器SGD、ADAM、Adadelta、Momentum、RMSProp比較）

深度學習框架keras安裝（後端基於Tensorflow/theano）

win10下基於python（anaconda）安裝gpu版本的TensorFlow以及kears深度學習框架

如何在基於tensorflow的深度學習框架keras中指定GPU記憶體使用大小

如何在基於tensorflow的深度學習框架keras中指定GPU

深度學習框架TensorFlow學習與應用（四）——擬合問題、優化器

從TensorFlow到PyTorch：九大深度學習框架哪款最適合你？

深度學習框架哪家強：TensorFlow？Caffe？MXNet？Keras？PyTorch？

TensorFlow筆記（7）-----實戰Google深度學習框架----隊列與多線程

《Tensorflow 實戰Google深度學習框架》PDF版

基於tensorflow的深度學習框架優化

相關推薦