
Implementing a two-layer neural network in tensorflow (with code)

I have to admit I'm the lazy type, especially when it comes to writing posts; I've meant to write many times, but the thought of churning out a pile of words always puts me off.

Now for the actual topic. This post contains two pieces of code that implement a two-layer neural network in tensorflow: one written entirely by hand, and one copied from sample code provided on Tencent Cloud and then modified a bit. I'm posting them for two reasons:

(1) Finding working code myself was genuinely painful, so I'm posting these two in the hope of saving others some detours.

(2) The two versions use what are essentially identical parameters, yet two problems show up:

         (1) The prediction accuracy differs a lot. The Tencent Cloud code reaches about 0.90 (embarrassingly, with the data from its original code it reached 0.94; after I switched the input to my local data, replaced the accuracy check with my own, collapsed the two-level loop into a single loop, and tweaked small parameters like the number of iterations, the accuracy dropped to around 0.89, and I still need to think about why). My own code tops out at around 0.79, and even that depends on luck; 0.71 or so is not unusual.

PS: correction! Mine can also reach about 0.87, so it really does seem to come down to luck... why, though?

Screenshot as proof!!

         (2) The initial loss is also completely different. The Tencent Cloud version starts with a loss in the thousands (something like 5000+), the loss drops quickly, and after 100 rounds it ends up around 50-60. Mine is different: the loss starts around 2.3 and eventually drops to about 0.8-0.9.

So I really don't get it: same dataset, same parameters, same accuracy-checking function, yet completely different results. I'll park the question here and come back to it once I understand things more deeply.

One last thing: for a while I found that the loss in my code would not update at all, just hovering around 2.3 no matter how long it trained, and it turned out this seems to be related to the initial weights w as well.

If w is initialized the first way (the truncated_normal call with stddev 0.1 that my code below actually uses), the loss updates; the second way (the commented-out random_normal call) it doesn't, even though the Tencent Cloud code uses random_normal. I still need to look into those two functions; a quick comparison is sketched below.
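
For reference, here are the two kinds of initialization side by side (a minimal sketch, not part of either listing; the variable names are illustrative and the [784, 500] shape just matches the first layer):

import tensorflow as tf

# truncated_normal with stddev=0.1 keeps the initial weights small, so the
# network starts out predicting roughly uniformly and the cross-entropy loss
# begins near ln(10) ≈ 2.3, from where it can actually decrease.
w_small = tf.Variable(tf.truncated_normal([784, 500], stddev=0.1))

# random_normal defaults to stddev=1.0, i.e. weights ten times larger. The
# initial logits are much bigger, which is presumably why the Tencent Cloud
# version's loss starts in the thousands, and large logits can also saturate
# the softmax so that the loss gets stuck.
w_large = tf.Variable(tf.random_normal([784, 500]))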

Enough rambling; the first version below is my own:

import tensorflow as tf
import numpy as np
import pickle
import gzip
# First, define the input data

def get_input():
    f = gzip.open('./mnist.pkl.gz', 'rb')
    training_data, validation_data, test_data = pickle.load(f,encoding='bytes')
    train_x = [np.reshape(x,(1,784)) for x in training_data[0]]
    train_y = [vectorized(y) for y in training_data[1]]
    test_x = [np.reshape(x,(1,784)) for x in test_data[0]]
    test_y = [vectorized(y) for y in test_data[1]]
    return train_x,train_y,test_x,test_y

def get_batch(train_x,train_y,batch_size):
    # Shuffle the sample indices and return one random batch of size batch_size
    a = np.arange(len(train_x))
    np.random.shuffle(a)
    t_x = np.reshape(train_x,(len(train_x),784))
    t_y = np.reshape(train_y,(len(train_y),10))
    t_xi = [t_x[a[i]] for i in range(batch_size)]
    t_yi = [t_y[a[i]] for i in range(batch_size)]
    return t_xi,t_yi

def vectorized(num):
    # One-hot encode a digit label as a (10, 1) column vector
    e = np.zeros((10, 1))
    e[num] = 1
    return e

def predict(test_x,test_y,X,Y,batch_size):
    # Run the trained network over the test set and compute the accuracy
    # (relies on the global sess created in the main block below)
    data_num = len(test_x)
    print("this is test data_num:",data_num)
    count=0
    print("this is predict")
    for data_index in range(0,data_num,batch_size):
        t_x = np.reshape(test_x[data_index:data_index+batch_size],(batch_size,784))
        # t_x = t_x.T
        t_y = np.reshape(test_y[data_index:data_index+batch_size],(batch_size,10))
        
        a = sess.run(Y,feed_dict = {X:t_x})
        # print(a.shape)
        for i in range(len(a)):
            if return_big(t_y[i]) == return_big(a[i]):
                count+=1
    print("this is count:",count)
    print("this is precise:",count/data_num)

def return_big(arr):
    # tf.argmax does the same job in TensorFlow:
    # it returns the index of the largest value in a 1-D array
    t = arr[0]
    t_flag = 0
    for i in range(1,len(arr)):
        if arr[i]> t:
            t = arr[i]
            t_flag = i
    return t_flag

if __name__ == "__main__":
    batch_size = 100
    # number of neurons in each hidden layer
    hidden_layer1 = 500
    hidden_layer2 = 500
    # learning rate
    learning_rate = 0.1
    # number of training iterations (each iteration trains on one random batch)
    epoch_num = 150
    x = tf.placeholder(tf.float32,[batch_size,784])
    y_pred = tf.placeholder(tf.float32,[batch_size,10])
    # y_pred holds the classification targets, i.e. the labels

    w1 = tf.Variable(tf.truncated_normal([784,hidden_layer1],stddev = 0.1))
    # w1 = tf.Variable(tf.random_normal([784,hidden_layer1]))
    b1 = tf.Variable(tf.zeros([1,hidden_layer1])+0.01)
    w2 = tf.Variable(tf.truncated_normal([hidden_layer1,hidden_layer2],stddev = 0.1))
    # w2 = tf.Variable(tf.random_normal([hidden_layer1,hidden_layer2]))
    b2 = tf.Variable(tf.zeros([1,hidden_layer2])+0.01)
    w3 = tf.Variable(tf.truncated_normal([hidden_layer2,10],stddev = 0.1))
    # w3 = tf.Variable(tf.random_normal([hidden_layer2,10]))
    b3 = tf.Variable(tf.zeros([1,10])+0.01)

    h1 = tf.nn.relu(tf.matmul(x,w1)+b1)
    h2 = tf.nn.relu(tf.matmul(h1,w2)+b2)
    y = tf.nn.softmax(tf.matmul(h2,w3)+b3)
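    # Note: tf.nn.softmax_cross_entropy_with_logits (used below) expects raw,
    # unscaled logits and applies softmax itself, so feeding it the softmax
    # output y means softmax is effectively applied twice; this is probably
    # related to why this version's loss starts around 2.3 instead of in the
    # thousands.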

    # Forward pass above; loss and backpropagation below
    entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_pred, logits=y)
    loss = tf.reduce_mean(entropy)
    optimize = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        train_x,train_y,test_x,test_y = get_input()
        print(len(train_x))
        print(len(train_y))
        for i in range(epoch_num):
            t_x,t_y = get_batch(train_x,train_y,batch_size)
            _,_loss = sess.run([optimize,loss],feed_dict = {x:t_x,y_pred:t_y})
            print("loss of epoches[{0}]:{1}".format(i,_loss))

        # Compute the prediction accuracy on the test set
        predict(test_x,test_y,x,y,batch_size)
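
For what it's worth, the hand-rolled return_big/predict check above could also be written with TensorFlow ops, much like the commented-out snippet at the bottom of the Tencent Cloud version. A rough sketch against the tensors from my listing (x, y, y_pred), assuming t_x, t_y are one test batch shaped like the ones built in predict():

# Sketch only: accuracy via tf.argmax instead of return_big
correct = tf.equal(tf.argmax(y, 1), tf.argmax(y_pred, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
# batch_acc = sess.run(accuracy, feed_dict={x: t_x, y_pred: t_y})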

Below is the Tencent Cloud version, with my modifications; I overwrote the original, so if you want that, go find it on Tencent Cloud.

#-*- encoding:utf-8 -*-
#!/usr/bin/env python

import numpy as np
import tensorflow as tf
import pickle
import gzip
# from tensorflow.examples.tutorials.mnist import input_data

def add_layer(inputs, in_size, out_size, activation_function=None):
    W = tf.Variable(tf.random_normal([in_size, out_size]))
    b = tf.Variable(tf.zeros([1, out_size]) + 0.01)

    Z = tf.matmul(inputs, W) + b
    if activation_function is None:
        outputs = Z
    else:
        outputs = activation_function(Z)

    return outputs
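
# Example usage (as in the main block below):
#   hidden = add_layer(X, 784, 500, activation_function=tf.nn.relu)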

def get_batch(train_x,train_y,batch_size):
    # Shuffle the sample indices and return one random batch of size batch_size
    a = np.arange(len(train_x))
    np.random.shuffle(a)
    t_x = np.reshape(train_x,(len(train_x),784))
    t_y = np.reshape(train_y,(len(train_y),10))
    t_xi = [t_x[a[i]] for i in range(batch_size)]
    t_yi = [t_y[a[i]] for i in range(batch_size)]
    return t_xi,t_yi

def get_input():
    f = gzip.open('./mnist.pkl.gz', 'rb')
    training_data, validation_data, test_data = pickle.load(f,encoding='bytes')
    train_x = [np.reshape(x,(784,1)) for x in training_data[0]]
    train_y = [vectorized(y) for y in training_data[1]]
    test_x = [np.reshape(x,(784,1)) for x in test_data[0]]
    test_y = [vectorized(y) for y in test_data[1]]
    return train_x,train_y,test_x,test_y

def vectorized(num):
    e = np.zeros((10, 1))
    e[num] = 1
    return e

def return_big(arr):
    # tf.argmax does the same job in TensorFlow:
    # it returns the index of the largest value in a 1-D array
    t = arr[0]
    t_flag = 0
    for i in range(1,len(arr)):
        if arr[i]> t:
            t = arr[i]
            t_flag = i
    return t_flag

def predict(test_x,test_y,X,Y,batch_size):
    # Run the trained network over the test set and compute the accuracy
    # (relies on the global sess created in the main block below)
    data_num = len(test_x)
    print("this is test data_num:",data_num)
    count=0
    print("this is predict")
    for data_index in range(0,data_num,batch_size):
        t_x = np.reshape(test_x[data_index:data_index+batch_size],(batch_size,784))
        # t_x = t_x.T
        t_y = np.reshape(test_y[data_index:data_index+batch_size],(batch_size,10))
        
        a = sess.run(Y,feed_dict = {X:t_x})
        # print(a.shape)
        for i in range(len(a)):
            if return_big(t_y[i]) == return_big(a[i]):
                count+=1
    print("this is count:",count)
    print("this is precise:",count/data_num)

if __name__ == "__main__":

    # MNIST = input_data.read_data_sets("mnist", one_hot=True)
    # print(MNIST)
    learning_rate = 0.01
    batch_size = 100
    n_epochs = 100

    X = tf.placeholder(tf.float32, [batch_size, 784])
    Y = tf.placeholder(tf.float32, [batch_size, 10])

    layer_dims = [784, 500, 500, 10]
    layer_count = len(layer_dims)-1 # number of weight layers, not counting the input layer
    layer_iter = X

    for l in range(1, layer_count): # layers 1 .. layer_count-1 are the hidden layers
        layer_iter = add_layer(layer_iter, layer_dims[l-1], layer_dims[l], activation_function=tf.nn.relu)
    prediction = add_layer(layer_iter, layer_dims[layer_count-1], layer_dims[layer_count], activation_function=None)
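    # Note: here prediction is the raw logits (no softmax applied), which is
    # exactly what softmax_cross_entropy_with_logits below expects.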

    entropy = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=prediction)
    loss = tf.reduce_mean(entropy)

    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)

        # n_batches = int(MNIST.test.num_examples/batch_size)
        # print("this is n_batches:",n_batches)
        # print("this is n_epochs:",n_epochs)
        train_x,train_y,test_x,test_y = get_input()
        print("this is train_x len:",len(train_x))
        for i in range(n_epochs):
            print("this is ",i," epochs!!")
            # for j in range(n_batches):
                # X_batch, Y_batch = MNIST.train.next_batch(batch_size)
            X_batch, Y_batch = get_batch(train_x,train_y,batch_size)
                # print("this is x_batch shape:",len(X_batch))
                # print("this is X_batch shape:",X_batch.shape)
                # print("this is Y_batch shape:",Y_batch.shape)
            _, loss_ = sess.run([optimizer, loss], feed_dict={X: X_batch, Y: Y_batch})
                # if i % 10 == 5 and j == 0:
            print("Loss of epochs[{0}]: {1}".format(i, loss_))

        # test the model
        # n_batches = int(MNIST.test.num_examples/batch_size)
        # total_correct_preds = 0
        # for i in range(n_batches):
        # X_batch, Y_batch = MNIST.test.next_batch(10000)
        # predict(X_batch,Y_batch,X,prediction,batch_size)
        predict(test_x,test_y,X,prediction,batch_size)
            # preds = sess.run(prediction, feed_dict={X: X_batch, Y: Y_batch})
            # correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(Y_batch, 1))
            # accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32)) 

            # total_correct_preds += sess.run(accuracy)

        # print("Accuracy {0}".format(total_correct_preds/MNIST.test.num_examples))