1. 程式人生 > 實用技巧 > Neural Network 學習2 前向傳播實戰

Neural Network 學習2 前向傳播實戰

import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets

# Load the MNIST training split; the second (test) split is discarded.
# x: [60k, 28, 28]
# y: [60k]
(x, y), _ = datasets.mnist.load_data()

# Rescale pixels from [0, 255] to [0, 1.] -- values in that range are
# easier to optimise.
x = tf.convert_to_tensor(x, dtype=tf.float32)
x = x / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)

# Wrap the tensors in a Dataset so that one batch can be trained at a time.
train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.batch(60)

# Pull a single batch through an iterator to check its shapes:
# sample[0] is the batch of x, sample[1] is the batch of y.
train_iter = iter(train_db)
sample = next(train_iter)
print('batch:', sample[0].shape, sample[1].shape)

# Three dense layers: [b, 784] ==> [b, 256] ==> [b, 128] ==> [b, 10]
# Every weight matrix has shape [dim_in, dim_out].
#
# The parameters must be wrapped in tf.Variable; per the original author,
# the gradient-descent step otherwise ends up with None gradients.
# stddev is set to 0.1 explicitly: the original author notes that leaving
# it at the default of 1. leads to exploding gradients.
w1 = tf.Variable(tf.random.truncated_normal(shape=[784, 256], stddev=0.1))
w2 = tf.Variable(tf.random.truncated_normal(shape=[256, 128], stddev=0.1))
w3 = tf.Variable(tf.random.truncated_normal(shape=[128, 10], stddev=0.1))

# Biases start at zero, one entry per output unit of the matching layer.
b1 = tf.Variable(tf.zeros(shape=[256]))
b2 = tf.Variable(tf.zeros(shape=[128]))
b3 = tf.Variable(tf.zeros(shape=[10]))

# Train the three-layer network with hand-written gradient descent:
# forward pass, mean-squared-error loss against one-hot labels, then an
# in-place update of every parameter.
lr = 1e-3  # learning rate = 0.001
params = [w1, b1, w2, b2, w3, b3]  # trainable variables, in update order

for epoch in range(10):  # iterate over the whole dataset 10 times
    # Each batch is one "step"; the step index is only used to decide
    # when to print the loss.
    for step, (x, y) in enumerate(train_db):
        # Flatten every image: [b, 28, 28] ==> [b, 784]. The batch axis is
        # inferred with -1 instead of being hard-coded, so the reshape stays
        # correct if the batch size changes or the last batch is smaller.
        x = tf.reshape(x, [-1, 28 * 28])

        with tf.GradientTape() as tape:  # record ops for differentiation
            # [b, 784] @ [784, 256] + [256] ==> [b, 256]
            h1 = tf.nn.relu(x @ w1 + b1)
            # [b, 256] @ [256, 128] + [128] ==> [b, 128]
            h2 = tf.nn.relu(h1 @ w2 + b2)
            # [b, 128] @ [128, 10] + [10] ==> [b, 10]
            # The last layer has no non-linearity.
            out = h2 @ w3 + b3

            # Labels: [b] ==> [b, 10]
            y_onehot = tf.one_hot(y, depth=10)

            # Mean squared error: mse = mean((y - out) ^ 2).
            # tf.square gives [b, 10]; reduce_mean collapses it to a scalar.
            loss = tf.reduce_mean(tf.square(y_onehot - out))

        # Gradients of the loss w.r.t. each parameter, in `params` order.
        grads = tape.gradient(loss, params)

        # p = p - lr * dp, done in place so each parameter stays a
        # tf.Variable rather than being rebound to a plain tensor.
        for param, grad in zip(params, grads):
            param.assign_sub(lr * grad)

        if step % 100 == 0:
            print(epoch, step, 'loss:', float(loss))