
Forward Propagation

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # suppress TensorFlow C++ log noise; set before importing tensorflow

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
# x: [60k, 28, 28]
# y: [60k]
(x, y), _ = datasets.mnist.load_data()  # the test split is discarded here

# x: [0~255] => [0~1.]
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)

print(x.shape, y.shape, x.dtype, y.dtype)
print(tf.reduce_max(x), tf.reduce_min(x))  # check the max and min values
print(tf.reduce_max(y), tf.reduce_min(y))

train_db = tf.data.Dataset.from_tensor_slices((x, y)).batch(128)  # load 128 samples per batch
train_iter = iter(train_db)
sample = next(train_iter)
print('batch:', sample[0].shape, sample[1].shape)
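Note that from_tensor_slices(...).batch(128) yields the batches in the same fixed order every epoch. A common refinement, not part of the original code, is to shuffle before batching; a minimal sketch (the buffer size of 10000 is an arbitrary choice):

train_db = tf.data.Dataset.from_tensor_slices((x, y)) \
    .shuffle(10000) \
    .batch(128)  # reshuffles each epoch by default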
# dataset loading complete
# [b, 784] => [b, 256] => [b, 128] => [b, 10]
# [dim_in, dim_out], [dim_out]
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))  # without a small stddev the gradients explode
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))

lr = 1e-3

for epoch in range(10):  # iterate over the full dataset 10 times
    for step, (x, y) in enumerate(train_db):  # inner loop: one batch of 128 images per step, covering all 60k images
        # x: [128, 28, 28]
        # y: [128]
        # [b, 28, 28] => [b, 28*28]
        x = tf.reshape(x, [-1, 28*28])

        with tf.GradientTape() as tape:  # records gradient info; tracks tf.Variable by default
            # x: [b, 28*28]
            # h1 = x@w1 + b1
            # [b, 784]@[784, 256] + [256] => [b, 256] + [256]
            h1 = x @ w1 + b1  # the bias is broadcast automatically
            h1 = tf.nn.relu(h1)  # apply the non-linearity
            h2 = h1 @ w2 + b2
            h2 = tf.nn.relu(h2)
            out = h2 @ w3 + b3

            # compute the loss
            # out: [b, 10]
            # y: [b] => [b, 10]
            y_onehot = tf.one_hot(y, depth=10)
            # mse = mean(sum((y - out)^2))
            loss = tf.square(y_onehot - out)
            # mean: scalar
            loss = tf.reduce_mean(loss)

        # compute the gradients; returns a list in the same order as the variables
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        # w1 = w1 - lr * w1_grad
        w1.assign_sub(lr * grads[0])  # in-place update, keeps the tf.Variable type
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])

        if step % 100 == 0:
            print(step, 'loss', float(loss))
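The test split returned by load_data is thrown away above (the `_`). As an extension, not part of the original run, here is a minimal evaluation sketch; it assumes the load line is changed to keep the split as (x_test, y_test) and reuses the trained weights for the forward pass:

# (x, y), (x_test, y_test) = datasets.mnist.load_data()
x_test = tf.convert_to_tensor(x_test, dtype=tf.float32) / 255.
y_test = tf.convert_to_tensor(y_test, dtype=tf.int32)
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(128)

total_correct, total_num = 0, 0
for x, y in test_db:
    x = tf.reshape(x, [-1, 28*28])
    # same forward pass as in training, without a GradientTape
    h1 = tf.nn.relu(x @ w1 + b1)
    h2 = tf.nn.relu(h1 @ w2 + b2)
    out = h2 @ w3 + b3  # [b, 10] raw scores
    pred = tf.cast(tf.argmax(out, axis=1), dtype=tf.int32)  # [b]
    correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
    total_correct += int(tf.reduce_sum(correct))
    total_num += x.shape[0]

print('test acc:', total_correct / total_num)

MSE on one-hot targets is enough for this demo, but for classification the usual choice is cross-entropy on the logits, e.g. tf.losses.categorical_crossentropy(y_onehot, out, from_logits=True), which typically trains faster and more stably.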

Output: