
TensorFlow introductory examples (with prediction added)

These are still the two examples from the previous post; here we add prediction logic to them.

A regression model predicts continuous values: the target is a number, not a class. To evaluate how close the regression predictions are to the true values, we measure the distance between the two, typically with the mean squared error (MSE).
A classification model predicts a class from numeric inputs; here the actual targets are a sequence of 1s and 0s. The raw loss value of such a model is hard to interpret on its own, so we usually judge it by the percentage of correctly predicted classes, i.e. the accuracy.
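
To make the two kinds of evaluation concrete, here is a small NumPy sketch (my own illustration, all numbers are made up): the regression metric measures the distance between predicted and true numbers, while the classification metric counts the fraction of correctly predicted classes.

import numpy as np

# Regression: distance between predictions and true values (mean squared error)
y_true = np.array([10.0, 9.8, 10.2])         # made-up targets
y_pred = np.array([9.9, 10.1, 10.0])         # made-up model outputs
mse = np.mean((y_pred - y_true) ** 2)        # smaller is better

# Classification: percentage of correctly predicted classes
labels = np.array([1, 0, 1, 1])              # made-up 0/1 targets
pred_classes = np.array([1, 0, 0, 1])        # made-up predicted classes
accuracy = np.mean(pred_classes == labels)   # 0.75 here
print(mse, accuracy)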

1. Regression

import tensorflow as tf
import numpy as np

#-------------------1. Dataset, variables, placeholders------------------------#

# Samples, input list: drawn from a normal distribution with mean 1, standard deviation 0.1, 100 values
x_vals = np.random.normal(1, 0.1, 100)
# Samples, output list: 100 values, all equal to 10.0
y_vals = np.repeat(10.0, 100)
# Placeholders
x_data = tf.placeholder(shape=[None, 1], dtype=tf.float32)
y_target = tf.placeholder(shape=[None, 1], dtype=tf.float32)
# Model variable
A = tf.Variable(tf.random_normal(shape=[1, 1]))
# Batch size
batch_size = 25
# Indices of the training set: pick 80 values from the full range of sample indices, i.e. 0~99
train_indices = np.random.choice(len(x_vals), round(len(x_vals) * 0.8), replace=False)
# Indices of the test set: the 20 values left after removing train_indices
test_indices = np.array(list(set(range(len(x_vals))) - set(train_indices)))
# Training set & test set
x_vals_train = x_vals[train_indices]
x_vals_test = x_vals[test_indices]
y_vals_train = y_vals[train_indices]
y_vals_test = y_vals[test_indices]

#-----------------2. Model, loss function, optimizer--------------------------#
# The model is a linear function y = w * x, i.e. my_output = A * x_data.
# x_data will be fed with the samples x_vals; the goal is to learn the value of A.
# Since every y is 10.0 and x has mean 1, we can already guess that A should end up close to 10.
my_output = tf.multiply(x_data, A)
# Loss function: the mean of the squared difference between the model output and the true value.
# y_target will be fed with y_vals.
loss = tf.reduce_mean(tf.square(my_output - y_target))
# Initialize variables
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
# Gradient descent with learning rate 0.02: each iteration moves A against the gradient of the loss,
# scaled by 0.02, so A gradually steps toward the value that makes the loss smallest.
my_opt = tf.train.GradientDescentOptimizer(learning_rate=0.02)
train_step = my_opt.minimize(loss)  # goal: drive the loss to its minimum

#-----------------3. Training loop--------------------------#
for i in range(100):  # 0 to 99, 100 not included
    # Randomly pick 25 indices
    rand_index = np.random.choice(len(x_vals_train), size=batch_size)
    # Take 25 samples from the training set and transpose them, because x_data has shape [None, 1].
    # Note the brackets in [x_vals_train[rand_index]]: they make a 2-D 1x25 array, which transpose
    # turns into a 25x1 array; plain x_vals_train[rand_index] is 1-D and cannot be transposed this way.
    rand_x = np.transpose([x_vals_train[rand_index]])
    rand_y = np.transpose([y_vals_train[rand_index]])
    # Feed every placeholder the loss depends on (directly or indirectly):
    # x_data gets rand_x, y_target gets rand_y
    sess.run(train_step, feed_dict={x_data: rand_x, y_target: rand_y})
    # Print progress
    if i % 25 == 0:
        print('step: ' + str(i) + ' A = ' + str(sess.run(A)))
        print('loss: ' + str(sess.run(loss, feed_dict={x_data: rand_x, y_target: rand_y})))

#-----------------4. Evaluate the model--------------------------#
# The test set and training set are completely separate. We run the loss with the trained A on both
# sets and check whether the MSE on the test set is close to the MSE on the training set.
mse_test = sess.run(loss, feed_dict={x_data: np.transpose([x_vals_test]), y_target: np.transpose([y_vals_test])})
mse_train = sess.run(loss, feed_dict={x_data: np.transpose([x_vals_train]), y_target: np.transpose([y_vals_train])})
print('MSE on test: ' + str(np.round(mse_test, 2)))
print('MSE on train: ' + str(np.round(mse_train, 2)))
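
Since the point of this post is the added prediction step: once training has finished, the same my_output node can be run forward to produce predicted values for new inputs. A minimal sketch, assuming the session above is still open; new_x is a made-up batch of five inputs.

# Predict with the trained A: feed new x values into the same my_output node.
new_x = np.transpose([np.random.normal(1, 0.1, 5)])   # shape (5, 1), made-up inputs
pred_y = sess.run(my_output, feed_dict={x_data: new_x})
print('predictions: ' + str(pred_y))                  # each value should be close to 10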

2. Classification

Straight to the code.

#import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
#sklearn is a machine learning toolkit that ships many datasets
#install: pip install -U scikit-learn
#         sklearn depends on python>=2.7, numpy (array-oriented math library) and scipy (scientific algorithms and data tools)
from sklearn import datasets


#-------------------1. Dataset, variables, placeholders------------------------#
iris = datasets.load_iris()
print('sample feature: feature_names: ' + str(iris.feature_names) + " data length: " + str(len(iris.data)))
print('sample target: target_names: ' + str(iris.target_names) + " target length: " + str(len(iris.target)))
#sample data: a 150x4 2-D array
#print(iris.data)
#sample labels: a 1-D array of length 150
#print(iris.target)


#extracted sample labels: we only care about the first species; the label is 1 if the sample is the first species (setosa), otherwise 0
temp = []
for x in iris.target:
    temp.append(1 if x== 0 else 0)
iris_target = np.array(temp)  # convert the list to an array; the lines above can also be written as: iris_target = np.array([1 if x == 0 else 0 for x in iris.target])
print('iris_target: ')
print(iris_target)

#extracted sample inputs: use only two features, the petal length and the petal width
iris_2d = np.array([[x[2], x[3]] for x in iris.data])
print('iris_2d: ')
print(iris_2d)


#split the samples into a training set and a test set
#indices of the training set: pick 120 values from the full range of sample indices, i.e. 0~149
train_indices = np.random.choice(len(iris_2d), round(len(iris_2d) *0.8), replace = False)
#indices of the test set: the 30 values left after removing train_indices
test_indices = np.array(list(set(range(len(iris_2d))) - set(train_indices)))

#training set & test set
x_vals_train = iris_2d[train_indices]
x_vals_test = iris_2d[test_indices]
y_vals_train = iris_target[train_indices]
y_vals_test = iris_target[test_indices]
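
# (Side note, my own addition, not part of the original post) scikit-learn ships an equivalent
# helper for this kind of split:
#   from sklearn.model_selection import train_test_split
#   x_vals_train, x_vals_test, y_vals_train, y_vals_test = train_test_split(
#       iris_2d, iris_target, test_size=0.2)
# The manual index version above is kept so the split itself only depends on numpy.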

#batch size of 20 for training
batch_size = 20
x1_data = tf.placeholder(shape=[None, 1], dtype=tf.float32)
x2_data = tf.placeholder(shape=[None, 1], dtype=tf.float32)
y_target = tf.placeholder(shape=[None, 1], dtype=tf.float32)

A = tf.Variable(tf.random_normal(shape=[1,1]))
b = tf.Variable(tf.random_normal(shape=[1,1]))

#-----------------2. Model, loss function, optimizer--------------------------#
#define the model: my_output = x1 - (A*x2 + b)
my_mult = tf.matmul(x2_data, A)
my_add = tf.add(my_mult, b)
my_output = tf.subtract(x1_data, my_add)
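
# (My note on what this model means geometrically) sigmoid(my_output) > 0.5 exactly when
# x1 > A*x2 + b, so learning A and b amounts to fitting a straight line x1 = A*x2 + b in the
# (petal width, petal length) plane that separates setosa (label 1) from the other two species.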

#loss function: sigmoid cross entropy, where my_output is the logit and y_target is the 0/1 label
xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_target, logits=my_output)
my_opt = tf.train.GradientDescentOptimizer(0.05)
train_step = my_opt.minimize(xentropy)
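
# (My note) sigmoid_cross_entropy_with_logits treats my_output as a raw logit and y_target as a
# 0/1 label; per sample it computes the usual binary cross entropy
#   -y * log(sigmoid(logit)) - (1 - y) * log(1 - sigmoid(logit))
# in a numerically stable form. minimize() on this per-sample vector takes gradients of its sum,
# which is why no explicit tf.reduce_mean is strictly required here.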

#initialize variables
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)



#-----------------3. Training loop--------------------------#


#start iterating and updating the model, i.e. learning A and b
for i in range(1000):
    #draw a uniform random sample of 20 indices from np.arange(len(x_vals_train)), e.g. [ 66  42  96 115  45 127  31  70 148  57  60 127  56  96   7  63  75 127 110 144]
    rand_index = np.random.choice(len(x_vals_train), size=batch_size)
    #print('rand_index ' + str(rand_index))
    #rand_x is a 20x2 array, something like [[4.5 1.5] ... [1.3 0.2]]
    rand_x = x_vals_train[rand_index]
    #print(' rand_x ' + str(rand_x))
    #print(' rand_x shape: ' + str(rand_x.shape))

    rand_x1 = np.array([[x[0]] for x in rand_x])
    rand_x2 = np.array([[x[1]] for x in rand_x])
    #print(' rand_x1 ' + str(rand_x1))
    #print(' rand_x2 ' + str(rand_x2))

    #if we used y_vals_train[rand_index] directly for rand_y, we would get something like [0 0 0 0 0 0 1 0 0 0 0 1 1 1 0 1 1 0 0 0], a 1-D array of shape (20,) with 20 elements
    #but we want a 2-D array of shape (20, 1), because that is the shape of the placeholder
    #[[y] for y in y_vals_train[rand_index]] gives [[0], [0], [0], [0], [0], [0], [1], [0], [1], [0], [0], [0], [0], [1], [1], [1], [1], [1], [0], [0]]
    #which is then converted to an array of shape (20, 1)
    rand_y = np.array([[y] for y in y_vals_train[rand_index]])
    #print('rand_y shape ' + str(rand_y.shape))
    #print('rand_y  ' + str(rand_y))

    sess.run(train_step, feed_dict={x1_data: rand_x1, x2_data: rand_x2, y_target: rand_y})
    if(i+1)%200==0:
        print('step: ' + str(i) + ' A = ' + str(sess.run(A)) + ' b = ' + str(sess.run(b)))


#-----------------4. Evaluate the model--------------------------#
#use the trained A and b to compute my_output
y_prediction = tf.round(tf.nn.sigmoid(my_output))  # shape [None, 1], so it compares element-wise with y_target below
correct_prediction = tf.equal(y_prediction, y_target)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

#extract the test-set data; reshape into 2-D column matrices
x_vals_test_x1 = np.array([[x[0]] for x in x_vals_test])
x_vals_test_x2 = np.array([[x[1]] for x in x_vals_test])
y_vals_test_predict = np.array([[y] for y in y_vals_test])

#extract the training-set data; reshape into 2-D column matrices
x_vals_train_x1 = np.array([[x[0]] for x in x_vals_train])
x_vals_train_x2 = np.array([[x[1]] for x in x_vals_train])
y_vals_train_predict = np.array([[y] for y in y_vals_train])

print('x_vals_test ')
print(x_vals_test)
print('x_vals_test_x1 ')
print(x_vals_test_x1)
print('x_vals_test_x2 ')
print(x_vals_test_x2)

print('y_vals_test: ')
print(y_vals_test)
print('y_vals_test_predict: ')
print(y_vals_test_predict)


acc_value_test = sess.run(accuracy, feed_dict={x1_data: x_vals_test_x1, x2_data: x_vals_test_x2, y_target: y_vals_test_predict})
acc_value_train = sess.run(accuracy, feed_dict={x1_data: x_vals_train_x1, x2_data: x_vals_train_x2, y_target: y_vals_train_predict})

print('Accuracy on test set ' + str(acc_value_test))
print('Accuracy on train set ' + str(acc_value_train))
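
The prediction step for the classifier can be sketched the same way (my own addition; the two petal measurements below are made-up values roughly matching a setosa): with the trained A and b still in the session, the y_prediction node classifies a brand-new flower from its petal length and width.

# Predict the class of one new flower (petal length 1.4, petal width 0.2, made-up values).
new_petal_length = np.array([[1.4]])
new_petal_width = np.array([[0.2]])
new_prediction = sess.run(y_prediction, feed_dict={x1_data: new_petal_length, x2_data: new_petal_width})
print('Predicted class for the new flower (1 = setosa): ' + str(new_prediction))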