A TensorFlow-Based Nearest Neighbor (NN) Classifier, Using MNIST Recognition as an Example
I. Nearest Neighbor Classification Theory
The nearest neighbor (NN) classifier needs no training step: it simply stores all training samples and, for each test sample, computes the distance (here the L1 distance, i.e. the sum of absolute pixel differences) to every training sample. The label of the closest training sample is returned as the prediction.
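Before going to TensorFlow, the same rule can be written directly in NumPy. The sketch below is only an illustration of the algorithm; the function and variable names are mine, not part of the original code:

import numpy as np

def nn_predict(X_train, Y_train, x):
    # L1 distance from the query image x to every training image
    dist = np.sum(np.abs(X_train - x), axis=1)
    # index of the closest training sample
    nearest = np.argmin(dist)
    # the prediction is that sample's class (labels are one-hot)
    return np.argmax(Y_train[nearest])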
II. Implementing the NN Classifier with TensorFlow on the CPU
The complete code is as follows:
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

'''========load data========'''
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)  # see the earlier posts for how 'MNIST_data' is set up
# Number of training samples
#train_nums = mnist.train.num_examples
# Read all training and test samples
#X_train = mnist.train.images
#X_test = mnist.test.images
#Y_train = mnist.train.labels
#Y_test = mnist.test.labels
# Read a batch of samples
X_train, Y_train = mnist.train.next_batch(1000)
X_test, Y_test = mnist.test.next_batch(200)

'''========0. Define constants========'''
insize = 784  # input size

'''Placeholders for the computation graph'''
xs = tf.placeholder(tf.float32, [None, insize])   # all training images
xst = tf.placeholder(tf.float32, [insize])        # one test image

'''Nearest neighbor search with the L1 distance'''
# L1: dist = sum(|X1-X2|)  or  L2: dist = sqrt(sum(|X1-X2|^2))
dist = tf.reduce_sum(tf.abs(tf.add(xs, tf.negative(xst))), axis=1)
# equivalently: dist = tf.reduce_sum(tf.abs(tf.subtract(xs, xst)), axis=1)
# Prediction: take the index of the smallest distance, then compare that sample's label with the true label
index = tf.argmin(dist, 0)

# Initialize all variables
init = tf.global_variables_initializer()
# Accuracy counter
Accuracy = 0

# Run the session
with tf.Session() as sess:
    sess.run(init)
    # Test samples have to be predicted one by one in a loop
    for i in range(len(X_test)):
        #print('Dist=', sess.run(dist, feed_dict={xs: X_train, xst: X_test[i, :]}))
        id = sess.run(index, feed_dict={xs: X_train, xst: X_test[i, :]})
        # Predicted label and true label for comparison
        Predict_label = np.argmax(Y_train[id])
        True_label = np.argmax(Y_test[i])
        print("Test Sample", i, "Prediction label:", Predict_label,
              "True Class label:", True_label)
        # Accumulate accuracy
        if Predict_label == True_label:
            Accuracy += 1
    print("Accuracy=", Accuracy / len(X_test))
Method 2: wrap the data loading and the NN classification into separate functions so they can be reused later.
import numpy as np
import tensorflow as tf

def load_mnist_data(filename, isbatch=0, train_nums=1000, test_nums=200):
    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets(filename, one_hot=True)
    if isbatch == 1:
        # 2. Read a batch of samples
        X_train, Y_train = mnist.train.next_batch(train_nums)
        X_test, Y_test = mnist.test.next_batch(test_nums)
        return X_train, Y_train, X_test, Y_test
    else:
        # 1. Read all samples
        X_train = mnist.train.images  #[1:10]
        X_test = mnist.test.images
        Y_train = mnist.train.labels
        Y_test = mnist.test.labels
        return X_train, Y_train, X_test, Y_test

def NN_Classifier(X_train, Y_train, X_test, Y_test, dims=784, dist_metric='L1'):
    # Placeholders for the computation graph
    xs = tf.placeholder(tf.float32, [None, dims])   # all training images
    xst = tf.placeholder(tf.float32, [dims])        # one test image
    # Nearest neighbor search with the L1 distance
    # L1: dist = sum(|X1-X2|)  or  L2: dist = sqrt(sum(|X1-X2|^2))
    # Note: only L1 is built here, so dist_metric is not used yet
    # (see the sketch after this code for one way to honor it)
    dist = tf.reduce_sum(tf.abs(tf.add(xs, tf.negative(xst))), axis=1)
    # equivalently: dist = tf.reduce_sum(tf.abs(tf.subtract(xs, xst)), axis=1)
    # Prediction: take the index of the smallest distance, then compare that sample's label with the true label
    index = tf.argmin(dist, 0)
    # Initialize all variables
    init = tf.global_variables_initializer()
    # Accuracy counter
    Accuracy = 0
    # Run the session
    with tf.Session() as sess:
        sess.run(init)
        # Test samples have to be predicted one by one in a loop
        for i in range(len(X_test)):
            id = sess.run(index, feed_dict={xs: X_train, xst: X_test[i, :]})
            # Predicted label and true label for comparison
            Predict_label = np.argmax(Y_train[id])
            True_label = np.argmax(Y_test[i])
            print("Test Sample", i, "Prediction label:", Predict_label,
                  "True Class label:", True_label)
            # Accumulate accuracy
            if Predict_label == True_label:
                Accuracy += 1
        print("Accuracy=", Accuracy / len(X_test))
    return Accuracy

if __name__ == '__main__':
    X_train, Y_train, X_test, Y_test = load_mnist_data("MNIST_data", isbatch=1, train_nums=1000, test_nums=200)
    Accuracy = NN_Classifier(X_train, Y_train, X_test, Y_test, dims=784, dist_metric='L1')
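The dist_metric argument is accepted but NN_Classifier above always uses L1. A minimal sketch of how the distance node could be chosen from that argument, as an assumed extension rather than part of the original post:

# Assumed extension: pick the distance node according to dist_metric
if dist_metric == 'L2':
    # L2: dist = sqrt(sum((X1 - X2)^2))
    dist = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(xs, xst)), axis=1))
else:
    # default L1: dist = sum(|X1 - X2|)
    dist = tf.reduce_sum(tf.abs(tf.subtract(xs, xst)), axis=1)
index = tf.argmin(dist, 0)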