基於TensorFlow的K近鄰(KNN)分類器實現——以MNIST為例

基於TensorFlow的K近鄰(KNN)分類器實現——以MNIST為例

KNN分類原理

TF的KNN程式碼

import numpy as np
import tensorflow as tf  # TF 1.x graph-mode API (tf.placeholder / tf.Session)


def load_mnist_data(filename, isbatch=0, train_nums=1000, test_nums=200):
    """Load MNIST and return (X_train, Y_train, X_test, Y_test).

    Images are flattened float vectors of length 784; labels are one-hot.

    Args:
        filename: directory where the MNIST data is (or will be) stored.
        isbatch: 1 -> draw random batches of the requested sizes;
                 any other value -> fixed slices (first 20000 train / 300 test).
        train_nums: training batch size, used only when isbatch == 1.
        test_nums: test batch size, used only when isbatch == 1.
    """
    # NOTE(review): tensorflow.examples.tutorials was removed in TF 2.x;
    # this code assumes a TF 1.x installation.
    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets(filename, one_hot=True)
    if isbatch == 1:
        # Random batch sampling of the requested sizes.
        X_train, Y_train = mnist.train.next_batch(train_nums)
        X_test, Y_test = mnist.test.next_batch(test_nums)
        return X_train, Y_train, X_test, Y_test
    else:
        # Deterministic slices: first 20000 training / first 300 test samples.
        X_train = mnist.train.images[0:20000]
        X_test = mnist.test.images[0:300]
        Y_train = mnist.train.labels[0:20000]
        Y_test = mnist.test.labels[0:300]
        return X_train, Y_train, X_test, Y_test


def KNN_Classifier(X_train, Y_train, X_test, Y_test, K=5, dims=784,
                   dist_metric='L1'):
    """K-nearest-neighbour classification of X_test against X_train.

    For each test sample, computes distances to every training sample,
    takes the K nearest, and predicts by majority vote over their one-hot
    labels. Prints per-sample predictions and the final accuracy.

    Args:
        X_train, Y_train: training images (N, dims) and one-hot labels.
        X_test, Y_test: test images and one-hot labels.
        K: number of neighbours; None means "use all training samples".
        dims: feature dimensionality (784 for flattened MNIST).
        dist_metric: 'L1' -> sum(|x1-x2|); 'L2' -> sqrt(sum((x1-x2)^2)).

    Returns:
        Number of correctly classified test samples (int).
    """
    # Graph-input placeholders: all training samples vs. one test sample.
    xs = tf.placeholder(tf.float32, [None, dims])
    xst = tf.placeholder(tf.float32, [dims])

    # BUGFIX: dist_metric was previously accepted but ignored (only L1 was
    # ever computed). Honour it now; also use `axis` instead of the
    # deprecated `reduction_indices`.
    diff = tf.subtract(xs, xst)
    if dist_metric == 'L2':
        dist = tf.sqrt(tf.reduce_sum(tf.square(diff), axis=1))
    else:
        dist = tf.reduce_sum(tf.abs(diff), axis=1)

    if K is None:
        # BUGFIX: the original compared a tf.size tensor with a Python int
        # (`dim == 1`), which in TF1 is an identity check and never true.
        # `dist` is always rank-1 here (one row per training sample), so
        # "all neighbours" is simply its length.
        K = tf.shape(dist)[0]

    # top_k sorts descending; negate distances to obtain the K smallest.
    value, index = tf.nn.top_k(-dist, k=K)
    value = -value  # actual (positive) distances; kept for inspection

    init = tf.global_variables_initializer()
    Accuracy = 0  # count of correctly classified test samples
    with tf.Session() as sess:
        sess.run(init)
        # One graph run per test sample (the test placeholder is a vector).
        for i in range(len(X_test)):
            idx = sess.run(index, feed_dict={xs: X_train, xst: X_test[i, :]})
            # Decode the K neighbours' one-hot labels, then majority vote.
            Klabels = np.argmax(Y_train[idx], axis=1)
            Predict_label = np.argmax(np.bincount(Klabels))
            True_label = np.argmax(Y_test[i])
            print("Test Sample", i, "Prediction label:", Predict_label,
                  "True Class label:", True_label)
            if Predict_label == True_label:
                Accuracy += 1
        print("Accuracy=", Accuracy / len(X_test))
    return Accuracy


if __name__ == '__main__':
    X_train, Y_train, X_test, Y_test = load_mnist_data(
        "MNIST_data", isbatch=0, train_nums=1000, test_nums=200)
    Accuracy = KNN_Classifier(X_train, Y_train, X_test, Y_test,
                              K=5, dims=784, dist_metric='L1')

實驗結果比較:
固定選擇訓練樣本前20000個,測試樣本前300個(不是採用batch隨機批量獲取樣本)的實驗結果如下:

K值 Accuracy(%)
K=1 95.67
K=3 95.33
K=5 95.33
K=7 94.67