基於TensorFlow的K近鄰(KNN)分類器實現——以MNIST為例
阿新 • 發佈:2019-01-03
KNN分類原理
TF的KNN程式碼
def load_mnist_data(filename, isbatch=0, train_nums=1000, test_nums=200,
                    full_train_nums=20000, full_test_nums=300):
    """Load MNIST and return (X_train, Y_train, X_test, Y_test) one-hot arrays.

    Args:
        filename: directory where the MNIST data is stored / downloaded to.
        isbatch: 1 -> draw random batches of ``train_nums`` / ``test_nums``
            samples; 0 -> take fixed leading slices (deterministic runs).
        train_nums, test_nums: batch sizes, used only when ``isbatch == 1``.
        full_train_nums, full_test_nums: slice sizes used when
            ``isbatch == 0``.  Defaults (20000 / 300) preserve the original
            hard-coded behavior; previously these sizes could not be changed
            even though ``train_nums``/``test_nums`` suggested otherwise.

    Returns:
        Tuple ``(X_train, Y_train, X_test, Y_test)`` of numpy arrays; images
        are flattened floats, labels are one-hot.
    """
    # NOTE(review): tensorflow.examples.tutorials was removed in TF 2.x;
    # this code assumes TensorFlow 1.x — confirm the installed version.
    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets(filename, one_hot=True)
    if isbatch == 1:
        # Random batch sampling from the dataset.
        X_train, Y_train = mnist.train.next_batch(train_nums)
        X_test, Y_test = mnist.test.next_batch(test_nums)
    else:
        # Fixed leading slices: the same samples on every run.
        X_train = mnist.train.images[:full_train_nums]
        Y_train = mnist.train.labels[:full_train_nums]
        X_test = mnist.test.images[:full_test_nums]
        Y_test = mnist.test.labels[:full_test_nums]
    return X_train, Y_train, X_test, Y_test
def KNN_Classifier(X_train, Y_train, X_test, Y_test, K=5, dims=784, dist_metric='L1'):
    """K-nearest-neighbour classification of MNIST digits via a TF1 graph.

    Each test sample is compared against the whole training set; the labels
    of its K nearest neighbours vote (majority) for the predicted class.

    Fixes over the original:
      * ``dist_metric`` was accepted but ignored (L1 always used); it now
        selects 'L1' (sum |a-b|) or 'L2' (sqrt(sum (a-b)^2)).
      * deprecated ``reduction_indices`` replaced with ``axis``.
      * the ``K is None`` branch compared a TF tensor to a Python int
        (``if dim == 1``), which in graph mode always fell through to the
        ``else`` branch; only that (correct) branch is kept.

    Args:
        X_train: (n_train, dims) float training images.
        Y_train: (n_train, n_classes) one-hot training labels.
        X_test:  (n_test, dims) float test images.
        Y_test:  (n_test, n_classes) one-hot test labels.
        K: number of neighbours; ``None`` means "use all training samples".
        dims: feature dimensionality of one sample (784 for 28x28 MNIST).
        dist_metric: 'L1' (default) or 'L2'.

    Returns:
        The number of correctly classified test samples (int count, not a
        ratio — the accuracy ratio is printed).
    """
    # Graph inputs: the full training matrix vs. a single test vector.
    xs = tf.placeholder(tf.float32, [None, dims])
    xst = tf.placeholder(tf.float32, [dims])
    diff = tf.subtract(xs, xst)  # broadcast: one row per training sample
    if dist_metric == 'L2':
        # L2: sqrt(sum((a-b)^2)).  sqrt is monotonic, so the neighbour
        # ranking would be identical without it; kept for clarity.
        dist = tf.sqrt(tf.reduce_sum(tf.square(diff), axis=1))
    else:
        # L1: sum(|a-b|)
        dist = tf.reduce_sum(tf.abs(diff), axis=1)
    if K is None:
        # dist is rank-1 (one distance per training sample), so "all
        # neighbours" is simply its last-axis size.
        K = tf.shape(dist)[-1]
    # top_k of the negated distances == the K smallest distances.
    value, index = tf.nn.top_k(-dist, k=K)
    value = -value
    init = tf.global_variables_initializer()
    Accuracy = 0  # count of correctly classified test samples
    with tf.Session() as sess:
        sess.run(init)
        # Predict one test sample at a time (the graph holds one test
        # vector against the whole training set).
        for i in range(len(X_test)):
            idx = sess.run(index, feed_dict={xs: X_train, xst: X_test[i, :]})
            # Labels of the K nearest neighbours (argmax of one-hot rows).
            Klabels = np.argmax(Y_train[idx], axis=1)
            # Majority vote: most frequent label among the K neighbours.
            Predict_label = np.argmax(np.bincount(Klabels))
            True_label = np.argmax(Y_test[i])
            print("Test Sample", i, "Prediction label:", Predict_label,
                  "True Class label:", True_label)
            if Predict_label == True_label:
                Accuracy += 1
    print("Accuracy=", Accuracy / len(X_test))
    return Accuracy
if __name__ == '__main__':
    # Deterministic experiment: fixed leading slices of MNIST (isbatch=0),
    # then a K=5 nearest-neighbour run with L1 distance.
    X_train, Y_train, X_test, Y_test = load_mnist_data(
        "MNIST_data", isbatch=0, train_nums=1000, test_nums=200)
    Accuracy = KNN_Classifier(
        X_train, Y_train, X_test, Y_test, K=5, dims=784, dist_metric='L1')
實驗結果比較:
固定選擇訓練樣本前 20000 個、測試樣本前 300 個(不採用 batch 隨機批量獲取樣本)的實驗結果如下:

| K 值 | Accuracy (%) |
| --- | --- |
| K=1 | 95.67 |
| K=3 | 95.33 |
| K=5 | 95.33 |
| K=7 | 94.67 |