KNN演算法,k 鄰近(python)
阿新 • • 發佈:2019-02-19
1.原理
利用歐式距離計算待分類樣本與每個訓練樣本(特徵向量)之間的相似度:歐式距離越小,相似度越大。取距離最小的 k 個訓練樣本,以其中出現次數最多的類別作為預測結果。
2.程式碼:
from numpy import *
import operator
from os import listdir
def kNNClassify(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest training samples.

    Nearness is the Euclidean distance between ``inX`` and each row of
    ``dataSet``.

    Args:
        inX: Feature vector to classify (length = number of features).
        dataSet: 2-D array-like with one training sample per row.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``
            of ``dataSet``. Labels must be hashable.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label occurring most often among the k nearest samples. On a
        tied vote, the tied label that was encountered first (i.e. belongs
        to the closer neighbour) wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            training samples.
    """
    # Cast to float so unsigned/small integer inputs cannot wrap around
    # when subtracted or squared.
    samples = asarray(dataSet, dtype=float)
    query = asarray(inX, dtype=float)

    # sample size
    dataSize = samples.shape[0]
    if k < 1 or k > dataSize:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (dataSize, k)
        )

    # Broadcasting subtracts the query from every row without materialising
    # a tiled copy of it; summing along axis 1 gives one distance per row.
    distances = (((samples - query) ** 2).sum(axis=1)) ** 0.5

    # Indices of the k closest samples, nearest first.
    nearest = distances.argsort()[:k]

    # Count how often each label appears among the k neighbours.
    votes = {}
    for idx in nearest:
        label = labels[idx]
        votes[label] = votes.get(label, 0) + 1

    # Explicit scan instead of the builtin max(): the file does
    # `from numpy import *`, which may shadow builtins such as max/sum.
    # The strict `>` keeps the first-seen label on ties.
    res = None
    maxTimes = 0
    for label, count in votes.items():
        if count > maxTimes:
            maxTimes = count
            res = label
    return res