機器學習實戰--KNN
阿新 • 發佈: 2018-12-15
import operator
from collections import Counter

import numpy as np


def createDataSet():
    """Return the toy training set used by the demo.

    Returns:
        A ``(group, labels)`` tuple. ``group`` is a 4x2 array with one sample
        per row; ``labels`` lists the class label of each row, in row order.
    """
    group = np.array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels


def classify0(inX, dataset, labels, K):
    """Classify ``inX`` by majority vote among its K nearest samples.

    Distance is Euclidean, computed between ``inX`` and every row of
    ``dataset``.

    Args:
        inX: Feature vector to classify (sequence or 1-D array).
        dataset: Training samples, one per row (2-D array or nested sequence).
        labels: Class labels; ``labels[i]`` belongs to row ``i`` of
            ``dataset``.
        K: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes. On a tie, the label encountered first
        while walking the neighbours from nearest to farthest wins.

    Raises:
        IndexError: If ``K`` is not between 1 and the number of samples.
    """
    samples = np.asarray(dataset)
    sample_count = samples.shape[0]

    # An out-of-range K used to surface as an incidental IndexError from
    # deep inside the function; keep the exception type but fail up front
    # with a message that names the actual problem.
    if not 1 <= K <= sample_count:
        raise IndexError(
            "K must be between 1 and the number of samples "
            "({}), got {!r}".format(sample_count, K)
        )

    # Broadcasting subtracts every row from inX without materialising a
    # tiled copy of inX first.
    diffs = np.asarray(inX) - samples
    # Square element-wise, sum across each row (axis=1), then take the root.
    distances = (diffs ** 2).sum(axis=1) ** 0.5

    # A stable sort keeps equidistant samples in dataset order, so the
    # result is deterministic regardless of input size.
    nearest = distances.argsort(kind="stable")[:K]

    votes = Counter(labels[index] for index in nearest)
    # Counter preserves insertion order and max() returns the first maximal
    # item, so ties go to the label seen first among the nearest neighbours.
    winner, _ = max(votes.items(), key=operator.itemgetter(1))
    return winner


if __name__ == '__main__':
    group, labels = createDataSet()
    labelpredict = classify0([0.5, 0.6], group, labels, 2)
    print(labelpredict)