程式人生 > 機器學習實戰--KNN

機器學習實戰--KNN

import numpy as np
import operator
def createDataSet():
    """Build the tiny hard-coded training set used by the demo.

    Returns:
        A ``(group, labels)`` pair: ``group`` is a 4x2 NumPy array holding
        one 2-D sample per row, and ``labels`` is a list with the class
        name ('A' or 'B') of each row, in the same order.
    """
    points = [
        [1.0, 1.1],
        [1.0, 1.0],
        [0, 0],
        [0, 0.1],
    ]
    class_names = ['A', 'A', 'B', 'B']
    return np.array(points), class_names

def classify0(inX, dataset, labels, K):
    """Classify ``inX`` by majority vote among its K nearest neighbours.

    Distances are Euclidean, measured between ``inX`` and every row of
    ``dataset``.

    Args:
        inX: Query point; a sequence or array with one value per feature.
        dataset: Training samples, one row per sample. Any array-like
            accepted by ``np.asarray`` works (ndarray or nested lists).
        labels: Class label of each row of ``dataset``, in the same order.
        K: Number of nearest neighbours that vote; must satisfy
            ``1 <= K <= number of rows in dataset``.

    Returns:
        The label with the most votes. On a tie, the label that was seen
        first while walking the neighbours from nearest to farthest wins.

    Raises:
        ValueError: If ``K`` is outside ``1..number of rows`` (which also
            covers an empty ``dataset``).
    """
    samples = np.asarray(dataset)
    sample_count = samples.shape[0]
    # Fail early with a clear message instead of an IndexError deep inside
    # the voting loop (K too large) or on the empty vote table (K <= 0).
    if not 1 <= K <= sample_count:
        raise ValueError(
            "K must be between 1 and the number of samples ({}), got {!r}"
            .format(sample_count, K))

    # Broadcasting subtracts every sample row from inX; no explicit
    # tiling of the query point is needed.
    diff = np.asarray(inX) - samples
    # Squaring is element-wise; axis=1 sums across the features of each
    # row, giving one squared distance per sample.
    distances = (diff ** 2).sum(axis=1) ** 0.5
    # argsort returns the sample indices ordered from nearest to farthest.
    nearest_first = distances.argsort()

    votes = {}
    for sample_index in nearest_first[:K]:
        vote_label = labels[sample_index]
        votes[vote_label] = votes.get(vote_label, 0) + 1
    # max() returns the first label reaching the highest count; dicts keep
    # insertion order, so ties go to the label encountered first.
    return max(votes, key=votes.get)


if __name__ == '__main__':
    # Demo: classify one query point against the toy data set, letting
    # its 2 nearest neighbours vote, and print the winning label.
    samples, sample_labels = createDataSet()
    print(classify0([0.5, 0.6], samples, sample_labels, 2))