1. 程式人生 > K-近鄰演算法(KNN)

K-近鄰演算法(KNN)

#-*- coding:utf-8 -*-
import numpy as np
import operator

def createDataset():
    """Build the small hard-coded training set used by the KNN demo.

    Returns:
        A ``(features, labels)`` pair: ``features`` is a 4x2 NumPy array
        with one two-feature sample per row, and ``labels`` is a tuple
        holding the class label of each row in the same order.
    """
    # Each entry pairs a 2-D feature vector with its class label
    # ('動作片' = action film, '愛情片' = romance film).
    samples = (
        ([5, 115], '動作片'),
        ([7, 106], '動作片'),
        ([56, 11], '愛情片'),
        ([66, 9], '愛情片'),
    )
    features = np.array([point for point, _ in samples])
    tags = tuple(tag for _, tag in samples)
    return features, tags

"""
KNN演算法
"""
def classify(intX, dataSet, labels, k):
    """Classify a sample by majority vote among its k nearest neighbours.

    Distances are Euclidean, measured between ``intX`` and every row of
    ``dataSet``. Intermediate results (distances, sort order, each vote
    and the final ranking) are printed to stdout as a trace.

    Args:
        intX: Feature vector of the sample to classify; array-like with
            one value per column of ``dataSet``.
        dataSet: Training samples, one per row. Any 2-D array-like is
            accepted (it is converted with ``np.asarray``).
        labels: Indexable sequence giving the class label of each row of
            ``dataSet``.
        k: Number of nearest neighbours that vote; must satisfy
            ``1 <= k <= number of rows in dataSet``.

    Returns:
        The label that received the most votes. When several labels tie,
        the stable sort keeps them in dict iteration order, so on Python
        versions with insertion-ordered dicts the label seen first (from
        the nearer neighbour) wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number
            of training samples.
    """
    # Accept nested lists/tuples as well as ndarrays.
    dataSet = np.asarray(dataSet)
    # shape[0] is the number of rows, i.e. the number of training samples.
    dataSetSize = dataSet.shape[0]

    # Fail early with a clear message instead of an IndexError later on.
    if not 1 <= k <= dataSetSize:
        raise ValueError(
            "k must be between 1 and the number of samples ({}), got {!r}"
            .format(dataSetSize, k)
        )

    # Broadcasting subtracts the query from every row, so there is no need
    # to materialise a tiled copy of intX first.
    diffMat = np.asarray(intX) - dataSet

    # Euclidean distance: square the differences, sum per row, square root.
    distances = (diffMat ** 2).sum(axis=1) ** 0.5

    # Indices of the samples ordered from nearest to farthest.
    print ("distances:",distances)
    sortDistance = distances.argsort()
    print ("sortDistance:", sortDistance)

    # Tally the labels of the k nearest samples.
    classCount = {}
    for i, sampleIndex in enumerate(sortDistance[:k]):
        voteLabel = labels[sampleIndex]
        s = "第{}個voteLabel={}".format(i, voteLabel)
        print(s)
        # dict.get supplies 0 the first time a label is seen.
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    # Rank (label, votes) pairs by vote count, highest first;
    # e.g. [('動作片', 2), ('愛情片', 1)].
    sortedClassCount = sorted(classCount.items(), key=operator.itemgetter(1), reverse=True)
    print ("sortedClassCount:")
    print(sortedClassCount)
    return sortedClassCount[0][0]

if __name__ == '__main__':
    # Demo run: classify one query point against the built-in data set
    # using its three nearest neighbours, then print the predicted label.
    samples, tags = createDataset()
    query = [20, 101]
    print(classify(query, samples, tags, 3))