1. 程式人生 > K最近鄰演算法(KNN) --- sklearn+python實現

K最近鄰演算法(KNN)---sklearn+python實現

def main():
    """Grid-search KNN hyperparameters on the sklearn digits dataset.

    Loads the digits data, holds out 20% as a test set, runs GridSearchCV
    over two parameter subspaces of KNeighborsClassifier, then prints the
    best estimator, its parameters, its cross-validation score, and the
    accuracy of the best estimator on the held-out test set.
    """
    from sklearn import datasets
    from sklearn.model_selection import GridSearchCV, train_test_split
    from sklearn.neighbors import KNeighborsClassifier

    digits = datasets.load_digits()
    x_train, x_test, y_train, y_test = train_test_split(
        digits.data, digits.target, test_size=0.2, random_state=666)

    # Parameter grid for the search: with uniform weights only the number
    # of neighbors matters; with distance weights the Minkowski power
    # parameter p is searched as well.
    param_grid = [
        {
            'weights': ['uniform'],
            'n_neighbors': list(range(1, 11)),
        },
        {
            'weights': ['distance'],
            'n_neighbors': list(range(1, 11)),
            'p': list(range(1, 6)),
        },
    ]

    # n_jobs=-1: use every available CPU core for the search in parallel.
    # verbose=1: print progress information while searching.
    grid_search = GridSearchCV(KNeighborsClassifier(), param_grid,
                               n_jobs=-1, verbose=1)
    grid_search.fit(x_train, y_train)

    # Best classifier found by the grid search.
    print(grid_search.best_estimator_)
    # Hyperparameters of that best classifier.
    print(grid_search.best_params_)
    # Its mean cross-validated score on the training folds.
    print(grid_search.best_score_)
    # Accuracy of the best classifier on the held-out test set.
    knn_clf = grid_search.best_estimator_
    print(knn_clf.score(x_test, y_test))


if __name__ == '__main__':
    main()
Fitting 3 folds for each of 60 (10 + 50) candidates, totalling 180 fits
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   30.6s finished
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=3, p=3,
           weights='distance')
{'n_neighbors': 3, 'weights': 'distance', 'p': 3}
0.985386221294
0.983333333333
在衡量距離時,其實還有一個非常重要的概念就是資料歸一化Feature Scaling