1. 程式人生 > 實用技巧 >KNN演算法實戰——海倫約會(KDtree優化)

KNN演算法實戰——海倫約會(KDtree優化)

 本文通過海倫約會的例子來測試之前實現的KDTree的效果,並探討是否對特徵進行歸一化會如何影響模型的整體表現。
最後發現,在機器學習中,特徵歸一化確實能為模型帶來非常大的幫助。
from KDTree import KDTree  # see the earlier post that implements the KDTree
from sklearn import model_selection, preprocessing
import pandas as pd


class KNN(object):
    """Thin k-nearest-neighbours wrapper that delegates all work to a KDTree."""

    def __init__(self, K=1, p=2):
        """Create an empty model.

        Args:
            K: number of neighbours passed to the KDTree at query time.
            p: stored on the instance only; it is never forwarded to the
                KDTree in this listing (presumably a Minkowski order --
                TODO confirm against the KDTree implementation).
        """
        self.kdtree = KDTree()
        self.K = K
        self.p = p

    def fit(self, x_data, y_data):
        """Build the underlying KDTree from the samples and their targets."""
        self.kdtree.build_tree(x_data, y_data)

    def predict(self, pre_x, label):
        """Predict the target for ``pre_x``.

        ``label`` selects the mode: if it contains the substring/element
        ``'class'`` the classification query is used, otherwise the
        regression query is used.
        """
        if 'class' in label:
            return self.kdtree.predict_classification(pre_x, K=self.K)
        else:
            return self.kdtree.predict_regression(pre_x, K=self.K)

    def test_check(self, test_xx, test_y):
        """Return the fraction of samples in ``test_xx`` predicted correctly.

        Only classification problems are supported. ``test_y`` is indexed
        positionally alongside ``test_xx``.
        """
        correct = 0
        for i, xi in enumerate(test_xx):
            pre_y = self.kdtree.predict_classification(Xi=xi, K=self.K)
            if pre_y == test_y[i]:
                correct += 1
        return correct / len(test_y)


file_path = "datingTestSet.txt"
data = pd.read_csv(file_path, sep="\t", header=None)
# Every column except the last is a feature; the last column is the label.
XX = data.iloc[:, :-1].values
Y = data.iloc[:, -1].values
train_xx, test_xx, train_y, test_y = model_selection.train_test_split(
    XX, Y, test_size=0.2, random_state=123, shuffle=True
)

# Baseline: raw, unscaled features.
knn = KNN(K=5, p=2)
knn.fit(train_xx, train_y)
acc = knn.test_check(test_xx, test_y)
print("No Standard Scale Accuracy: ", acc)

# The feature dimensions differ greatly in magnitude, so apply feature scaling.
scaler = preprocessing.StandardScaler()
# The mean and standard deviation must be computed from the training set only.
scaler.fit(train_xx)
stand_train_xx = scaler.transform(train_xx)
stand_test_xx = scaler.transform(test_xx)

new_knn = KNN(K=5, p=2)
new_knn.fit(stand_train_xx, train_y)
new_acc = new_knn.test_check(stand_test_xx, test_y)
print("Standard Scale Accuracy: ", new_acc)