機器學習——KNN
阿新 • • 發佈:2018-10-05
load -s 創建 數據 sklearn lac bsp otl 訓練數據
導入類庫
# Third-party imports for the KNN demos below.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# FIX: the original read "from sklearn.datasets importload_iris"
# (missing space) -- a SyntaxError. Corrected below.
from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
# 資訊熵:熵越大,資訊量越大,蘊含的不確定性越大。
# (注意:熵/資訊增益是決策樹的概念,與 KNN 演算法本身無直接關係。)
KNN 演算法步驟:
1. 計算待預測點到所有訓練樣本點的距離;
2. 對所有距離由小到大排序;
3. 取距離最近的前 K 個樣本,找出其中出現次數最多的類別,作為待預測點的類別。
代碼
1 A = np.array([[1, 1], [1, 1.5], [0.5, 1.5]]) 2 B = np.array([[3.0, 3.0], [3.0, 3.5], [2.8, 3.1]]) 3 4 5 def knn_pre_norm(point):6 a_len = np.linalg.norm(point - A, axis=1) 7 b_len = np.linalg.norm(point - B, axis=1) 8 print(a_len.min()) 9 print(b_len.min()) 10 11 12 def knn_predict_rev(point): 13 X = np.array([[1, 1], [1, 1.5], [0.5, 1.5], [3.0, 3.0], [3.0, 3.5], [2.8, 3.1]]) 14 Y = np.array([0, 0, 0, 1, 1, 1])15 16 knn = KNeighborsClassifier(n_neighbors=2) 17 knn.fit(X, Y) 18 19 print(knn.predict(np.array([[1.0, 3.0]]))) 20 21 22 def iris_linear(): 23 # 加載iris數據 24 li = load_iris() 25 # 散點圖 26 # plt.scatter(li.data[:, 0], li.data[:, 1], c=li.target) 27 # plt.scatter(li.data[:, 2], li.data[:, 3], c=li.target) 28 # plt.show() 29 # 分割測試集和訓練集,測試集占整個數據集的比例是0.25 30 x_train, x_test, y_train, y_test = train_test_split(li.data, li.target, test_size=0.25) 31 # 創建KNN分類,使用最少5個鄰居作為類別判斷標準 32 knn = KNeighborsClassifier(n_neighbors=5) 33 # 訓練數據 34 knn.fit(x_train, y_train) 35 # 預測測試集 36 # print(knn.predict(x_test)) 37 # 預測np.array([[6.3, 3, 5.2, 2.3]]) 38 print(knn.predict(np.array([[6.3, 3, 5.2, 2.3]]))) 39 # 預測np.array([[6.3, 3, 5.2, 2.3]])所屬各個類別的概率 40 print(knn.predict_proba(np.array([[6.3, 3, 5.2, 2.3]]))) 41 42 43 if __name__ == ‘__main__‘: 44 # knn_predict_rev(None) 45 # knn_pre_norm(np.array([2.3,2.3])) 46 iris_linear()
機器學習——KNN