MOOC Machine Learning, Day 6: A Quick Try of K-Nearest Neighbors, Decision Tree, and Naive Bayes Classifiers
阿新 · Published 2020-06-29
1. The code below implements the small examples from the theory in the previous post
from sklearn.neighbors import KNeighborsClassifier   # K-nearest neighbors classifier
from sklearn.datasets import load_iris               # iris dataset
from sklearn.tree import DecisionTreeClassifier      # decision tree classifier
from sklearn.model_selection import cross_val_score  # cross-validation scoring
from sklearn.naive_bayes import GaussianNB           # naive Bayes classifier
import numpy as np                                   # scientific computing

# The small examples below follow the same order as the imports.
X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X, y)
print("+++++K-Nearest Neighbors+++++")
print(neigh.predict([[1.2]]))

clf = DecisionTreeClassifier()
iris = load_iris()
scores = cross_val_score(clf, iris.data, iris.target, cv=10)  # 10-fold cross-validation on iris
print("+++++Cross-validation+++++")
print(scores)

print("+++++Decision Tree+++++")
clf.fit(X, y)  # fit the tree on the same toy data used for KNN
print(clf.predict([[2.2]]))

A = np.array([[-1, -1], [-2, -1], [-3, -2], [2, 1], [1, 1], [3, 2]])
B = np.array([1, 1, 1, 2, 2, 2])
clf1 = GaussianNB(priors=None)  # priors=None: class priors are estimated from the data
clf1.fit(A, B)
r = clf1.predict([[-0.8, -1]])
print("+++++Naive Bayes+++++")
print(r)
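To see why KNN predicts class 0 for the query [[1.2]], you can ask the fitted model for the query's nearest neighbors directly. A minimal self-contained sketch using scikit-learn's kneighbors method:

from sklearn.neighbors import KNeighborsClassifier

X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
neigh = KNeighborsClassifier(n_neighbors=3).fit(X, y)

# kneighbors returns the distances to, and the training-set indices of,
# the k nearest points for each query
distances, indices = neigh.kneighbors([[1.2]])
print(distances)  # [[0.2 0.8 1.2]] -> training points 1, 2 and 0
print(indices)    # [[1 2 0]]
print([y[i] for i in indices[0]])  # [0, 1, 0] -> majority vote gives class 0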
2. Results
+++++K-Nearest Neighbors+++++
[0]
+++++Cross-validation+++++
[ 1. 0.93333333 1. 0.93333333 0.93333333 0.86666667
0.93333333 0.93333333 1. 1. ]
+++++Decision Tree+++++
[1]
+++++Naive Bayes+++++
[1]
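A single summary number is easier to read than the ten per-fold scores. A minimal sketch that averages them with numpy, with the fold scores copied from the output above:

import numpy as np

scores = np.array([1., 0.93333333, 1., 0.93333333, 0.93333333,
                   0.86666667, 0.93333333, 0.93333333, 1., 1.])
# mean accuracy across the 10 folds, plus the fold-to-fold spread
print("mean accuracy: %.4f (+/- %.4f)" % (scores.mean(), scores.std()))
# mean accuracy: 0.9533 (+/- 0.0427)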
3. Practice with the feature data provided by the MOOC
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer           # data preprocessing (replaces the removed sklearn.preprocessing.Imputer)
from sklearn.utils import shuffle                  # shuffles the training data (the old sklearn.cross_validation module is gone)
from sklearn.metrics import classification_report  # precision, recall and F1 score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB

def load_datasets(feature_path, label_path):
    # each feature file has 41 columns
    feature = np.ndarray(shape=(0, 41))
    label = np.ndarray(shape=(0, 1))
    # read each feature file, treating '?' as a missing value,
    # then fill every missing entry with the mean of its column
    for file in feature_path:
        df = pd.read_table(file, delimiter=',', na_values='?', header=None)
        imp = SimpleImputer(missing_values=np.nan, strategy='mean')
        df = imp.fit_transform(df)
        feature = np.concatenate((feature, df))
    for file in label_path:
        df = pd.read_table(file, header=None)
        label = np.concatenate((label, df))
    label = np.ravel(label)
    return feature, label

if __name__ == '__main__':
    # concrete data paths
    featurePaths = ['/A/A.feature',
                    '/B/B.feature',
                    '/C/C.feature',
                    '/D/D.feature',
                    '/E/E.feature']
    labelPaths = ['/A/A.label',
                  '/B/B.label',
                  '/C/C.label',
                  '/D/D.label',
                  '/E/E.label']
    # read the data: files A-D for training, file E for testing
    x_train, y_train = load_datasets(featurePaths[:4], labelPaths[:4])
    x_test, y_test = load_datasets(featurePaths[4:], labelPaths[4:])
    # shuffle the training data (the original called train_test_split
    # with test_size=0.0 for the same effect)
    x_train, y_train = shuffle(x_train, y_train)

    # build the three classifiers and predict on the held-out data
    print('Start training knn')
    knn = KNeighborsClassifier().fit(x_train, y_train)
    print('Training done')
    answer_knn = knn.predict(x_test)
    print('Prediction done')

    print('Start training DT')
    dt = DecisionTreeClassifier().fit(x_train, y_train)
    print('Training done')
    answer_dt = dt.predict(x_test)
    print('Prediction done')

    print('Start training Bayes')
    gnb = GaussianNB().fit(x_train, y_train)
    print('Training done')
    answer_gnb = gnb.predict(x_test)
    print('Prediction done')

    # show the results: classification_report builds a text report
    # with the main classification metrics (precision, recall, F1)
    print('\n\nThe classification report for knn:')
    print(classification_report(y_test, answer_knn))
    print('\n\nThe classification report for DT:')
    print(classification_report(y_test, answer_dt))
    print('\n\nThe classification report for Bayes:')
    print(classification_report(y_test, answer_gnb))
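The key preprocessing step above is mean imputation: every '?' in a feature file is read in as NaN, and SimpleImputer then replaces it with the mean of its column. A small self-contained sketch of that behavior, with made-up toy values:

import numpy as np
from sklearn.impute import SimpleImputer

# a toy feature matrix with two missing cells (NaN plays the role of '?')
toy = np.array([[1.0, np.nan],
                [3.0, 4.0],
                [np.nan, 8.0]])
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
print(imp.fit_transform(toy))
# [[1. 6.]
#  [3. 4.]
#  [2. 8.]]
# -> the column means (2.0 and 6.0) fill the missing cells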