1. 程式人生 > 實用技巧 > mooc機器學習第六天-K近鄰,決策樹,樸素貝葉斯分類器簡單嘗試

mooc機器學習第六天-K近鄰,決策樹,樸素貝葉斯分類器簡單嘗試

1.下面的程式碼是上一篇理論中的小例子

from sklearn.neighbors import KNeighborsClassifier # K近鄰分類器
from sklearn.datasets import load_iris  # 鳶尾花資料
from sklearn.tree import DecisionTreeClassifier  #決策樹分類器
from sklearn.model_selection import cross_val_score #交叉驗證值函式
from sklearn.naive_bayes import GaussianNB #樸素貝葉斯分類器
import  numpy as np #科學計算庫


# The small demos below run in the same order as the imports above:
# KNN -> cross-validation -> decision tree -> Gaussian naive Bayes.


# Toy 1-D training data: two points of class 0, two of class 1.
X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]

# --- K-nearest neighbours: fit on the toy data, predict one new point ---
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X, y)
print("+++++K近鄰+++++")
print(neigh.predict([[1.2]]))


# --- 10-fold cross-validation of a decision tree on the iris dataset ---
clf = DecisionTreeClassifier()
irls = load_iris()
re = cross_val_score(clf, irls.data, irls.target, cv=10)
print("+++++交叉驗證+++++")
print(re)

# --- Decision tree on the toy data ---
# cross_val_score fits clones of the estimator, so clf itself is still
# unfitted here and must be fit explicitly before predicting.
print("+++++決策樹+++++")
clf.fit(X, y)
print(clf.predict([[2.2]]))


# --- Gaussian naive Bayes on a small 2-D dataset ---
A = np.array([[-1, -1], [-2, -1], [-3, -2], [2, 1], [1, 1], [3, 2]])
B = np.array([1, 1, 1, 2, 2, 2])

clf1 = GaussianNB(priors=None)
clf1.fit(A, B)
r = clf1.predict([[-0.8, -1]])
# Fixed a typo in the banner: "樸素畢貝葉斯" -> "樸素貝葉斯".
print("+++++樸素貝葉斯+++++")
print(r)

2.結果

  

+++++K近鄰+++++
[0]
+++++交叉驗證+++++
[ 1.          0.93333333  1.          0.93333333  0.93333333  0.86666667
  0.93333333  0.93333333  1.          1.        ]
+++++決策樹+++++
[1]
+++++樸素畢貝葉斯+++++
[1]

3.利用mooc給的feature資料實踐

import  numpy as np
import  pandas as pd

from  sklearn.preprocessing import  Imputer#資料預處理庫
from  sklearn.cross_validation import train_test_split  #打亂訓練資料
from  sklearn.metrics  import  classification_report #計算召回率,F1值,精準度


from sklearn.neighbors import  KNeighborsClassifier
from sklearn.tree import  DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB


def load_datasets(feature_path, lable_path):
    """Load feature and label files and concatenate them into arrays.

    Parameters
    ----------
    feature_path : list of str
        Paths to comma-separated feature files; '?' marks a missing value.
    lable_path : list of str
        Paths to label files, one label per line.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        The stacked 2-D feature matrix and the flattened 1-D label vector.
    """
    feature_parts = []
    for file in feature_path:
        df = pd.read_csv(file, sep=',', na_values="?", header=None)
        # Mean-impute missing values column-wise.  This replaces the
        # sklearn.preprocessing.Imputer(strategy='mean') call (Imputer was
        # removed in scikit-learn 0.22) and is equivalent for the 'mean'
        # strategy: each NaN becomes its column's mean over the non-missing
        # values.
        df = df.fillna(df.mean())
        feature_parts.append(np.asarray(df, dtype=float))

    label_parts = []
    for file in lable_path:
        df = pd.read_csv(file, sep='\t', header=None)
        label_parts.append(np.asarray(df))

    # Stacking collected parts at the end avoids repeated reallocation and
    # no longer hard-codes 41 feature columns; the original empty shapes
    # are preserved when no files are given.
    feature = np.concatenate(feature_parts) if feature_parts else np.empty((0, 41))
    lable = np.concatenate(label_parts) if label_parts else np.empty((0, 1))

    lable = np.ravel(lable)
    return feature, lable


if __name__ == '__main__':
    # Concrete paths of the per-subject feature/label files.
    featurepaths = ['/A/A.feature',
                    '/B/B.feature',
                    '/C/C.feature',
                    '/D/D.feature',
                    '/E/E.feature'
                    ]
    labelPaths = ['/A/A.label',
                  '/B/B.label',
                  '/C/C.label',
                  '/D/D.label',
                  '/E/E.label']

    # Read the data: the first four subjects train, the last one tests.
    x_train, y_train = load_datasets(featurepaths[:4], labelPaths[:4])
    x_test, y_test = load_datasets(featurepaths[4:], labelPaths[4:])

    # Shuffle the training data.  The original code abused
    # train_test_split(..., test_size=0.0) from sklearn.cross_validation
    # (a module removed in scikit-learn 0.20) purely as a shuffler; a
    # random permutation achieves the same effect with no deprecated
    # dependency.
    perm = np.random.permutation(len(x_train))
    x_train, y_train = x_train[perm], y_train[perm]

    # Build the three classifiers and predict on the held-out subject.
    print('Start training knn')
    knn = KNeighborsClassifier().fit(x_train, y_train)
    print('Training done')
    answer_knn = knn.predict(x_test)
    print('Prediction done')

    print('Start training DT')
    dt = DecisionTreeClassifier().fit(x_train, y_train)
    print('Training done')
    answer_dt = dt.predict(x_test)
    print('Prediction done')

    print('Start training Bayes')
    gnb = GaussianNB().fit(x_train, y_train)
    print('Training done')
    answer_gnb = gnb.predict(x_test)
    print('Prediction done')

    # Report precision / recall / F1 for each classifier.
    print('\n\nThe classification report for knn:')
    print(classification_report(y_test, answer_knn))
    print('\n\nThe classification report for DT:')
    print(classification_report(y_test, answer_dt))
    print('\n\nThe classification report for Bayes:')
    print(classification_report(y_test, answer_gnb))