機器學習python分類演算法
阿新 • 發佈:2019-01-05
from pandas import read_csv from sklearn.linear_model import LinearRegression from sklearn.linear_model import LogisticRegression from sklearn.model_selection import KFold from sklearn.model_selection import cross_val_score from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.neighbors import KNeighborsClassifier from sklearn.naive_bayes import GaussianNB from sklearn.tree import DecisionTreeClassifier from sklearn.svm import SVC filename = 'pima_data.csv' names = ['preg','plas','pres','skin','test','mass','pedi','age','class'] data = read_csv(filename,names = names) array = data.values X = array[:,0:8] Y = array[:,8] num_folds = 10 seed = 7 kfold = KFold(n_splits=num_folds,random_state=seed) #邏輯迴歸(線性演算法) # model = LogisticRegression() # result = cross_val_score(model,X,Y,cv=kfold) # print(result.mean()) #線性判別分析(線性演算法) '''將高維的模式樣本投影到最佳鑑別向量空間,可以抽取分類資訊和壓縮特徵空間維數''' # model = LinearDiscriminantAnalysis() # result = cross_val_score(model,X,Y,cv=kfold) # print(result.mean()) #非線性演算法 #K近鄰演算法 '''如果一個樣本在特徵空間的K個最相似的樣本中的大多數屬於一個類別,該樣本也屬於這個類別''' # model = KNeighborsClassifier() # result = cross_val_score(model,X,Y,cv=kfold) # print(result.mean()) #貝葉斯分類器 '''先計算先驗概率,用貝葉斯公式計算出後驗概率,最小錯誤率上的優化''' # model = GaussianNB() # result = cross_val_score(model,X,Y,cv=kfold) # print(result.mean()) #分類與迴歸樹 # model = DecisionTreeClassifier() # result = cross_val_score(model,X,Y,cv=kfold) # print(result.mean()) #支援向量機SVM # model = SVC() # result = cross_val_score(model,X,Y,cv=kfold) # print(result.mean())