Python 機器學習-鳶尾花分類
阿新 • • 發佈:2019-01-07
"""Iris flower classification: an end-to-end scikit-learn walkthrough.

The script loads the iris CSV, prints and plots summary views of the data,
compares six classifiers with 10-fold cross-validation on a training split,
and finally scores an SVM on the held-out validation split.
"""
# Imports
from pandas import read_csv
from pandas.plotting import scatter_matrix
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# Load the data. Column names are supplied explicitly, so the CSV is
# presumably header-less -- verify against the actual file.
filename = 'iris.data.csv'
# NOTE(review): 'separ-*' looks like a typo for 'sepal-*'. The labels are kept
# unchanged here because they appear verbatim in the printed and plotted output.
names = ['separ-length', 'separ-width', 'petal-length', 'petal-width', 'class']
dataset = read_csv(filename, names=names)

# Dimensions of the data: (rows, columns)
print('資料緯度:行%s,列%s' % dataset.shape)

# First ten rows
print(dataset.head(10))

# Descriptive statistics
print(dataset.describe())

# Number of samples per class
print(dataset.groupby('class').size())

# Box-and-whisker plot, one subplot per column in a 2x2 grid
dataset.plot(kind='box', subplots=True, layout=(2, 2), sharex=False, sharey=False)
pyplot.show()

# Histograms
dataset.hist()
pyplot.show()

# Scatter-plot matrix
scatter_matrix(dataset)
pyplot.show()

# Split off a validation set: the first four columns are the features,
# the fifth column is the class label.
array = dataset.values
X = array[:, 0:4]
Y = array[:, 4]
validation_size = 0.2
seed = 7
X_train, X_validation, Y_train, Y_validation = train_test_split(
    X, Y, test_size=validation_size, random_state=seed)

# Candidate algorithms to compare (insertion order is also the plot order)
models = {
    'LR': LogisticRegression(),
    'LDA': LinearDiscriminantAnalysis(),
    'KNN': KNeighborsClassifier(),
    'CART': DecisionTreeClassifier(),
    'NB': GaussianNB(),
    'SVM': SVC(),
}

# random_state only has an effect when shuffle=True; recent scikit-learn
# releases raise ValueError if a seed is given without shuffling. The splitter
# is loop-invariant, and with an integer seed every split() call yields the
# same folds, so one instance is shared by all models.
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)

results = []
for key, model in models.items():
    cv_results = cross_val_score(model, X_train, Y_train,
                                 cv=kfold, scoring='accuracy')
    results.append(cv_results)
    print('%s:%f(%f)' % (key, cv_results.mean(), cv_results.std()))

# Box plot comparing the cross-validation accuracy of each algorithm
fig = pyplot.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
pyplot.boxplot(results)
# Pass a concrete list of labels rather than a dict view.
ax.set_xticklabels(list(models))
pyplot.show()

# Evaluate an SVM on the held-out validation set
svm = SVC()
svm.fit(X=X_train, y=Y_train)
predictions = svm.predict(X_validation)
print(accuracy_score(Y_validation, predictions))
print(confusion_matrix(Y_validation, predictions))
print(classification_report(Y_validation, predictions))