Logistic迴歸分類鳶尾花資料集
阿新 • • 發佈:2018-12-08
# Logistic-regression classification of the iris data set.
#
# Reads the iris CSV, fits a StandardScaler + LogisticRegression pipeline on
# two of the four measurements (sepal length and petal length), then draws the
# predicted class over a dense grid together with the training samples.
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Column labels: sepal length, sepal width, petal length, petal width, class.
iris_feature = u'花萼長度', u'花萼寬度', u'花瓣長度', u'花瓣寬度', u'類別'
path = '8.iris.data'  # path to the data file

data = pd.read_csv(path, header=None)
# Label the columns with iris_feature; with header=None they would otherwise
# be the default integer labels.
data.columns = iris_feature
# Replace the textual class names with integer category codes.
data['類別'] = pd.Categorical(data['類別']).codes

x_train = data[['花萼長度', '花瓣長度']]
y_train = data['類別']

lr = Pipeline([
    ('sc', StandardScaler()),
    ('clf', LogisticRegression()),
])
# Fit on the underlying array rather than the DataFrame: the grid passed to
# predict() below is a plain NumPy array, and mixing the two makes recent
# scikit-learn versions warn about missing feature names.
lr.fit(x_train.values, y_train)

N, M = 500, 500  # number of sample points along the horizontal / vertical axis
x1_min, x2_min = x_train.min(axis=0)
x1_max, x2_max = x_train.max(axis=0)
t1 = np.linspace(x1_min, x1_max, N)
t2 = np.linspace(x2_min, x2_max, M)
x1, x2 = np.meshgrid(t1, t2)  # grid of sample points
x_test = np.stack((x1.flat, x2.flat), axis=1)  # one (x1, x2) row per grid point

cm_light = mpl.colors.ListedColormap(['#77E0A0', '#FF8080', '#A0A0FF'])
cm_dark = mpl.colors.ListedColormap(['g', 'r', 'b'])

y_hat = lr.predict(x_test)
y_hat = y_hat.reshape(x1.shape)  # back to the grid's shape for plotting

# Predicted class over the grid (light colours) ...
plt.pcolormesh(x1, x2, y_hat, cmap=cm_light)
# ... with the training samples on top (dark colours, black edges).
plt.scatter(x_train['花萼長度'], x_train['花瓣長度'], c=y_train,
            cmap=cm_dark, marker='o', edgecolors='k')
plt.show()