資料分析之評估演算法
阿新 • 發佈:2019-02-09
# Evaluate a classifier on the Iris data set using a single train/test split.
# NOTE: the original header comment mentioned chi-square feature selection,
# but no feature selection happens here; all four measurements are used.
import pandas
from pandas import read_csv
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Load the data. The file has no header row, so column names are set by hand.
iris = pandas.read_csv(
    'http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    header=None,
)
iris.columns = [
    'SepalLengthCm',
    'SepalWidthCm',
    'PetalLengthCm',
    'PetalWidthCm',
    'Species',
]

# Split the frame into input features (X) and the output label (Y).
arrary = iris.values
# `iris.values` is object-dtype because of the string Species column, so the
# four measurement columns are cast to float explicitly.
X = arrary[:, 0:4].astype(float)
le = LabelEncoder()
Y = le.fit_transform(iris['Species'])  # encode species names as integer labels

# Hold out a third of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
test_size = 0.33
seed = 6
X_train, X_test, Y_traing, Y_test = train_test_split(
    X, Y, test_size=test_size, random_state=seed
)

# Choose the model and train it on the training split.
model = LogisticRegression()
model.fit(X_train, Y_traing)

# Score on the held-out split (mean accuracy) and report it as a percentage.
result = model.score(X_test, Y_test)
print("演算法評估結果:%.3f%%" % (result * 100))