Computing accuracy and AUC with logistic regression, decision trees, and SVM
- Imported packages
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.preprocessing import label_binarize
- Read the data
# Read the dataset
data_all = pd.read_csv('/home/infisa/wjht/project/DataWhale/data_all.csv', encoding='gbk')
- Split the dataset
# Split into training and test sets; `status` is the target label
features = [x for x in data_all.columns if x not in ['status']]
X = data_all[features]
y = data_all['status']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=2018)
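If the `status` label is imbalanced (common for this kind of data, though the original post does not say), a stratified split keeps the positive/negative ratio the same in both subsets. A minimal alternative sketch of the split above:

# Stratified split: preserves the class ratio of `status` in train and test
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=2018, stratify=y)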
- Build the models
lr = LogisticRegression()  # logistic regression model
lr.fit(X_train, y_train)
tr = DecisionTreeClassifier()  # decision tree model
tr.fit(X_train, y_train)
svm = SVC()  # SVM model (default RBF kernel)
svm.fit(X_train, y_train)
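LogisticRegression and SVC are both sensitive to feature scale, so the scores below can change noticeably after standardization. This is an addition, not part of the original post; a minimal sketch with a scaling pipeline:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Standardize features, then fit the scale-sensitive models
scaled_lr = make_pipeline(StandardScaler(), LogisticRegression()).fit(X_train, y_train)
scaled_svm = make_pipeline(StandardScaler(), SVC()).fit(X_train, y_train)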
- Model scores
# Model scores (accuracy on the test set)
lr_score = lr.score(X_test, y_test)
print(lr_score)   # lr_score: 0.7484232655921513
tr_score = tr.score(X_test, y_test)
print(tr_score)   # tr_score: 0.6797477224947442
svm_score = svm.score(X_test, y_test)
print(svm_score)  # svm_score: 0.7484232655921513
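LR and SVM report exactly the same accuracy, which often happens when a classifier simply predicts the majority class; accuracy alone cannot reveal this. A quick check (an addition, not in the original) with per-class precision and recall:

from sklearn.metrics import classification_report

# Per-class precision/recall shows whether a model ignores the minority class
print(classification_report(y_test, lr.predict(X_test)))
print(classification_report(y_test, svm.predict(X_test)))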
- Compute the AUC values
# Compute the AUC values
y_test_hot = label_binarize(y_test, classes=(0, 1))  # binarize the test labels into a matrix
lr_y_score = lr.decision_function(X_test)    # decision scores (signed confidence values, not losses)
svm_y_score = svm.decision_function(X_test)  # decision scores
lr_fpr, lr_tpr, lr_thresholds = metrics.roc_curve(y_test_hot.ravel(), lr_y_score.ravel())      # ROC curve; lr_thresholds are the cut-off values
svm_fpr, svm_tpr, svm_thresholds = metrics.roc_curve(y_test_hot.ravel(), svm_y_score.ravel())  # ROC curve; svm_thresholds are the cut-off values
lr_auc = metrics.auc(lr_fpr, lr_tpr)
# lr_auc: 0.5674626772245001
svm_auc = metrics.auc(svm_fpr, svm_tpr)
# note: the original passed lr_fpr, lr_tpr here by mistake, so the reported
# 'svm_auc: 0.5674626772245001' is actually the LR value; rerun to get the true SVM AUC
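DecisionTreeClassifier has no decision_function, so the tree's AUC (not reported above) would come from predict_proba instead. A minimal sketch reusing the `tr` model fitted earlier; roc_auc_score is simply a shortcut for roc_curve followed by auc:

# Probability that each test sample belongs to the positive class (status = 1)
tr_y_score = tr.predict_proba(X_test)[:, 1]
tr_auc = metrics.roc_auc_score(y_test, tr_y_score)
print(tr_auc)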