客戶貸款逾期預測[4]-記錄評分、繪製roc曲線
阿新 • • 發佈:2018-12-04
任務
記錄五個模型(邏輯迴歸、svm、決策樹、xgboost、lightgbm)關於precision、recall score、f1 score、roc、auc的評分表格。
實現
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 15 13:02:11 2018

@author: keepi
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
from lightgbm.sklearn import LGBMClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import roc_auc_score, roc_curve, auc
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost.sklearn import XGBClassifier

# Load the data
data = pd.read_csv('data.csv', encoding='gb18030')
print("data.shape:", data.shape)

# Data preparation
# Per-column missing-value ratio; kept for ad-hoc inspection, e.g.
# miss_rate.sort_values(ascending=False).
miss_rate = data.isnull().sum() / len(data)

# Numeric features: fill missing values with the column mean.
# NOTE(review): the means are computed on the full dataset before the
# train/test split, so test rows influence the imputed values.
X_num = data.select_dtypes('number').copy()
X_num.fillna(X_num.mean(), inplace=True)
print("數值型特徵的shape:", X_num.shape)
print(X_num.columns)
# Drop the CSV index column and the target from the feature matrix.
X_num.drop(['Unnamed: 0', 'status'], axis=1, inplace=True)

# Non-numeric features: fill missing values with 0, then one-hot encode the
# single categorical column that is used as a feature.
X_str = data.select_dtypes(exclude='number').copy()
X_str.fillna(0, inplace=True)
print("非數值型特徵:", X_str.columns)
print(X_str.head())

X_dummy = pd.get_dummies(X_str['reg_preference_for_trad'])
X = pd.concat([X_num, X_dummy], axis=1, sort=False)
# If the categorical column had missing values, the 0 fill value becomes an
# integer column label next to string labels; newer scikit-learn releases
# reject DataFrames with mixed-type column names, so make them all strings.
X.columns = X.columns.astype(str)
y = data['status']

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1117)

# Standardize features (scaler is fitted on the training set only)
ss = StandardScaler()
X_train_std = ss.fit_transform(X_train)
X_test_std = ss.transform(X_test)

# Logistic regression
lr = LogisticRegression()
lr.fit(X_train_std, y_train)

# SVM with a linear kernel; probability=True is required for predict_proba
svm_linear = SVC(kernel='linear', probability=True).fit(X_train_std, y_train)

# Decision tree
dtc = DecisionTreeClassifier(max_depth=8)
dtc.fit(X_train_std, y_train)

# XGBoost (scikit-learn API)
xgbc = XGBClassifier()
xgbc.fit(X_train_std, y_train)

# LightGBM (scikit-learn API)
lgb = LGBMClassifier()
lgb.fit(X_train_std, y_train)

print('all done!')


# Model evaluation
def _report_metric(title, train_score, test_score):
    """Print a metric heading, then its train and test scores on one line."""
    print(title)
    print('訓練集: ', '%.4f' % train_score, end=' ')
    # The test score ends the line so the next heading starts on a new one.
    print('測試集: ', '%.4f' % test_score)


def draw_metrics(clf, X_train, X_test, y_train, y_test):
    """Print classification scores for a fitted model and plot its ROC curves.

    Prints accuracy, precision, recall, F1 and ROC-AUC for both the training
    and the test set, then draws the train and test ROC curves in a new
    figure.

    Args:
        clf: Fitted classifier exposing ``predict`` and ``predict_proba``.
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels; the positive class is labelled 1.
        y_test: Test labels; the positive class is labelled 1.
    """
    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)
    # Probability of the positive class (second column of predict_proba).
    y_train_prob = clf.predict_proba(X_train)[:, 1]
    y_test_prob = clf.predict_proba(X_test)[:, 1]

    # Identify the model so consecutive reports can be told apart.
    print(type(clf).__name__)

    # Accuracy
    _report_metric('準確率:\n',
                   accuracy_score(y_train, y_train_pred),
                   accuracy_score(y_test, y_test_pred))
    # Precision
    _report_metric('精準率:\n',
                   precision_score(y_train, y_train_pred),
                   precision_score(y_test, y_test_pred))
    # Recall
    _report_metric('召回率:\n',
                   recall_score(y_train, y_train_pred),
                   recall_score(y_test, y_test_pred))
    # F1 score
    _report_metric('f1-score:\n',
                   f1_score(y_train, y_train_pred),
                   f1_score(y_test, y_test_pred))
    # ROC-AUC, computed from the positive-class probabilities
    _report_metric('auc:\n',
                   roc_auc_score(y_train, y_train_prob),
                   roc_auc_score(y_test, y_test_prob))

    # ROC curves
    fpr_train, tpr_train, _ = roc_curve(y_train, y_train_prob, pos_label=1)
    fpr_test, tpr_test, _ = roc_curve(y_test, y_test_prob, pos_label=1)
    label = ['Train - AUC:{:.4f}'.format(auc(fpr_train, tpr_train)),
             'Test - AUC:{:.4f}'.format(auc(fpr_test, tpr_test))]

    # Start a fresh figure: without it every call draws onto the same axes
    # and the curves of all models overlap.
    plt.figure()
    plt.plot(fpr_train, tpr_train)
    plt.plot(fpr_test, tpr_test)
    # Diagonal reference line of a random classifier.
    plt.plot([0, 1], [0, 1], 'd--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(label, loc=4)
    plt.title('ROC Curve')
    # Display the figure when the file is run as a plain script.
    plt.show()


draw_metrics(lr, X_train_std, X_test_std, y_train, y_test)
draw_metrics(svm_linear, X_train_std, X_test_std, y_train, y_test)
draw_metrics(dtc, X_train_std, X_test_std, y_train, y_test)
draw_metrics(xgbc, X_train_std, X_test_std, y_train, y_test)
draw_metrics(lgb, X_train_std, X_test_std, y_train, y_test)
評估結果
| 模型 | accuracy | precision | recall | f1 score | roc-auc score | ROC曲線 |
| --- | --- | --- | --- | --- | --- | --- |
| 邏輯迴歸 | 訓練集: 0.8019 測試集: 0.7772 | 訓練集: 0.7181 測試集: 0.6384 | 訓練集: 0.3604 測試集: 0.3763 | 訓練集: 0.4799 測試集: 0.4753 | 訓練集: 0.8157 測試集: 0.7775 | |
| svm線性核 | 訓練集: 0.8019 測試集: 0.7680 | 訓練集: 0.7790 測試集: 0.6690 | 訓練集: 0.2645 測試集: 0.2553 | 訓練集: 0.3949 測試集: 0.3695 | 訓練集: 0.8131 測試集: 0.7825 | |
| 決策樹 | 訓練集: 0.8954 測試集: 0.7337 | 訓練集: 0.8756 測試集: 0.5000 | 訓練集: 0.6667 測試集: 0.3816 | 訓練集: 0.7570 測試集: 0.4328 | 訓練集: 0.9060 測試集: 0.6481 | |
| xgboost | 訓練集: 0.8584 測試集: 0.7842 | 訓練集: 0.8800 測試集: 0.6651 | 訓練集: 0.4871 測試集: 0.3816 | 訓練集: 0.6271 測試集: 0.4849 | 訓練集: 0.9156 測試集: 0.7855 | |
| lightgbm | 訓練集: 0.9976 測試集: 0.7730 | 訓練集: 1.0000 測試集: 0.6120 | 訓練集: 0.9902 測試集: 0.4026 | 訓練集: 0.9951 測試集: 0.4857 | 訓練集: 1.0000 測試集: 0.7764 | |