1. 程式人生 > >客戶貸款逾期預測[4]-記錄評分、繪製roc曲線

客戶貸款逾期預測[4]-記錄評分、繪製roc曲線

任務

       記錄五個模型(邏輯迴歸、svm、決策樹、xgboost、lightgbm)關於precision、recall score、f1 score、roc、auc的評分表格。

實現

# -*- coding: utf-8 -*-
"""
Created on Thu Nov 15 13:02:11 2018

@author: keepi
"""

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')

# Load the raw table; the CSV is decoded as gb18030.
data = pd.read_csv('data.csv', encoding='gb18030')
print("data.shape:", data.shape)

# Per-column fraction of missing values. Only kept for manual inspection,
# e.g. miss_rate.sort_values(ascending=False); nothing below reads it.
miss_rate = data.isnull().sum().div(len(data))

# Numeric features: fill gaps with each column's mean.
X_num = data.select_dtypes('number').copy()
X_num = X_num.fillna(X_num.mean())
print("數值型特徵的shape:", X_num.shape)
print(X_num.columns)
# Remove the saved index column and the target so neither is used as a feature.
X_num = X_num.drop(columns=['Unnamed: 0', 'status'])

# Non-numeric features: fill gaps with 0.
X_str = data.select_dtypes(exclude='number').copy()
X_str = X_str.fillna(0)
print("非數值型特徵:", X_str.columns)
print(X_str.head())

# Only 'reg_preference_for_trad' is one-hot encoded and kept from the
# non-numeric columns.
X_dummy = pd.get_dummies(X_str['reg_preference_for_trad'])
X = pd.concat((X_num, X_dummy), axis=1, sort=False)
y = data['status']

# Hold out 30% of the rows as the test set (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1117
)

# Standardize features: the scaler is fitted on the training split only and
# then applied to both splits.
ss = StandardScaler().fit(X_train)
X_train_std = ss.transform(X_train)
X_test_std = ss.transform(X_test)

# Logistic regression with default settings.
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression().fit(X_train_std, y_train)

# Linear-kernel SVM; probability=True is requested because the evaluation
# step calls predict_proba on every model.
from sklearn.svm import SVC
svm_linear = SVC(kernel='linear', probability=True)
svm_linear.fit(X_train_std, y_train)

# Decision tree limited to depth 8.
from sklearn.tree import DecisionTreeClassifier
dtc = DecisionTreeClassifier(max_depth=8).fit(X_train_std, y_train)

# XGBoost through its scikit-learn wrapper, default settings.
from xgboost.sklearn import XGBClassifier
xgbc = XGBClassifier()
xgbc.fit(X_train_std, y_train)

# LightGBM through its scikit-learn wrapper, default settings.
from lightgbm.sklearn import LGBMClassifier
lgb = LGBMClassifier()
lgb.fit(X_train_std, y_train)

print('all done!')


#模型評估
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import roc_auc_score, roc_curve, auc
import matplotlib.pyplot as plt

def draw_metrics(clf, X_train, X_test, y_train, y_test):
    """Print train/test scores for a fitted classifier and plot its ROC curves.

    Accuracy, precision, recall, F1 and ROC-AUC are printed for the training
    and the test split, each with four decimals. The train and test ROC
    curves are then drawn on a new matplotlib figure, so repeated calls do
    not pile curves (and mismatched legend entries) onto the same axes.
    The figure is neither shown nor saved here; that is left to the caller
    or the active matplotlib backend.

    Parameters
    ----------
    clf : fitted estimator exposing ``predict`` and ``predict_proba``.
    X_train, X_test : feature matrices passed to ``clf`` as-is.
    y_train, y_test : true labels; label ``1`` is treated as the positive
        class for the ROC curve.

    Returns
    -------
    None
    """
    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)

    # Column 1 of predict_proba is used as the positive-class score.
    y_train_prob = clf.predict_proba(X_train)[:, 1]
    y_test_prob = clf.predict_proba(X_test)[:, 1]

    # (header, metric function, train-side input, test-side input).
    # Metrics are evaluated lazily inside the loop so that output already
    # printed is preserved if a later metric raises.
    reports = [
        ('準確率:\n', accuracy_score, y_train_pred, y_test_pred),
        ('精準率:\n', precision_score, y_train_pred, y_test_pred),
        ('召回率:\n', recall_score, y_train_pred, y_test_pred),
        ('f1-score:\n', f1_score, y_train_pred, y_test_pred),
        ('auc:\n', roc_auc_score, y_train_prob, y_test_prob),
    ]
    for header, metric, train_out, test_out in reports:
        print(header)
        print('訓練集: ', '%.4f' % metric(y_train, train_out), end=' ')
        # Fixed: the test accuracy used '%4f' (min width 4, six decimals)
        # instead of '%.4f'; every value now uses the same precision.
        print('測試集: ', '%.4f' % metric(y_test, test_out), end=' ')

    # ROC curves; the thresholds returned by roc_curve are not needed.
    fpr_train, tpr_train, _ = roc_curve(y_train, y_train_prob, pos_label=1)
    fpr_test, tpr_test, _ = roc_curve(y_test, y_test_prob, pos_label=1)

    # Start a fresh figure for this classifier. Without it, successive calls
    # draw on the same axes and the legend labels get attached to the first
    # model's curves.
    plt.figure()
    plt.plot(fpr_train, tpr_train,
             label='Train - AUC:{:.4f}'.format(auc(fpr_train, tpr_train)))
    plt.plot(fpr_test, tpr_test,
             label='Test - AUC:{:.4f}'.format(auc(fpr_test, tpr_test)))
    # Diagonal reference line from (0, 0) to (1, 1); left out of the legend.
    plt.plot([0, 1], [0, 1], 'd--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc=4)
    plt.title('ROC Curve')
    
    
# Evaluate every fitted model on the same standardized train/test splits,
# in the order: logistic regression, linear SVM, decision tree, XGBoost,
# LightGBM.
for _model in (lr, svm_linear, dtc, xgbc, lgb):
    draw_metrics(_model, X_train_std, X_test_std, y_train, y_test)

評估結果

| 模型 | accuracy | precision | recall | f1 score | roc-auc score | ROC曲線 |
| --- | --- | --- | --- | --- | --- | --- |
| 邏輯迴歸 | 訓練集:0.8019 / 測試集:0.7772 | 訓練集:0.7181 / 測試集:0.6384 | 訓練集:0.3604 / 測試集:0.3763 | 訓練集:0.4799 / 測試集:0.4753 | 訓練集:0.8157 / 測試集:0.7775 | (圖略) |
| svm線性核 | 訓練集:0.8019 / 測試集:0.7680 | 訓練集:0.7790 / 測試集:0.6690 | 訓練集:0.2645 / 測試集:0.2553 | 訓練集:0.3949 / 測試集:0.3695 | 訓練集:0.8131 / 測試集:0.7825 | (圖略) |
| 決策樹 | 訓練集:0.8954 / 測試集:0.7337 | 訓練集:0.8756 / 測試集:0.5000 | 訓練集:0.6667 / 測試集:0.3816 | 訓練集:0.7570 / 測試集:0.4328 | 訓練集:0.9060 / 測試集:0.6481 | (圖略) |
| xgboost | 訓練集:0.8584 / 測試集:0.7842 | 訓練集:0.8800 / 測試集:0.6651 | 訓練集:0.4871 / 測試集:0.3816 | 訓練集:0.6271 / 測試集:0.4849 | 訓練集:0.9156 / 測試集:0.7855 | (圖略) |
| lightgbm | 訓練集:0.9976 / 測試集:0.7730 | 訓練集:1.0000 / 測試集:0.6120 | 訓練集:0.9902 / 測試集:0.4026 | 訓練集:0.9951 / 測試集:0.4857 | 訓練集:1.0000 / 測試集:0.7764 | (圖略) |