機器學習一個小目標——Task4
阿新 • • 發佈:2018-11-21
任務【模型評估】
記錄五個模型關於precision、recall、f1、auc、roc的評分表格,畫出auc和roc曲線圖
遇到難題
實驗程式碼
#!/usr/bin/env python 3.6
#-*- coding:utf-8 -*-
# @File : Model_evaluation.py
# @Date : 2018-11-20
# @Author : 黑桃
# @Software: PyCharm
import pickle
from matplotlib import pyplot as plt
from sklearn.externals import joblib
from sklearn.metrics import accuracy_score, recall_score, f1_score, roc_auc_score, roc_curve
# Root directory holding the pickled features, the persisted models and the
# output pictures. Every other path in this script is built relative to it.
path = "E:/MyPython/Machine_learning_GoGoGo/"

# =============================================================================
# 1 Load features
# =============================================================================
print("0 讀取特徵")
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load feature files that were produced by a trusted pipeline.
# The context manager closes the file even when unpickling raises, which the
# previous manual open()/close() pair did not guarantee.
with open(path + 'feature/feature_V1.pkl', 'rb') as f:
    # The pickle holds a 4-item sequence: train features, test features,
    # train labels, test labels (in that order).
    train, test, y_train, y_test = pickle.load(f)
"""=====================================================================================================================
2 讀取模型
"""
print("1 讀取模型")


def _load_model(stem):
    """Load the persisted model stored at ``<path>model/<stem>.pkl``."""
    return joblib.load(path + "model/" + stem + ".pkl")


# Each model is bound to a module-level name matching its file stem; the
# evaluation code further down refers to the models by these names.
SVM_linear = _load_model("SVM_linear")
SVM_poly = _load_model("SVM_poly")
SVM_rbf = _load_model("SVM_rbf")
SVM_sigmoid = _load_model("SVM_sigmoid")
lg_120 = _load_model("lg_120")
DT = _load_model("DT")
xgb_sklearn = _load_model("xgb_sklearn")
lgb_sklearn = _load_model("lgb_sklearn")
xgb = _load_model("xgb")
lgb = _load_model("lgb")
"""=====================================================================================================================
3 模型評估
"""
def model_evalua(clf, X_train, X_test, y_train, y_test, clf_name):
    """Print evaluation metrics for a fitted classifier and plot its ROC curves.

    For both the training and the test split this prints the ROC AUC,
    accuracy, recall, F1 score and KS statistic, then draws both ROC curves
    on one figure, saves it to ``<path>picture/<clf_name>.jpg`` and shows it.

    Args:
        clf: Fitted classifier exposing ``predict`` and ``predict_proba``.
            Column 1 of ``predict_proba`` is used as the ranking score
            (presumably the positive-class probability of a binary problem;
            confirm against how the models were trained).
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.
        clf_name: Label used in the plot title and the saved file name.

    Returns:
        None. Results are printed, plotted and written to disk.
    """
    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)
    y_train_pred_proba = clf.predict_proba(X_train)[:, 1]
    y_test_pred_proba = clf.predict_proba(X_test)[:, 1]

    # AUC is a ranking metric, so it has to be computed from the continuous
    # scores. Feeding it the hard 0/1 predictions collapses the ROC curve to
    # a single operating point and disagrees with the AUC in the plot legend.
    train_auc = roc_auc_score(y_train, y_train_pred_proba)
    test_auc = roc_auc_score(y_test, y_test_pred_proba)
    print('AUC Score')
    print("Train_AUC Score :{:.4f}".format(train_auc))
    print("Test_AUC Score :{:.4f}".format(test_auc))

    # Accuracy
    print('準確性:')
    print('Train_準確性:{:.4f}'.format(accuracy_score(y_train, y_train_pred)))
    print('Test_準確性:{:.4f}'.format(accuracy_score(y_test, y_test_pred)))

    # Recall
    print('召回率:')
    print('Train_召回率:{:.4f}'.format(recall_score(y_train, y_train_pred)))
    print('Test_召回率:{:.4f}'.format(recall_score(y_test, y_test_pred)))

    # F1 score
    print('f1_score:')
    print('Train_f1_score:{:.4f}'.format(f1_score(y_train, y_train_pred)))
    print('Test_f1_score:{:.4f}'.format(f1_score(y_test, y_test_pred)))

    # ROC curve points for both splits
    fpr_tr, tpr_tr, _ = roc_curve(y_train, y_train_pred_proba)
    fpr_te, tpr_te, _ = roc_curve(y_test, y_test_pred_proba)

    # KS statistic: the largest gap between TPR and FPR along the ROC curve.
    train_ks = max(abs(fpr_tr - tpr_tr))
    test_ks = max(abs(fpr_te - tpr_te))
    print('KS:')
    print('Train:{:.4f}'.format(train_ks))
    print('Test:{:.4f}'.format(test_ks))

    # Start a fresh figure for every model; otherwise, whenever plt.show()
    # does not block and discard the figure, curves from previously evaluated
    # models would be drawn on top of each other and saved into this picture.
    plt.figure()
    plt.plot(fpr_tr, tpr_tr, 'r-',
             label="Train:AUC: {:.3f} KS:{:.3f}".format(train_auc, train_ks))
    # The test legend entry must report the test KS (it used the train KS).
    plt.plot(fpr_te, tpr_te, 'g-',
             label="Test:AUC: {:.3f} KS:{:.3f}".format(test_auc, test_ks))
    # Diagonal reference line of a random classifier.
    plt.plot([0, 1], [0, 1], 'd--')
    plt.legend(loc='best')
    plt.title(clf_name + " ROC curve")
    # Save before show(): the figure may be gone once the window is closed.
    plt.savefig(path + 'picture/' + clf_name + '.jpg')
    plt.show()
# Evaluate every scikit-learn style model in turn. Each entry is
# (banner printed before the run, model object, name used for title/file).
# Banner texts are kept exactly as they were, including the trailing colon
# that only some of them carry.
# The native-API `xgb` and `lgb` models are loaded above but are not
# evaluated here; their evaluation calls were disabled in the original script.
_evaluation_runs = (
    ('-------------------SVM_linear-------------------', SVM_linear, 'SVM_linear'),
    ('-------------------SVM_poly-------------------:', SVM_poly, 'SVM_poly'),
    ('-------------------SVM_rbf-------------------:', SVM_rbf, 'SVM_rbf'),
    ('-------------------SVM_sigmoid-------------------:', SVM_sigmoid, 'SVM_sigmoid'),
    ('-------------------lg_120-------------------', lg_120, 'lg_120'),
    ('-------------------DT-------------------', DT, 'DT'),
    ('-------------------xgb_sklearn-------------------', xgb_sklearn, 'xgb_sklearn'),
    ('-------------------lgb_sklearn-------------------', lgb_sklearn, 'lgb_sklearn'),
)
for _banner, _model, _model_name in _evaluation_runs:
    print(_banner)
    model_evalua(_model, train, test, y_train, y_test, _model_name)
實驗結果
模型 | accuracy(準確性) | recall | f1_score | KS | ROC_AUC | ROC曲線 | |
---|---|---|---|---|---|---|---|
SVM_linear | Train_準確性:0.7878 Test_準確性:0.7442 | Train_召回率:0.1683 Test_召回率:0.3377 | Train_f1_score:0.2781 Test_f1_score:0.4160 | Train:0.4519 Test:0.2590 | Train_AUC Score :0.5774 Test_AUC Score :0.6160 | ||
SVM_poly | Train_準確性:0.7815 Test_準確性:0.7267 | Train_召回率:0.1027 Test_召回率:0.0597 | Train_f1_score:0.1859 Test_f1_score:0.1055 | Train:0.7099 Test:0.3082 | Train_AUC Score :0.5510 Test_AUC Score :0.5164 | ||
SVM_rbf | Train_準確性:0.7971 Test_準確性:0.7589 | Train_召回率:0.1894 Test_召回率:0.1455 | Train_f1_score:0.3119 Test_f1_score:0.2456 | Train:0.6474 Test:0.3723 | Train_AUC Score :0.5907 Test_AUC Score :0.5655 | ||
SVM_sigmoid | Train_準確性:0.7265 Test_準確性:0.7092 | Train_召回率:0.2809 Test_召回率:0.1584 | Train_f1_score:0.3328 Test_f1_score:0.2272 | Train:0.2216 Test:0.1235 | Train_AUC Score :0.5752 Test_AUC Score :0.5356 | ||
lg_120 | Train_準確性:0.4355 Test_準確性:0.4590 | Train_召回率:0.6671 Test_召回率:0.7117 | Train_f1_score:0.3647 Test_f1_score:0.4152 | Train:0.0695 Test:0.0907 | Train_AUC Score :0.5142 Test_AUC Score :0.5387 | ||
DT | Train_準確性:0.7920 Test_準確性:0.7505 | Train_召回率:0.4245 Test_召回率:0.3169 | Train_f1_score:0.4978 Test_f1_score:0.4067 | Train:0.4126 Test:0.3524 | Train_AUC Score :0.6672 Test_AUC Score :0.6138 | ||
xgb_sklearn | Train_準確性:0.8452 Test_準確性:0.7765 | Train_召回率:0.4691 Test_召回率:0.3065 | Train_f1_score:0.5954 Test_f1_score:0.4252 | Train:0.6167 Test:0.3763 | Train_AUC Score :0.7175 Test_AUC Score :0.6283 | ||
lgb_sklearn | Train_準確性:1.0000 Test_準確性:0.7680 | Train_召回率:1.0000 Test_召回率:0.3117 | Train_f1_score:1.0000 Test_f1_score:0.4203 | Train:1.0000 Test:0.3761 | Train_AUC Score :1.0000 Test_AUC Score :0.6242 |
參考文獻
ML實操 - 貸款使用者逾期情況分析
ML - 貸款使用者逾期情況分析
python matplotlib 畫圖儲存圖片簡單例子
sklearn.metrics中的評估方法介紹(accuracy_score, recall_score, roc_curve, roc_auc_score, confusion_matrix)