1. 程式人生 > 程式設計 >python實現二分類和多分類的ROC曲線教程

python實現二分類和多分類的ROC曲線教程

基本概念

precision:預測為對的當中,原本為對的比例(越大越好,1為理想狀態)

recall:原本為對的當中,預測為對的比例(越大越好,1為理想狀態)

F-measure:F度量是對準確率和召回率做一個權衡(越大越好,1為理想狀態,此時precision為1,recall為1)

accuracy:預測對的(包括原本是對預測為對,原本是錯的預測為錯兩種情形)佔整個的比例(越大越好,1為理想狀態)

fp rate:原本是錯的預測為對的比例(越小越好,0為理想狀態)

tp rate:原本是對的預測為對的比例(越大越好,1為理想狀態)

ROC曲線通常在Y軸上具有真陽性率,在X軸上具有假陽性率。這意味著圖的左上角是“理想”點 - 誤報率為零,真正的正率為1。這不太現實,但它確實意味著曲線下面積(AUC)通常更好。

二分類問題:ROC曲線

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
start_time = time.time()
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score,accuracy_score
from sklearn.metrics import precision_score,f1_score
from keras.optimizers import Adam,SGD,sgd
from keras.models import load_model

print('讀取資料')
X_train = np.load('x_train-rotate_2.npy')
Y_train = np.load('y_train-rotate_2.npy')
print(X_train.shape)
print(Y_train.shape)

print('獲取測試資料和驗證資料')
X_train,X_valid,Y_train,Y_valid = train_test_split(X_train,test_size=0.1,random_state=666)

Y_train = np.asarray(Y_train,np.uint8)
Y_valid = np.asarray(Y_valid,np.uint8)
X_valid = np.array(X_valid,np.float32) / 255.

print('獲取模型')
model = load_model('./model/InceptionV3_model.h5')
opt = Adam(lr=1e-4)
model.compile(optimizer=opt,loss='binary_crossentropy')

print("Predicting")
Y_pred = model.predict(X_valid)
Y_pred = [np.argmax(y) for y in Y_pred] # 取出y中元素最大值所對應的索引
Y_valid = [np.argmax(y) for y in Y_valid]

# micro:多分類  
# weighted:不均衡數量的類來說,計算二分類metrics的平均
# macro:計算二分類metrics的均值,為每個類給出相同權重的分值。
precision = precision_score(Y_valid,Y_pred,average='weighted')
recall = recall_score(Y_valid,average='weighted')
f1_score = f1_score(Y_valid,average='weighted')
accuracy_score = accuracy_score(Y_valid,Y_pred)
print("Precision_score:",precision)
print("Recall_score:",recall)
print("F1_score:",f1_score)
print("Accuracy_score:",accuracy_score)

# 二分類 ROC曲線
# roc_curve:真正率(True Positive Rate,TPR)或靈敏度(sensitivity)
# 橫座標:假正率(False Positive Rate,FPR)
fpr,tpr,thresholds_keras = roc_curve(Y_valid,Y_pred)
auc = auc(fpr,tpr)
print("AUC : ",auc)
plt.figure()
plt.plot([0,1],[0,'k--')
plt.plot(fpr,label='Keras (area = {:.3f})'.format(auc))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve')
plt.legend(loc='best')
plt.savefig("../images/ROC/ROC_2分類.png")
plt.show()

print("--- %s seconds ---" % (time.time() - start_time))

ROC圖如下所示:

python實現二分類和多分類的ROC曲線教程

多分類問題:ROC曲線

ROC曲線通常用於二分類以研究分類器的輸出。為了將ROC曲線和ROC區域擴充套件到多類或多標籤分類,有必要對輸出進行二值化。⑴可以每個標籤繪製一條ROC曲線。⑵也可以通過將標籤指示符矩陣的每個元素視為二元預測(微平均)來繪製ROC曲線。⑶另一種用於多類別分類的評估方法是巨集觀平均,它對每個標籤的分類給予相同的權重。

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
start_time = time.time()
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score,sgd
from keras.models import load_model
from itertools import cycle
from scipy import interp
from sklearn.preprocessing import label_binarize

nb_classes = 5
print('讀取資料')
X_train = np.load('x_train-resized_5.npy')
Y_train = np.load('y_train-resized_5.npy')
print(X_train.shape)
print(Y_train.shape)

print('獲取測試資料和驗證資料')
X_train,np.uint8)
X_valid = np.asarray(X_valid,np.float32) / 255.

print('獲取模型')
model = load_model('./model/SE-InceptionV3_model.h5')
opt = Adam(lr=1e-4)
model.compile(optimizer=opt,loss='categorical_crossentropy')

print("Predicting")
Y_pred = model.predict(X_valid)
Y_pred = [np.argmax(y) for y in Y_pred] # 取出y中元素最大值所對應的索引
Y_valid = [np.argmax(y) for y in Y_valid]

# Binarize the output
Y_valid = label_binarize(Y_valid,classes=[i for i in range(nb_classes)])
Y_pred = label_binarize(Y_pred,classes=[i for i in range(nb_classes)])

# micro:多分類  
# weighted:不均衡數量的類來說,計算二分類metrics的平均
# macro:計算二分類metrics的均值,為每個類給出相同權重的分值。
precision = precision_score(Y_valid,average='micro')
recall = recall_score(Y_valid,average='micro')
f1_score = f1_score(Y_valid,average='micro')
accuracy_score = accuracy_score(Y_valid,accuracy_score)

# roc_curve:真正率(True Positive Rate,FPR)

# Compute ROC curve and ROC area for each class
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(nb_classes):
 fpr[i],tpr[i],_ = roc_curve(Y_valid[:,i],Y_pred[:,i])
 roc_auc[i] = auc(fpr[i],tpr[i])

# Compute micro-average ROC curve and ROC area
fpr["micro"],tpr["micro"],_ = roc_curve(Y_valid.ravel(),Y_pred.ravel())
roc_auc["micro"] = auc(fpr["micro"],tpr["micro"])

# Compute macro-average ROC curve and ROC area

# First aggregate all false positive rates
all_fpr = np.unique(np.concatenate([fpr[i] for i in range(nb_classes)]))

# Then interpolate all ROC curves at this points
mean_tpr = np.zeros_like(all_fpr)
for i in range(nb_classes):
 mean_tpr += interp(all_fpr,fpr[i],tpr[i])

# Finally average it and compute AUC
mean_tpr /= nb_classes

fpr["macro"] = all_fpr
tpr["macro"] = mean_tpr
roc_auc["macro"] = auc(fpr["macro"],tpr["macro"])

# Plot all ROC curves
lw = 2
plt.figure()
plt.plot(fpr["micro"],label='micro-average ROC curve (area = {0:0.2f})'
  ''.format(roc_auc["micro"]),color='deeppink',linestyle=':',linewidth=4)

plt.plot(fpr["macro"],tpr["macro"],label='macro-average ROC curve (area = {0:0.2f})'
  ''.format(roc_auc["macro"]),color='navy',linewidth=4)

colors = cycle(['aqua','darkorange','cornflowerblue'])
for i,color in zip(range(nb_classes),colors):
 plt.plot(fpr[i],color=color,lw=lw,label='ROC curve of class {0} (area = {1:0.2f})'
  ''.format(i,roc_auc[i]))

plt.plot([0,'k--',lw=lw)
plt.xlim([0.0,1.0])
plt.ylim([0.0,1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Some extension of Receiver operating characteristic to multi-class')
plt.legend(loc="lower right")
plt.savefig("../images/ROC/ROC_5分類.png")
plt.show()

print("--- %s seconds ---" % (time.time() - start_time))

ROC圖如下所示:

python實現二分類和多分類的ROC曲線教程

以上這篇python實現二分類和多分類的ROC曲線教程就是小編分享給大家的全部內容了,希望能給大家一個參考,也希望大家多多支援我們。