SVM--交叉驗證

阿新 • • 發佈：2020-11-22

# -*- coding: utf-8 -*-
"""SVM.ipynb

Automatically generated by Colaboratory.

Original file is located at
https://colab.research.google.com/drive/1a993aXFZd3z39U7eqp6J0Ndvuhrzu1-q
"""

import numpy as np
import time
from scipy.stats import sem
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.model_selection import KFold,GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report #對分類的結果進行綜合性的報告
from sklearn.metrics import confusion_matrix
from sklearn.datasets import fetch_olivetti_faces #用sklearn自帶的資料集 400張人臉

import matplotlib

matplotlib.use('TkAgg')

import matplotlib.pyplot as plt

# Load the Olivetti faces dataset bundled with scikit-learn and print a
# quick summary of what it contains.
faces = fetch_olivetti_faces()
print(faces.DESCR)
print(faces.keys())
# Shapes of the images, data and target arrays, one per line.
print(faces.images.shape, faces.data.shape, faces.target.shape, sep="\n")
# Maximum, minimum and mean of the values in faces.data.
print("max", faces.data.max())
print(faces.data.min())
print(faces.data.mean())

"""執行過程：SVC --> BaseSVC --> BaseLibSVM --> init初始化引數
SVC：多分類器

C： C-SVC的懲罰引數C，預設值是1.0
kernel ：核函式，預設是rbf，可為如下：
– 'linear'，線性：u'*v
– 'poly'，多項式：(gamma*u'*v + coef0)^degree
– 'rbf'，RBF函式：exp(-gamma*|u-v|^2)
– 'sigmoid'：tanh(gamma*u'*v + coef0)
degree ：多項式poly函式的維度，預設是3，其他核函式會被忽略
gamma ： ‘rbf’,‘poly’ 和‘sigmoid’的核函式引數。預設是‘auto’，則會選擇 1/n_features

coef0 ：核函式的常數項，對於‘poly’和 ‘sigmoid’有用
max_iter ：最大迭代次數，-1為無限制
tol ：停止訓練的誤差值大小，預設為1e-3
decision_function_shape :’ovo‘, ‘ovr’ or None, default=‘ovr’(one vs rest)
random_state ：隨機數種子，資料洗牌時的種子值，int值
主要調節的引數有：C、kernel、degree、gamma、coef0。
"""

# Cross-validation

def evaluate_cross_validation(clf, X, y, K):
    """Run K-fold cross-validation for ``clf`` and print the scores.

    Prints the array of per-fold scores followed by their mean and the
    standard error of the mean.

    Args:
        clf: Estimator to evaluate; the callers in this file pass ``SVC``
            instances.
        X: Samples to split into folds.
        y: Labels matching ``X``.
        K: Number of folds.
    """
    # Create a k-fold cross-validation iterator; shuffling with a fixed seed
    # keeps the folds reproducible between runs.
    cv = KFold(K, shuffle=True, random_state=0)

    # No explicit scoring is passed, so the estimator's own score method
    # (accuracy) is used for each fold.
    scores = cross_val_score(clf, X, y, cv=cv)

    print(scores)
    print("Mean score: {0:.3f} (+/-{1:.3f})".format(
        np.mean(scores), sem(scores)))

"""KFold:sklearn/model_selection/_split.py
KFold（n_splits, shuffle, random_state）

n_splits:要劃分的折數
shuffle: 每次劃分前，是否對資料進行shuffle洗牌打亂
random_state:資料打亂的隨機數種子

KFold過程

1、將資料集平均分割成K等份
2、使用1份資料作為測試資料，其餘K-1份作為訓練資料
3、計算測試準確率
4、使用不同的測試集，重複上面步驟

scores:sklearn/model_selection/_validation.py
作用：驗證某個模型在某個訓練集上的穩定性，輸出k個預測精度。

輸入：clf：分類器；X：資料；y：標籤；cv：交叉驗證，可為k或KFold
輸出：k個劃分的預測結果（分類準確率）

執行過程：呼叫檔案內的cross_validate函式，啟用多個job執行緒並行，每個執行緒處理cv分割出的一份訓練資料和驗證資料。

執行緒呼叫檔案內_fit_and_score函式，_fit_and_score使用分類器的estimator.fit(X_train, y_train)進行訓練，使用檔案內_score函式計算預測結果。

"""

# Without cross-validation

def train_and_evaluate(clf, X_train, X_test, y_train, y_test):
    """Fit ``clf`` on the training split and print its evaluation.

    Prints the score on the training set and on the testing set, then the
    classification report and the confusion matrix for the test predictions.

    Args:
        clf: Estimator exposing ``fit``, ``score`` and ``predict``. It is
            fitted in place, so the caller can keep using it afterwards.
        X_train: Training samples.
        X_test: Testing samples.
        y_train: Training labels.
        y_test: Testing labels.
    """
    clf.fit(X_train, y_train)

    print("Accuracy on training set:")
    print(clf.score(X_train, y_train))
    print("Accuracy on testing set:")
    print(clf.score(X_test, y_test))

    y_pred = clf.predict(X_test)

    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

# Mount Google Drive when running inside Colab. The import is guarded so the
# script still runs in a local environment where google.colab is not installed.
try:
    from google.colab import drive
except ImportError:
    print("google.colab is not available; skipping Google Drive mount")
else:
    drive.mount('/content/drive')

"""作用：根據真實值和預測值計算分類精度的綜合報告

輸入：y_true：1 維陣列，真實資料的分類標籤
y_pred：1 維陣列，模型預測的分類標籤

輸出：每個分類標籤的精確度，召回率和 F1-score。

精確度：precision，正確預測為正的，佔全部預測為正的比例，TP / (TP+FP)

召回率：recall，正確預測為正的，佔全部實際為正的比例，TP / (TP+FN)

F1-score：精確率和召回率的調和平均數，2 * p*r / (p+r)

"""

# Annotation of the people wearing glasses

# Index ranges, as (start, end) pairs, of the images of people with glasses.
# create_target marks y[start:end + 1], so both ends are inclusive and a pair
# such as (69, 69) denotes a single image.

glasses = [
(10, 19), (30, 32), (37, 38), (50, 59), (63, 64),
(69, 69), (120, 121), (124, 129), (130, 139), (160, 161),
(164, 169), (180, 182), (185, 185), (189, 189), (190, 192),
(194, 194), (196, 199), (260, 269), (270, 279), (300, 309),
(330, 339), (358, 359), (360, 369)
]

# Samples with glasses are labelled 1, samples without glasses 0

def create_target(num_sample, segments):
    """Build a 0/1 label vector from inclusive index ranges.

    Args:
        num_sample: Length of the label vector to create.
        segments: Iterable of ``(start, end)`` index pairs; every position
            from ``start`` through ``end`` (inclusive) is labelled 1.

    Returns:
        A NumPy array of length ``num_sample`` holding 1 inside the given
        segments and 0 everywhere else.
    """
    y = np.zeros(num_sample)
    for (start, end) in segments:
        # end is inclusive, hence the + 1 on the slice bound.
        y[start:end + 1] = 1
    return y

# Build the binary "wears glasses" label vector, one entry per face sample.
num_samples = len(faces.target)
target_glasses = create_target(num_sample=num_samples, segments=glasses)

# 1 sklearn.model_selection import train_test_split

svc_1 = SVC(kernel='linear')
print(svc_1)

# Original labels: faces.target

# Hold out 25% of the samples for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    faces.data, faces.target, test_size=0.25, random_state=0
)
evaluate_cross_validation(svc_1, X_train, y_train, 5)
train_and_evaluate(svc_1, X_train, X_test, y_train, y_test)

# 2 sklearn.model_selection import train_test_split

svc_2 = SVC(kernel='linear')

# Filtered target: target_glasses (classes 0 and 1) is used here instead of
# the original faces.target.

# Hold out 25% of the samples for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    faces.data, target_glasses, test_size=0.25, random_state=0
)
evaluate_cross_validation(svc_2, X_train, y_train, 5)
train_and_evaluate(svc_2, X_train, X_test, y_train, y_test)

# 3

def print_faces(images, target, top_n):
    """Draw the first ``top_n`` images in a grid, annotated with their labels.

    Args:
        images: Indexable collection of 2-D images accepted by ``imshow``.
        target: Indexable collection of labels; ``target[i]`` is drawn on
            image ``i``.
        top_n: Number of images to draw, taken from the start of ``images``.
    """
    # set up figure size in inches
    fig = plt.figure(figsize=(12, 12))
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
    for i in range(top_n):
        # we will print images in matrix 20x20
        p = fig.add_subplot(20, 20, i + 1, xticks=[], yticks=[])
        p.imshow(images[i], cmap=plt.cm.bone)
        # label the image with its target value (target[i]) and its index
        p.text(0, 14, str(target[i]))
        p.text(0, 60, str(i))

# Hold out samples 30-39 as the test set and train on every other sample.
held_out = slice(30, 40)
X_test = faces.data[held_out]
y_test = target_glasses[held_out]
print(y_test.shape[0])

# Boolean mask that is True for every sample kept in the training set.
select = np.ones(target_glasses.shape[0], dtype=bool)
select[held_out] = False
X_train = faces.data[select]
y_train = target_glasses[select]
print(y_train.shape[0])

svc_3 = SVC(kernel='linear')
train_and_evaluate(svc_3, X_train, X_test, y_train, y_test)

# Show the held-out faces together with the label predicted for each one.
y_pred = svc_3.predict(X_test)
# Each row of X_test is reshaped back into a 64x64 image for display.
eval_faces = [np.reshape(a, (64, 64)) for a in X_test]
print_faces(eval_faces, y_pred, 10)

SVM--交叉驗證

matplotlib.use('TkAgg')

交叉驗證

create a k-fold cross validation iterator

score method of the estimator (accuracy)

這裡的clf==之前建的SVC

不用交叉驗證

戴眼鏡的人的標註

戴眼鏡的標為1，不戴眼鏡的標為0

1 sklearn.model_selection import train_test_split

原始的：faces.target

2 sklearn.model_selection import train_test_split

經過篩選的target：target_glasses（分為0和1類）原始的：faces.target

3

SVM--交叉驗證

使用sklearn的cross_val_score進行交叉驗證例項

sklearn和keras的資料切分與交叉驗證的例項詳解

2-機器學習-KNN近鄰演算法分類模型、交叉驗證

KNN+交叉驗證

EBS COA賬戶交叉驗證規則

巢狀交叉驗證

python:交叉驗證中的報錯

pytorch（二十一）：交叉驗證

matlab.10折交叉驗證

nyuv240類資料集_機器學習：資料劃分與交叉驗證

Sklearn中交叉驗證 KFold

機器學習中模型的診斷、改進、選擇——偏差、方差、欠擬合、過擬合、交叉驗證

《機器學習》西瓜書課後習題3.4——python解交叉驗證和留一法的對率迴歸錯誤率

機器學習sklearn（二十一）：模型評估（一）交叉驗證：評估估算器的表現（一）簡介

機器學習sklearn（二十二）：模型評估（二）交叉驗證：評估估算器的表現（二）計算交叉驗證的指標

機器學習sklearn（二十三）：模型評估（三）交叉驗證：評估估算器的表現（三）交叉驗證迭代器

k近鄰8-交叉驗證，網格搜尋優化模型

scikit基礎與機器學習入門（11）欠擬合，過擬合和交叉驗證

交叉驗證

SVM--交叉驗證

matplotlib.use('TkAgg')

交叉驗證

create a k-fold cross validation iterator

score method of the estimator (accuracy)

這裡的clf==之前建的SVC

不用交叉驗證

戴眼鏡的人的標註

戴眼鏡的標為1，不戴眼鏡的標為0

1 sklearn.model_selection import train_test_split

原始的：faces.target

2 sklearn.model_selection import train_test_split

經過篩選的target：target_glasses（分為0和1類） 原始的：faces.target

3

相關推薦

經過篩選的target：target_glasses（分為0和1類）原始的：faces.target