scikit-learn的基本用法(五)——交叉驗證1
阿新 • • 發佈:2019-01-27
文章作者:Tyan
部落格:noahsnail.com | CSDN | 簡書
本文主要介紹scikit-learn中的交叉驗證。
- Demo 1
import numpy as np
from sklearn import datasets
# NOTE: sklearn.cross_validation was deprecated in 0.18 and removed in 0.20;
# the same functions now live in sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

# Load the iris dataset
iris = datasets.load_iris()
# Feature matrix, shape (150, 4)
X = iris.data
# Class labels, shape (150,)
y = iris.target
# kNN classifier with k = 5 neighbors
knn = KNeighborsClassifier(n_neighbors=5)
# 5-fold cross-validation: the data is split into 5 parts and each part
# is used once as the test set; returns one accuracy score per fold.
scores = cross_val_score(knn, X, y, cv=5, scoring='accuracy')
# Print the 5 per-fold accuracies
print(scores)
- 結果
[ 0.96666667 1. 0.93333333 0.96666667 1. ]
- Demo 2
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
# NOTE: sklearn.cross_validation was deprecated in 0.18 and removed in 0.20;
# the same functions now live in sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

# Select the value of k for kNN by cross-validation.
# Load the iris dataset
iris = datasets.load_iris()
# Feature matrix
X = iris.data
# Class labels
y = iris.target
# Candidate values of k for kNN: 1 through 29
k_range = range(1, 30)
# Mean cross-validation accuracy for each k
k_scores = []
# Evaluate every candidate k
for k in k_range:
    # Build a kNN classifier for this k
    knn = KNeighborsClassifier(n_neighbors=k)
    # 10-fold cross-validation accuracy, one score per fold
    scores = cross_val_score(knn, X, y, cv=10, scoring='accuracy')
    # Store the mean accuracy across the 10 folds
    k_scores.append(scores.mean())

# Plot accuracy as a function of k
plt.plot(k_range, k_scores)
plt.xlabel('K Value in KNN')
plt.ylabel('Cross-Validation Mean Accuracy')
plt.show()
- 結果