支援向量機——非線性分類SVM
阿新 • • 發佈:2019-01-02
模型原型
sklearn.svm.SVC(C=1.0,kernel='rbf',degree=3,gamma='auto',coef0=0.0,shrinking=True,probability=False,tol=0.001,cache_size=200, class_weight=None,verbose=False,max_iter=-1,decision_function_shape=None,random_state=None)
引數
- C
- kernel
- degree
- gamma
- coef0
- shrinking:是否使用啟發式收縮(shrinking heuristic)
- probability:是否進行概率估計(必須在訓練之前設定好,且概率估計會拖慢訓練速度)
- tol
- cache_size:指定了kernel cache的大小,單位為MB
- class_weight
- verbose
- max_iter
- decision_function_shape:指定決策函式的形狀
- 'ovr':使用one-vs-rest準則,決策函式的形狀是(n_samples,n_classes)
- 'ovo':使用one-vs-one準則,決策函式的形狀是(n_samples,n_classes*(n_classes-1)/2)
- None:預設值
- random_state
屬性
- support_:一個數組,形狀為[n_SV],支援向量的下標
- support_vectors_:一個數組,形狀為[n_SV,n_features],支援向量
- n_support_:一個類數組(array-like),形狀為[n_class],每一個分類的支援向量的個數
- dual_coef_:一個數組,形狀為[n_class-1,n_SV] (對偶問題中,在分類決策函式中每個支援向量的係數)
- coef_:一個數組,形狀為[n_class-1,n_features] (原始問題中,每個特徵的係數,只在linear kernel中有效)
- intercept_:一個數組,形狀為[n_class*(n_class-1)/2],決策函式中的常數項
方法
- fit(X,y[,sample_weight])
- predict(X)
- score(X,y[,sample_weight])
- predict_log_proba(X)
- predict_proba(X)
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model, svm

try:
    # scikit-learn >= 0.18 provides train_test_split in model_selection.
    from sklearn import model_selection
except ImportError:
    # Older releases only ship the cross_validation module (removed in 0.20).
    from sklearn import cross_validation as model_selection

# Keep the legacy module name bound for code that still refers to it.
cross_validation = model_selection
載入資料
def load_data_classfication():
    """Load the iris dataset and split it into training and test subsets.

    Returns:
        The four arrays ``X_train, X_test, y_train, y_test`` produced by
        ``train_test_split``; 25% of the samples are held out for testing.
    """
    iris = datasets.load_iris()
    features = iris.data
    labels = iris.target
    # ``sklearn.cross_validation`` was removed in scikit-learn 0.20;
    # ``model_selection`` offers the same ``train_test_split``.
    # A fixed seed keeps the split reproducible, and stratifying on the
    # labels keeps the class proportions the same in both subsets.
    return model_selection.train_test_split(
        features, labels, test_size=0.25, random_state=0, stratify=labels)
不同的核的影響
# Linear kernel
def test_SVC_linear(*data):
    """Fit a linear-kernel SVC and print its coefficients, intercept and score.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    train_x, test_x, train_y, test_y = data
    classifier = svm.SVC(kernel='linear')
    classifier.fit(train_x, train_y)
    # coef_ is only available because the kernel is linear.
    report = 'Coefficients:%s,\nintercept %s' % (
        classifier.coef_, classifier.intercept_)
    print(report)
    accuracy = classifier.score(test_x, test_y)
    print('Score:%.2f' % accuracy)
# Split the iris data once at module level; the demo calls further down reuse these names.
X_train,X_test,y_train,y_test=load_data_classfication()
# Run the linear-kernel demo on that split.
test_SVC_linear(X_train,X_test,y_train,y_test)
# Polynomial kernel
def test_SVC_poly(*data):
    """Plot polynomial-kernel SVC scores against degree, gamma and coef0.

    One figure with three side-by-side panels is drawn and shown; each panel
    sweeps a single hyper-parameter and plots the training and testing scores.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    train_x, test_x, train_y, test_y = data
    figure = plt.figure()

    def draw_panel(position, values, make_model, title, x_label,
                   train_marker, test_label):
        # Fit one model per swept value and record its score on both subsets.
        train_curve = []
        test_curve = []
        for value in values:
            model = make_model(value)
            model.fit(train_x, train_y)
            train_curve.append(model.score(train_x, train_y))
            test_curve.append(model.score(test_x, test_y))
        axes = figure.add_subplot(1, 3, position)
        axes.plot(values, train_curve, label="Training score",
                  marker=train_marker)
        axes.plot(values, test_curve, label=test_label, marker='o')
        axes.set_title(title)
        axes.set_xlabel(x_label)
        axes.set_ylabel('score')
        axes.set_ylim(0, 1.05)
        axes.legend(loc='best', framealpha=0.5)

    # Sweep the degree.
    draw_panel(1, range(1, 20),
               lambda degree: svm.SVC(kernel='poly', degree=degree),
               'SVC_poly_degree', 'p', 'x', 'Testing score')
    # Sweep gamma with the degree held at 3.
    draw_panel(2, range(1, 20),
               lambda gamma: svm.SVC(kernel='poly', gamma=gamma, degree=3),
               'SVC_poly_gamma', r'$\gamma$', '+', 'Testing score')
    # Sweep r (coef0) with gamma held at 10 and the degree at 3.
    draw_panel(3, range(20),
               lambda r: svm.SVC(kernel='poly', gamma=10, degree=3, coef0=r),
               'SVC_poly_r', r'r', '+', 'Testing scores')
    plt.show()
# Run the polynomial-kernel demo on the module-level train/test split.
test_SVC_poly(X_train,X_test,y_train,y_test)
# Gaussian (RBF) kernel
def test_SVC_rbf(*data):
    """Plot RBF-kernel SVC training and testing scores for gamma in 1..19.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    train_x, test_x, train_y, test_y = data
    gamma_values = range(1, 20)
    # Collect (training score, testing score) for each gamma value.
    score_pairs = []
    for gamma_value in gamma_values:
        model = svm.SVC(kernel='rbf', gamma=gamma_value)
        model.fit(train_x, train_y)
        score_pairs.append(
            (model.score(train_x, train_y), model.score(test_x, test_y)))
    train_curve, test_curve = zip(*score_pairs)

    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)
    axes.plot(gamma_values, train_curve, label="Training score", marker='+')
    axes.plot(gamma_values, test_curve, label='Testing score', marker='o')
    axes.set_title('SVC_rbf')
    axes.set_xlabel(r'$\gamma$')
    axes.set_ylabel('score')
    axes.set_ylim(0, 1.05)
    axes.legend(loc='best', framealpha=0.5)
    plt.show()
# Run the RBF-kernel demo on the module-level train/test split.
test_SVC_rbf(X_train,X_test,y_train,y_test)
# Sigmoid kernel
def test_SVC_sigmoid(*data):
    """Plot sigmoid-kernel SVC scores against gamma and coef0.

    One figure with two side-by-side panels is drawn and shown; the gamma
    panel uses a logarithmic x-axis.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    train_x, test_x, train_y, test_y = data
    figure = plt.figure()

    def draw_panel(position, values, make_model, title, x_label,
                   test_label, log_x=False):
        # Fit one model per swept value and record its score on both subsets.
        train_curve = []
        test_curve = []
        for value in values:
            model = make_model(value)
            model.fit(train_x, train_y)
            train_curve.append(model.score(train_x, train_y))
            test_curve.append(model.score(test_x, test_y))
        axes = figure.add_subplot(1, 2, position)
        axes.plot(values, train_curve, label='Training score', marker='+')
        axes.plot(values, test_curve, label=test_label, marker='o')
        axes.set_title(title)
        if log_x:
            axes.set_xscale('log')
        axes.set_xlabel(x_label)
        axes.set_ylabel('score')
        axes.set_ylim(0, 1.05)
        axes.legend(loc='best', framealpha=0.5)

    # Sweep gamma over log-spaced values from 1e-2 to 1e1, with coef0 at 0.
    draw_panel(1, np.logspace(-2, 1),
               lambda gamma: svm.SVC(kernel='sigmoid', gamma=gamma, coef0=0),
               'SVC_sigmoid_gammas', r'$\gamma$', "testing score",
               log_x=True)
    # Sweep r (coef0) over evenly spaced values in [0, 5], with gamma at 0.01.
    draw_panel(2, np.linspace(0, 5),
               lambda r: svm.SVC(kernel='sigmoid', coef0=r, gamma=0.01),
               'SVC_sigmoid_r', r'r', 'Testing score')
    plt.show()
# Run the sigmoid-kernel demo on the module-level train/test split.
test_SVC_sigmoid(X_train,X_test,y_train,y_test)