sklearn SVM演算法自動調優
阿新 • • 發佈:2018-12-31
#匯入,處理資料集
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
# Load the spreadsheet into a DataFrame.
# NOTE(review): the URL points at a private-network host, so this script only
# runs where that server is reachable — confirm before reusing.
dataset1 = pd.read_excel("https://192.168.0.113:8443/oc9/index.php/s/g88pPagI4ecuEsI/download")

# The last column holds string labels containing an underscore; keep the piece
# after the first underscore and convert it to float. This is done for the
# whole column at once instead of one `iloc` write per row.
label_column = dataset1.columns[-1]
dataset1[label_column] = (
    dataset1[label_column].str.split('_').str[1].astype(float)
)

# Class labels. Going through a plain list lets NumPy infer the dtype from the
# values themselves, as the original element-by-element rebuild did.
# (presumably 'class' is the last column converted above — TODO confirm)
target = np.array(dataset1['class'].tolist())

# Feature matrix: every column except the first and the last.
dataset = np.array(dataset1.iloc[:, 1:-1])
# Create the SVC estimator, define the hyper-parameter grid, split the data.
svr = svm.SVC()

# Every combination of the values below is a candidate for the grid search.
parameters = dict(
    C=[0.001, 0.003, 0.006, 0.009, 0.01, 0.04, 0.08, 0.1],
    kernel=('linear', 'rbf'),
    gamma=[0.001, 0.005, 0.1, 0.15, 0.20, 0.23, 0.27],
    decision_function_shape=['ovo', 'ovr'],
    # A single fixed weighting for the class labels 1, 2 and 3.
    class_weight=[{1: 7, 2: 1.83, 3: 3.17}],
)

# Hold out 40% of the samples; random_state=1 makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    dataset,
    target,
    test_size=0.4,
    random_state=1,
)
# GridSearchCV: scikit-learn's cross-validated search over the parameter grid.
clf = GridSearchCV(svr, parameters)
clf.fit(X_train, y_train)

# Store the per-combination cross-validation results in `a`.
a = pd.DataFrame(clf.cv_results_)

# Rank the combinations by mean_test_score, best first.
# DataFrame.sort() was removed in pandas 0.20 and its result was discarded
# here; sort_values() is the replacement, and the sorted frame is kept.
a = a.sort_values('mean_test_score', ascending=False)

# Show the best estimator and its score. best_score_ is the mean
# cross-validated score measured on the training split, not a test-set score.
# print() is needed because a bare expression shows nothing in a script.
print((clf.best_estimator_, clf.best_score_))

# Accuracy of the refitted best estimator on the held-out split, which the
# original script created but never evaluated.
print("held-out accuracy:", clf.score(X_test, y_test))

# Example output of the (estimator, score) tuple from the original run:
# (SVC(C=0.1, cache_size=200, class_weight={1: 7, 2: 1.83, 3: 3.17}, coef0=0.0,
#   decision_function_shape='ovo', degree=3, gamma=0.2, kernel='rbf',
#   max_iter=-1, probability=False, random_state=None, shrinking=True,
#   tol=0.001, verbose=False), 0.53831417624521072)