# Source: 程式人生 > 實用技巧 > 多演算法融合2_SVM
# (blog post: "Multi-algorithm fusion, part 2: SVM")
#支援向量機演算法原理及實現
#(一)sklearn中利用SVM演算法解決分類問題
import numpy as np
import matplotlib.pyplot as plt

#1-1 多演算法融合思想的使用——KNN演算法引數尋優
from sklearn.feature_selection import SelectKBest
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold #交叉驗證Kfold方式
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score #匯入整體模型的準確度
from sklearn.metrics import confusion_matrix #匯入整體模型的混淆矩陣
from sklearn.metrics import precision_score #匯入整體模型的精準率
from sklearn.metrics import recall_score #匯入整體模型的召回率
from sklearn.metrics import f1_score
#利用管道pipeline來進行多項式核函式的SVM演算法三步—多項式迴歸特徵增加-資料歸一化-線性SVM演算法
from sklearn.preprocessing import PolynomialFeatures #輸入多項式迴歸模型
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
# Load the dataset to train on (Excel export of anomaly-detection
# classification results).
finaldata=pd.read_excel("C:/Users/y50014900/Desktop/過程測試_033GRR10L4105623_20200601-20200708_IL_DM_異常檢測分類結果.xlsx")
# Candidate feature column names (23 measurement parameters p1..p23).
# NOTE(review): this list is currently unused — `x` is built from a
# positional slice below; confirm which columns are really intended.
feature=["p1","p2","p3","p4","p5","p6","p7","p8","p9","p10","p11","p12","p13","p14","p15","p16","p17","p18","p19","p20","p21","p22","p23"]
DM_target1=["DM1"]  # first target column (currently not trained on)
DM_target2=["DM2"]  # second target column (the one actually used)
# Feature matrix: columns 2..70 by position.
# NOTE(review): 69 columns are selected here but only 23 feature names are
# listed above — confirm the slice matches the intended features.
x=finaldata.iloc[:,2:71]
print(x)
x=np.array(x) # convert to a NumPy 2-D array, the input form the estimators expect
# Target vector: ravel() flattens the single-column frame into a 1-D array.
# BUG FIX: the original assigned y from DM_target1 and then immediately
# overwrote it from DM_target2, so the first assignment was dead code.
# Only DM2 is used; change the column list here to train on DM1 instead.
y=finaldata[DM_target2].values.ravel()

# The data is now in NumPy form, ready to feed into the algorithms.
# Step 1 would normally be (linear) standardisation of the features.
# NOTE: the entire triple-quoted block below is DISABLED experiment code
# (LinearSVC with different C values, a manual polynomial-feature pipeline,
# and a poly-kernel SVC). It is kept verbatim for reference; only the
# RBF-kernel section after it actually runs.
'''
from sklearn.preprocessing import StandardScaler
s1=StandardScaler()
s1.fit(x)
x_standard=s1.transform(x)
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=666)

#1-1匯入sklearn中SVM的線性分類演算法LinearSVC,處理原有的線性資料
from sklearn.preprocessing import StandardScaler
s1=StandardScaler()
s1.fit(x)
x=s1.transform(x)
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=666)
from sklearn.svm import LinearSVC
s11=LinearSVC(C=1e10) #需要定義超引數C,L1、L2正則化的係數,越大,容錯空間越小
#對於多分類問題的實現,需要提交引數penalty=l1/l2(正則化方式)以及multi_class=ovo/ovr(採用何種方式多分類訓練)
#LinearSVC預設方式為L2正則化,多分類為ovr模式

s11.fit(x_train,y_train) #訓練資料集訓練歸一化資料集
print(s11.score(x_test,y_test))

#改變正則化的係數C的大小,C越小,容錯空間越大
s12=LinearSVC(C=1) #C變小之後,容錯空間增大,會有部分資料區分錯誤
s12.fit(x_train,y_train) #訓練資料集訓練歸一化資料集
print(s12.score(x_test,y_test))

#1-2 sklearn中對於非線性資料的svm應用(多項式應用方式)
#SVM使用非線性資料假設的模型-手動新增多項式特徵模型
#利用管道pipeline來進行多項式核函式的SVM演算法三步—多項式迴歸特徵增加-資料歸一化-線性SVM演算法
from sklearn.preprocessing import PolynomialFeatures #輸入多項式迴歸模型
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=666)
def polyniomailSVC(degree,C=10): #預設正則化係數C為1
return Pipeline([("poly",PolynomialFeatures(degree=degree)),
("std_scaler",StandardScaler()),
("LinearSVC",LinearSVC(C=C))
])
for i in range(1,3):
for C in range(1,10):
p=polyniomailSVC(degree=i,C=C) #使用三次的多項式特徵進行模型的訓練
p.fit(x_train,y_train)
print(p.score(x_test,y_test))



#1-3 使用自帶的多項式核函式的SVM,將資料先直接轉換為多項式的多維特徵,和傳統的多項式特徵不同
#2直接利用sklearn中自帶的多項式核函式SVM演算法,可以自動新增多項式的特徵,主要的引數kernel="poly"
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=666)
from sklearn.svm import SVC
def polynomialkernelSVC(degree,C=1.0):
return Pipeline(
[
("std_canler",StandardScaler()),
("kernelsvc",SVC(kernel="poly",degree=degree,C=C))
]
)
for i in range(1,5):
for j in range(1,10):
p1=polynomialkernelSVC(degree=i,C=j)
p1.fit(x_train,y_train)
print(p1.score(x_test,y_test))
'''

#1-4 Gaussian (RBF) kernel SVM — train a model on non-linear data.
# The RBF kernel's main hyper-parameter is gamma, which controls model
# complexity: the larger gamma, the more the model tends to overfit.
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
import numpy as np
# Hold out 20% of the data as a test set; fixed seed for reproducibility.
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=666)
def RBFkernelSVC(gamma):
    """Build an unfitted pipeline: feature standardisation then RBF-kernel SVC.

    Parameters
    ----------
    gamma : float
        RBF kernel coefficient; larger values give a more complex
        (more overfitting-prone) decision boundary.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Pipeline of StandardScaler followed by SVC(kernel="rbf", gamma=gamma).
    """
    # FIX: the original body had lost all indentation (blog copy-paste),
    # which is a SyntaxError in Python; re-indented, logic unchanged.
    return Pipeline([
        ("std", StandardScaler()),
        ("svc", SVC(kernel="rbf", gamma=gamma)),
    ])
# Sweep gamma over 0.1, 1.1, ..., 9.1 and print the test accuracy for each.
# FIX: the loop body had lost its indentation (SyntaxError); restored.
for i in np.arange(0.1, 10, 1):
    sv = RBFkernelSVC(gamma=i)
    sv.fit(x_train, y_train)
    print(sv.score(x_test, y_test))