程式人生 > 實用技巧 > 多演算法融合 — 1

多演算法融合 — 1

#1-1 多演算法融合思想的使用——KNN演算法引數尋優
from sklearn.feature_selection import SelectKBest
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold #交叉驗證Kfold方式
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score #匯入整體模型的準確度
from sklearn.metrics import confusion_matrix #匯入整體模型的混淆矩陣
from sklearn.metrics import precision_score #匯入整體模型的精準率
from sklearn.metrics import recall_score #匯入整體模型的召回率
from sklearn.metrics import f1_score

# Load the training data set.
# NOTE(review): the path is machine-specific; consider making it configurable.
finaldata = pd.read_excel("C:/Users/y50014900/Desktop/過程測試_033GRR10L4105623_20200601-20200708_IL_DM_異常檢測分類結果.xlsx")
feature = ["p1","p2","p3","p4","p5","p6","p7","p8","p9","p10","p11","p12","p13","p14","p15","p16","p17","p18","p19","p20","p21","p22","p23"]
DM_target1 = ["DM1"]
DM_target2 = ["DM2"]
x = finaldata.iloc[:, 2:71]
print(x)
# Convert the feature frame to a 2-D numpy array, the input form the estimators expect.
x = np.array(x)
# BUG FIX: the original assigned y from DM1 and then immediately overwrote it
# with DM2, so the DM1 target was never used.  Only the DM2 read is kept;
# ravel() flattens the single target column into a 1-D prediction vector.
y = finaldata[DM_target2].values.ravel()

# KNN hyper-parameter search over k (n_neighbors) and p (Minkowski power),
# scored by 5-fold cross-validation.  Four "best" trackers are kept:
#   best_k  / best_p   -> highest mean accuracy
#   best_k1 / best_p1  -> highest mean recall
#   best_k2 / best_p2  -> highest harmonic mean of accuracy & recall
#   best_k3 / best_p3  -> highest harmonic mean of accuracy, recall & precision
best_k = 0
best_p = 0
best_accuracy = 0
best_recall = 0
best_k1 = 0
best_p1 = 0
# BUG FIX: best_k2/best_p2/best_k3/best_p3 were never initialised in the
# original, which would raise NameError at the final prints if the
# corresponding "if" branch never fired.
best_k2 = 0
best_p2 = 0
best_k3 = 0
best_p3 = 0
best_Comprehensive = 0
best_Comprehensive_1 = 0
for k in range(2, 11):
    for p in range(1, 6):
        print("p=", p, "k=", k)
        kf = KFold(n_splits=5, shuffle=True, random_state=123)

        accuracy_score_all = []
        recall_score_all = []
        precision_score_all = []
        for i, (train_ind, valid_ind) in enumerate(kf.split(x)):
            print("FOLD", i + 1, "out of", 5)
            x_train, y_train = x[train_ind], y[train_ind]
            x_valid, y_valid = x[valid_ind], y[valid_ind]

            knn = KNeighborsClassifier(weights="distance", p=p, n_neighbors=k)
            knn.fit(x_train, y_train)
            y_predict = knn.predict(x_valid)

            # BUG FIX: sklearn metrics take (y_true, y_pred); the original
            # passed (y_predict, y_valid), which swaps precision and recall.
            accuracy_score1 = accuracy_score(y_valid, y_predict)
            recall_score1 = recall_score(y_valid, y_predict)
            precision_score1 = precision_score(y_valid, y_predict)

            accuracy_score_all.append(accuracy_score1)
            recall_score_all.append(recall_score1)
            precision_score_all.append(precision_score1)

        # Fold-averaged values of the three evaluation metrics.
        accuracy_score_mean = np.mean(accuracy_score_all)
        recall_score_mean = np.mean(recall_score_all)
        precision_score_mean = np.mean(precision_score_all)

        # Composite scores: harmonic mean of accuracy & recall, and the
        # harmonic mean of all three metrics.
        best_Comprehensive1 = 2 * accuracy_score_mean * recall_score_mean / (accuracy_score_mean + recall_score_mean)
        best_Comprehensive2 = 1 / (1 / 3 * (1 / accuracy_score_mean + 1 / recall_score_mean + 1 / precision_score_mean))

        print(accuracy_score_mean)
        print(recall_score_mean)
        print(best_Comprehensive1)
        print(best_Comprehensive2)

        if best_accuracy < accuracy_score_mean:
            best_accuracy = accuracy_score_mean
            best_k = k
            best_p = p
        if best_recall < recall_score_mean:
            best_recall = recall_score_mean
            best_k1 = k
            best_p1 = p
        if best_Comprehensive < best_Comprehensive1:
            best_Comprehensive = best_Comprehensive1
            best_k2 = k
            best_p2 = p
        if best_Comprehensive_1 < best_Comprehensive2:
            best_Comprehensive_1 = best_Comprehensive2
            best_k3 = k
            best_p3 = p

print("---------------------------------------")

# Summary of the search: best scores first, then the (k, p) pair for each criterion.
print(best_recall)
print(best_accuracy)
print(best_Comprehensive)
print(best_Comprehensive_1)

print(best_k)
print(best_p)

print(best_k1)
print(best_p1)

print(best_k2)
print(best_p2)

print(best_k3)
print(best_p3)

# Hold out 10% of the data and report the accuracy of a KNN model with the
# chosen hyper-parameters (k=7, p=2, distance-weighted votes).
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=666)
final_knn = KNeighborsClassifier(weights="distance", n_neighbors=7, p=2)
final_knn.fit(x_train, y_train)
print(final_knn.score(x_test, y_test))

#1-2 多演算法融合思想的使用——邏輯迴歸演算法引數尋優
from sklearn.feature_selection import SelectKBest
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold #交叉驗證Kfold方式
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score #匯入整體模型的準確度
from sklearn.metrics import confusion_matrix #匯入整體模型的混淆矩陣
from sklearn.metrics import precision_score #匯入整體模型的精準率
from sklearn.metrics import recall_score #匯入整體模型的召回率
from sklearn.metrics import f1_score

# Load the training data set (same file as the KNN section).
# NOTE(review): the path is machine-specific; consider making it configurable.
finaldata = pd.read_excel("C:/Users/y50014900/Desktop/過程測試_033GRR10L4105623_20200601-20200708_IL_DM_異常檢測分類結果.xlsx")
feature = ["p1","p2","p3","p4","p5","p6","p7","p8","p9","p10","p11","p12","p13","p14","p15","p16","p17","p18","p19","p20","p21","p22","p23"]
DM_target1 = ["DM1"]
DM_target2 = ["DM2"]
x = finaldata.iloc[:, 2:71]
print(x)
# Convert the feature frame to a 2-D numpy array, the input form the estimators expect.
x = np.array(x)
# BUG FIX: the original assigned y from DM1 and then immediately overwrote it
# with DM2, so the DM1 target was never used.  Only the DM2 read is kept;
# ravel() flattens the single target column into a 1-D prediction vector.
y = finaldata[DM_target2].values.ravel()

from sklearn.linear_model import LogisticRegression
#1-2sklearn中的邏輯迴歸(多項式參與,並不帶正則化),採用管道的方式可以訓練模型
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
# Hold-out split (the CV loop below re-assigns x_train/y_train per fold, so
# this split is only used if the loop is skipped).
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=666)


def Polynomiallogisticregression(degree, C):
    """Build a pipeline: polynomial features -> standardisation -> logistic regression.

    The main hyper-parameters of this model family are the polynomial degree,
    the inverse regularisation strength C, and the penalty type.
    NOTE(review): the original comment said "without regularisation", but the
    pipeline explicitly uses an L2 penalty; the huge max_iter simply ensures
    the solver converges on poorly scaled problems.
    """
    return Pipeline([
        ("poly", PolynomialFeatures(degree=degree)),
        ("std_reg", StandardScaler()),
        ("log_reg", LogisticRegression(C=C, penalty="l2", max_iter=10000000)),
    ])
# Logistic-regression hyper-parameter search over the polynomial degree and
# the inverse regularisation strength C, scored by 5-fold cross-validation.
# Four "best" trackers are kept:
#   best_degree  / best_C   -> highest mean accuracy
#   best_degree1 / best_C1  -> highest mean recall
#   best_degree2 / best_C2  -> highest harmonic mean of accuracy & recall
#   best_degree3 / best_C3  -> highest harmonic mean of accuracy, recall & precision
best_accuracy = 0
best_recall = 0
best_Comprehensive = 0
best_Comprehensive_1 = 0
# BUG FIX: the original initialised leftover best_k/best_p/best_k1/best_p1
# variables (copied from the KNN section) but never initialised the
# best_degree*/best_C* trackers it actually uses, risking NameError at the
# final prints if a branch never fired.
best_degree = 0
best_C = 0
best_degree1 = 0
best_C1 = 0
best_degree2 = 0
best_C2 = 0
best_degree3 = 0
best_C3 = 0
for degree in range(1, 3):
    for C in np.arange(0.01, 0.2, 0.01):
        print("i=", degree, "C=", C)
        kf = KFold(n_splits=5, shuffle=True, random_state=123)
        accuracy_score_all = []
        recall_score_all = []
        precision_score_all = []
        for i, (train_ind, valid_ind) in enumerate(kf.split(x)):
            print("FOLD", i + 1, "out of", 5)
            x_train, y_train = x[train_ind], y[train_ind]
            x_valid, y_valid = x[valid_ind], y[valid_ind]

            log = Polynomiallogisticregression(degree=degree, C=C)
            log.fit(x_train, y_train)
            y_predict = log.predict(x_valid)

            # BUG FIX: sklearn metrics take (y_true, y_pred); the original
            # passed (y_predict, y_valid), which swaps precision and recall.
            accuracy_score1 = accuracy_score(y_valid, y_predict)
            recall_score1 = recall_score(y_valid, y_predict)
            precision_score1 = precision_score(y_valid, y_predict)

            accuracy_score_all.append(accuracy_score1)
            recall_score_all.append(recall_score1)
            precision_score_all.append(precision_score1)

        # Fold-averaged values of the three evaluation metrics.
        accuracy_score_mean = np.mean(accuracy_score_all)
        recall_score_mean = np.mean(recall_score_all)
        precision_score_mean = np.mean(precision_score_all)

        # Composite scores: harmonic mean of accuracy & recall, and the
        # harmonic mean of all three metrics.
        best_Comprehensive1 = 2 * accuracy_score_mean * recall_score_mean / (accuracy_score_mean + recall_score_mean)
        best_Comprehensive2 = 1 / (1 / 3 * (1 / accuracy_score_mean + 1 / recall_score_mean + 1 / precision_score_mean))

        print(accuracy_score_mean)
        print(recall_score_mean)
        print(best_Comprehensive1)
        print(best_Comprehensive2)

        if best_accuracy < accuracy_score_mean:
            best_accuracy = accuracy_score_mean
            best_degree = degree
            best_C = C
        if best_recall < recall_score_mean:
            best_recall = recall_score_mean
            best_degree1 = degree
            best_C1 = C
        if best_Comprehensive < best_Comprehensive1:
            best_Comprehensive = best_Comprehensive1
            best_degree2 = degree
            best_C2 = C
        if best_Comprehensive_1 < best_Comprehensive2:
            best_Comprehensive_1 = best_Comprehensive2
            best_degree3 = degree
            best_C3 = C

print("---------------------------------------")

# Summary of the search: best scores first, then the (degree, C) pair for
# each criterion.
print(best_recall)
print(best_accuracy)
print(best_Comprehensive)
print(best_Comprehensive_1)

print(best_degree)
print(best_C)

print(best_degree1)
print(best_C1)

print(best_degree2)
print(best_C2)

print(best_degree3)
print(best_C3)

# Train the final logistic-regression pipeline with the selected
# hyper-parameters (degree=2, C=0.02) and report its hold-out accuracy.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=666)
f = Polynomiallogisticregression(C=0.02, degree=2)
f.fit(x_train, y_train)
print(f.score(x_test, y_test))