
Model fusion strategy: a class that feeds tree-model leaf nodes as features into a regressor or classifier

from sklearn.base import BaseEstimator,ClassifierMixin,RegressorMixin
from sklearn.preprocessing import OneHotEncoder
import numpy as np

class TreeLeaf(BaseEstimator, ClassifierMixin, RegressorMixin):
    """
    Combine tree models with other models: the leaf indices produced by the
    tree models are one-hot encoded and fed as features into the meta models.
    """
    def __init__(self, treeModel=[], metaModel=[], n_estimators=[], goal="regression"):
        self.treeModel = treeModel
        self.metaModel = metaModel
        self.n_estimators = n_estimators
        self.goal = goal

    def _get_leaves(self, model, X):
        # Most sklearn tree ensembles expose leaf indices via apply();
        # LightGBM's sklearn wrapper returns them from predict(..., pred_leaf=True).
        if hasattr(model, "apply"):
            leaf = model.apply(X)
        else:
            leaf = model.predict(X, pred_leaf=True)
        leaf = np.asarray(leaf)
        if leaf.ndim > 2:
            # e.g. GradientBoostingClassifier.apply returns (n_samples, n_estimators, n_classes)
            leaf = leaf.reshape(X.shape[0], -1)
        return leaf

    def fit(self, X, y):
        self.best_treemodel = []  # fitted tree models
        self.best_metamodel = []  # fitted meta models
        self.leaf_list = []       # leaf indices produced by each tree model
        for model in self.treeModel:
            model_param = model.fit(X, y)            # fit the tree model
            self.best_treemodel.append(model_param)
            leaf = self._get_leaves(model_param, X)  # leaf index of every sample in every tree
            self.leaf_list.append(leaf)
        # concatenate the leaf indices of all tree models
        leaf_matrix = np.concatenate(self.leaf_list, axis=1)
        # one-hot encode the leaf indices
        self.one_hot_encoder = OneHotEncoder(handle_unknown="ignore")
        x_one_hot = self.one_hot_encoder.fit_transform(leaf_matrix)
        # fit the meta models on the encoded leaves
        for model in self.metaModel:
            model_param = model.fit(x_one_hot, y)
            self.best_metamodel.append(model_param)
        return self

    def predict(self, X):
        leaf_list_pred = []
        for model in self.best_treemodel:
            leaf_list_pred.append(self._get_leaves(model, X))
        leaf_matrix_pred = np.concatenate(leaf_list_pred, axis=1)
        x_one_hot_pred = self.one_hot_encoder.transform(leaf_matrix_pred)
        y_pred_list = []
        for model in self.best_metamodel:
            y_pred_list.append(model.predict(x_one_hot_pred))
        if self.goal == "regression":
            # average the predictions of the meta models
            return np.mean(y_pred_list, axis=0)
        elif self.goal == "classification":
            # majority vote over the meta models
            y_pred = np.zeros(X.shape[0])
            for i, line in enumerate(np.array(y_pred_list).T):
                y_pred[i] = np.argmax(np.bincount(line.astype(int)))
            return y_pred


################## Test case ####################################################
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from lightgbm import LGBMClassifier

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

treeModel_1 = RandomForestClassifier(n_estimators=20)
treeModel_2 = LGBMClassifier(n_estimators=30)
# treeModel_2 = GradientBoostingClassifier(n_estimators=30)
metaModel_1 = LogisticRegression()
metaModel_2 = SVC()

tl = TreeLeaf(treeModel=[treeModel_1, treeModel_2],
              metaModel=[metaModel_1, metaModel_2],
              n_estimators=[20, 30],
              goal="classification")
tl.fit(X_train, y_train)
y_pred = tl.predict(X_test)
print(accuracy_score(y_test, y_pred))
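
The test case only exercises the classification path. Below is a minimal usage sketch of the regression path with the same TreeLeaf class; the diabetes dataset, RandomForestRegressor and Ridge meta model are illustrative choices of mine, not from the original post.

from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

X_r, y_r = load_diabetes(return_X_y=True)
Xr_train, Xr_test, yr_train, yr_test = train_test_split(X_r, y_r, test_size=0.3, random_state=0)

tl_reg = TreeLeaf(treeModel=[RandomForestRegressor(n_estimators=50, random_state=0)],
                  metaModel=[Ridge()],
                  n_estimators=[50],
                  goal="regression")
tl_reg.fit(Xr_train, yr_train)
print(mean_squared_error(yr_test, tl_reg.predict(Xr_test)))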

The code above implements a model-fusion strategy in which the leaf indices of several tree models are fed as features into one or more classifiers or regressors; the class is reasonably generic and easy to extend. A test example based on a random forest and LightGBM is included above for reference. How well this kind of fusion works varies from problem to problem, and the deciding factor is still the quality of the feature engineering; the method also has a clear tendency to overfit the training set. A common mitigation is to fit the tree models and the meta models on disjoint subsets of the training data, as sketched below.
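
Here is a minimal sketch of that mitigation, not part of the original post: the tree model is fitted on one half of the training data, and the one-hot encoder plus meta model on the other half, so the meta model never sees leaf assignments of samples the trees were grown on. The helper names fit_disjoint and predict_disjoint are hypothetical, and the example reuses the iris split from the test case above.

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder


def fit_disjoint(tree_model, meta_model, X, y, random_state=0):
    """Fit the tree model and the meta model on disjoint halves of the data."""
    X_tree, X_meta, y_tree, y_meta = train_test_split(
        X, y, test_size=0.5, random_state=random_state)
    tree_model.fit(X_tree, y_tree)              # trees only see the first half
    encoder = OneHotEncoder(handle_unknown="ignore")
    leaves_meta = tree_model.apply(X_meta)      # leaf indices of the held-out half
    meta_model.fit(encoder.fit_transform(leaves_meta), y_meta)
    return tree_model, encoder, meta_model


def predict_disjoint(tree_model, encoder, meta_model, X):
    return meta_model.predict(encoder.transform(tree_model.apply(X)))


# Reuses X_train, X_test, y_train, y_test from the iris test case above.
tree, enc, meta = fit_disjoint(RandomForestClassifier(n_estimators=20, random_state=0),
                               LogisticRegression(max_iter=1000),
                               X_train, y_train)
print(accuracy_score(y_test, predict_disjoint(tree, enc, meta, X_test)))

The same idea could also be applied inside TreeLeaf.fit by splitting X there instead of passing the full training set to both stages.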

Comments and suggestions are welcome. If this post helped you, please give me a follow to keep me motivated. Thanks!