使用sklearn中的BaggingClassifier去實現bagging分類
阿新 • • 發佈:2021-10-25
使用sklearn去實現bagging分類
這裡採用重複3次的分層10折交叉驗證(RepeatedStratifiedKFold)來評估模型的準確率。
# Evaluate a bagging classifier on a synthetic classification dataset using
# 10-fold stratified cross-validation repeated three times.
from numpy import mean
from numpy import std
from sklearn.datasets import make_classification
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score

# Define the dataset once. The original script generated it twice with the
# same arguments and the same random_state, so a single call is sufficient.
# Optional arguments left at their defaults in the original: n_repeated
# (duplicated features drawn at random from the informative and redundant
# ones), n_classes (number of class labels) and n_clusters_per_class.
X, y = make_classification(
    n_samples=1000,  # number of samples
    n_features=20,  # total number of features
    n_informative=15,  # number of informative features
    n_redundant=5,  # number of redundant features
    random_state=5,  # fixed seed so the dataset is reproducible
)

# Summarize the dataset.
print(X.shape, y.shape)

# Define the model with its default settings.
model = BaggingClassifier()

# Evaluate the model: 10 stratified folds, repeated 3 times (30 scores).
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(
    model,
    X,
    y,
    scoring='accuracy',
    cv=cv,
    n_jobs=-1,  # run the folds in parallel on all available cores
    error_score='raise',  # surface fitting errors instead of recording a score
)

# Report performance as the mean and standard deviation of fold accuracies.
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))