1. 程式人生 > 實用技巧 > 利用隨機森林進行特徵重要性評估

利用隨機森林進行特徵重要性評估

https://blog.csdn.net/xiezhen_zheng/article/details/82011908

import pandas as pd

# Load the dataset. The file is decoded as GBK (passed explicitly to read_csv).
# NOTE(review): the path previously read 'D:Users/...', which Windows treats as
# relative to the current directory on drive D:. It is written here as an
# absolute path, which is presumably what was intended -- confirm the location.
df = pd.read_csv('D:/Users/FengZH2/Desktop/test/testdata.csv', encoding='gbk')

# Print a summary of the loaded frame (columns, dtypes, non-null counts).
df.info()

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
# Rank features with a random forest and plot their relative importances.
#
# Expects `df` (loaded earlier in this script), `train_test_split` and
# `RandomForestClassifier` to already be in scope.

# Features are every column except the first; the target is the first column.
x, y = df.iloc[:, 1:].values, df.iloc[:, 0].values

# Hold out 30% of the rows; random_state is fixed so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0
)
feat_labels = df.columns[1:]

# Fit the forest on the training split only. The labels are cast to int
# before fitting.
forest = RandomForestClassifier(n_estimators=10000, random_state=0, n_jobs=-1)
forest.fit(x_train, y_train.astype('int'))
importances = forest.feature_importances_

import numpy as np

# argsort is ascending, so reverse it to get most-important-first indices.
indices = np.argsort(importances)[::-1]
for f, idx in enumerate(indices, start=1):
    # Rank, feature name left-justified to 30 characters, importance score.
    print("%2d) %-*s %f" % (f, 30, feat_labels[idx], importances[idx]))

# Keep only the training columns whose importance exceeds the threshold.
threshold = 0.15
x_selected = x_train[:, importances > threshold]
# Bare expression: only echoes a value in an interactive session; it has no
# effect when run as a script.
x_selected.shape

import matplotlib.pyplot as plt

# Horizontal bar chart of the importances in descending order.
plt.figure(1)
plt.title('Feature Importances')
plt.barh(range(len(indices)), importances[indices], color='b', align='center')
plt.xlabel('Relative Importance')