打印隨機森林模型
阿新 • • 發佈:2018-08-15
character 打印 special filled ring ict 需要 sam rap
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
import pydot
from sklearn.externals.six import StringIO
from IPython.display import Image
import pydotplus
# Load the training set; the "Age" column is read as float64.
train = pd.read_csv("train2.csv", dtype={"Age": np.float64})
# Preview the first ten rows. The parenthesised call form is valid under both
# Python 2 and Python 3, unlike the bare `print x` statement.
print(train.head(10))
def harmonize_data(titanic):
    """Fill missing values and numerically encode columns of ``titanic``.

    The frame is modified in place and also returned, so callers may either
    ignore the return value or rebind it.

    Steps performed:
      * "Age" and "Fare": missing entries are replaced by the column median.
      * "Sex": "male" -> 0, "female" -> 1.
      * "Embarked": missing entries are treated as "S", then
        "S" -> 0, "C" -> 1, "Q" -> 2.

    Values that are not listed in the mappings above are left unchanged.

    Args:
        titanic: pandas DataFrame with the columns "Age", "Sex", "Embarked"
            and "Fare".

    Returns:
        The same DataFrame object that was passed in.
    """
    sex_codes = {"male": 0, "female": 1}
    embarked_codes = {"S": 0, "C": 1, "Q": 2}

    titanic["Age"] = titanic["Age"].fillna(titanic["Age"].median())

    # Map through a dict with the value itself as fallback: unknown entries
    # survive untouched, and a fully mapped column gets a numeric dtype
    # instead of staying a string/object column holding ints.
    titanic["Sex"] = titanic["Sex"].map(
        lambda value: sex_codes.get(value, value)
    )

    titanic["Embarked"] = titanic["Embarked"].fillna("S")
    titanic["Embarked"] = titanic["Embarked"].map(
        lambda value: embarked_codes.get(value, value)
    )

    titanic["Fare"] = titanic["Fare"].fillna(titanic["Fare"].median())
    return titanic
# Clean the training frame; harmonize_data modifies it in place.
harmonize_data(train)
print("ok")

# Feature columns fed to the classifier.
predictors = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]
results = []

# Candidate hyper-parameter grids; not consumed by the code visible here.
sample_leaf_options = list(range(1, 500, 3))
n_estimators_options = list(range(1, 1000, 5))

# Rows [0, train_size) are used for fitting and rows [train_size, end) for
# evaluation. Using one split index for both slices ensures no row is
# silently dropped between the two sets (previously row 600 was skipped).
train_size = 600
min_samples_leaf = 50
n_estimators = 5

groud_truth = train["Survived"][train_size:]
alg = RandomForestClassifier(
    min_samples_leaf=min_samples_leaf,
    n_estimators=n_estimators,
    random_state=50,
)
alg.fit(train[predictors][:train_size], train["Survived"][:train_size])
predict = alg.predict(train[predictors][train_size:])

# Record (min_samples_leaf, n_estimators, hold-out accuracy).
results.append(
    (min_samples_leaf, n_estimators, (groud_truth == predict).mean())
)
print(results)
# Export every decision tree of the fitted forest to its own PDF file.
Estimators = alg.estimators_
for index, model in enumerate(Estimators):
    # One output file per tree: iris_0.pdf, iris_1.pdf, ...
    filename = "iris_" + str(index) + ".pdf"
    # out_file=None makes export_graphviz return the DOT source as a string.
    dot_data = tree.export_graphviz(
        model,
        out_file=None,
        feature_names=predictors,
        class_names=["die", "live"],
        filled=True,
        rounded=True,
        special_characters=True,
    )
    graph = pydotplus.graph_from_dot_data(dot_data)
    # NOTE(review): the Image object is built but discarded. Inside a loop it
    # is not auto-displayed, even in a notebook; wrap it in
    # IPython.display.display(...) if inline rendering is wanted.
    Image(graph.create_png())
    graph.write_pdf(filename)
前提:需要先安裝 graphviz(pydotplus 依賴其 dot 可執行檔來輸出 PNG/PDF),例如在 CentOS 上執行:
yum install graphviz
涉及到的訓練集參考上一篇文章
打印隨機森林模型