程式人生 > sklearn構建決策樹

sklearn構建決策樹

1.視覺化樹

# Fit a shallow regression tree on two columns of the California housing
# data and write a rendering of the fitted tree to a PNG file.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import tree
import graphviz  # visualization
import pydotplus  # builds a graph from .dot data
from IPython.display import Image  # image display
from sklearn.model_selection import train_test_split  # split data into train and test sets

# Import from the public ``sklearn.datasets`` package; the
# ``sklearn.datasets.california_housing`` submodule path is not importable
# in current scikit-learn releases.
from sklearn.datasets import fetch_california_housing  # sklearn's built-in housing-price dataset

house = fetch_california_housing()
# print(house.data.shape)  # (20640, 8)

dtr = tree.DecisionTreeRegressor(max_depth=2)
# Train on columns 6 and 7 only; fit() takes the two arguments X and y.
dtr.fit(house.data[:, [6, 7]], house.target)

# Visualize the tree.
# This call rarely needs changing; with out_file=None the DOT source is
# returned as a string instead of being written to a file.
dot_data = tree.export_graphviz(
    dtr,  # the fitted estimator instance
    out_file=None,
    feature_names=house.feature_names[6:8],  # names of the two columns used above
    filled=True,
    impurity=False,
    rounded=True,
)

# Draw the tree.
graph = pydotplus.graph_from_dot_data(dot_data)
# Fill colour must be a full 6-digit "#RRGGBB" value; the previous
# 5-digit "#FF2DD" is not a valid colour specification.
# NOTE(review): which tree node index 7 refers to depends on the order
# pydotplus returns nodes in -- confirm it is the intended one.
graph.get_nodes()[7].set_fillcolor("#FFF2DD")

# Save the tree as an image.
# NOTE(review): this is a raw string, so every doubled backslash stays as
# two characters in the path -- confirm that is intended.
graph.write_png(r'C:\\Users\\Administrator\\Desktop\\dtr.png')

(圖:上述程式碼產生的決策樹視覺化結果 dtr.png)

2.訓練資料

# Train a decision-tree regressor on the California housing data and
# print its score on a held-out test split.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import tree
import graphviz  # visualization
import pydotplus  # builds a graph from .dot data
from IPython.display import Image  # image display
from sklearn.model_selection import train_test_split  # split data into train and test sets

# Import from the public ``sklearn.datasets`` package; the
# ``sklearn.datasets.california_housing`` submodule path is not importable
# in current scikit-learn releases.
from sklearn.datasets import fetch_california_housing  # sklearn's built-in housing-price dataset

house = fetch_california_housing()
# print(house.data.shape)  # (20640, 8)

# Hold out 10% of the rows for testing; the fixed seed makes the split
# repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    house.data,
    house.target,
    test_size=0.1,
    random_state=42,
)

dtr = tree.DecisionTreeRegressor(random_state=42)
dtr.fit(x_train, y_train)

# Evaluate the fitted tree on the held-out split.
score = dtr.score(x_test, y_test)
print(score)  # the original author reported 0.637318351331017