sklearn構建決策樹
阿新 • • 發佈:2018-12-13
1.視覺化樹
# Fit a shallow regression tree on two columns of the California housing
# dataset, render it with Graphviz and save the picture as a PNG file.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import tree
import graphviz  # visualisation backend
import pydotplus  # turns .dot source into a drawable graph
from IPython.display import Image  # inline image display
from sklearn.model_selection import train_test_split  # train/test splitting
# Public import path; the private ``sklearn.datasets.california_housing``
# module was deprecated and later removed from scikit-learn.
from sklearn.datasets import fetch_california_housing  # bundled housing-price dataset

house = fetch_california_housing()
# The original author notes that house.data.shape is (20640, 8).

# Keep the tree shallow so that the rendered picture stays readable.
dtr = tree.DecisionTreeRegressor(max_depth=2)
# fit() takes the feature matrix X and the target y; only columns 6 and 7
# of the feature matrix are used here.
dtr.fit(house.data[:, [6, 7]], house.target)

# Visualise the tree. This call rarely needs changing; with out_file=None
# the .dot source is returned as a string instead of being written to disk.
dot_data = tree.export_graphviz(
    dtr,  # the fitted estimator
    out_file=None,
    feature_names=house.feature_names[6:8],  # names of the two columns used
    filled=True,
    impurity=False,
    rounded=True,
)

# Draw the tree.
graph = pydotplus.graph_from_dot_data(dot_data)
# Fill colour must be a full "#RRGGBB" value; the original "#FF2DD" had only
# five hex digits.
graph.get_nodes()[7].set_fillcolor("#FFF2DD")

# Save the tree as a picture.
# NOTE(review): a raw string already keeps backslashes literal, so the
# doubled backslashes below are redundant - confirm the intended path.
graph.write_png(r'C:\\Users\\Administrator\\Desktop\\dtr.png')
2.訓練資料
# Train a decision-tree regressor on the California housing dataset and
# report its score on a held-out test split.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import tree
import graphviz  # visualisation backend
import pydotplus  # turns .dot source into a drawable graph
from IPython.display import Image  # inline image display
from sklearn.model_selection import train_test_split  # train/test splitting
# Public import path; the private ``sklearn.datasets.california_housing``
# module was deprecated and later removed from scikit-learn.
from sklearn.datasets import fetch_california_housing  # bundled housing-price dataset

house = fetch_california_housing()
# The original author notes that house.data.shape is (20640, 8).

# Hold out 10% of the samples for testing; the fixed seed makes the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    house.data,
    house.target,
    test_size=0.1,
    random_state=42,
)

dtr = tree.DecisionTreeRegressor(random_state=42)
dtr.fit(x_train, y_train)

# Evaluate the fitted tree on the held-out split.
score = dtr.score(x_test, y_test)
print(score)  # the original author reports 0.637318351331017