Python實現決策樹應用之判斷隱形眼鏡的型別

阿新 • • 發佈：2018-11-11

程式碼模組一、DecisionTreePlot

# -*- coding:utf-8 -*-
__author__ = 'yangxin_ryan'

import matplotlib.pyplot as plt
"""
定義文字框和箭頭格式
【sawtooth 為鋸齒邊方框，round4 為圓角方框；fc（facecolor）表示方框填充色的灰階值，而不是字型顏色，0.0~1.0 數值越大顏色越淺】
"""
# Box and arrow styles handed to matplotlib's ``annotate``.
# ``fc`` is the face (fill) colour of the text box, given as a grayscale
# string; decision nodes use a sawtooth-edged box, leaves a rounded one.
decisionNode = {"boxstyle": "sawtooth", "fc": "0.8"}
leafNode = {"boxstyle": "round4", "fc": "0.8"}
# "<-" puts the arrow head at the annotation text, i.e. on the child node.
arrow_args = {"arrowstyle": "<-"}


class DecisionTreePlot(object):
    """Draw a decision tree stored as nested dicts using matplotlib.

    A tree is a single-key dict of the form
    ``{feature_label: {feature_value: subtree_or_leaf, ...}}``; every value
    that is not a dict is treated as a leaf (a class label).

    The layout state used while drawing is kept on the instance. It used to
    be stored as attributes on the bound methods themselves
    (``self.plot_tree.xOff = ...``), which raises ``AttributeError`` because
    bound-method objects do not accept new attributes.
    """

    # Layout state; create_plot() initialises all of it before drawing.
    _ax = None        # matplotlib axes the tree is drawn on
    _total_w = 1.0    # number of leaves of the whole tree (horizontal slots)
    _total_d = 1.0    # depth of the whole tree (vertical levels)
    _x_off = 0.0      # x position of the most recently placed leaf
    _y_off = 1.0      # y position of the level currently being drawn

    def get_num_leafs(self, my_tree):
        """Return the number of leaf nodes in ``my_tree``.

        :param my_tree: single-key nested dict describing the tree.
        :return: count of non-dict values reachable from the root.
        """
        # next(iter(...)) works on Python 2 and 3; dict.keys()[0] is Python 2 only.
        first_str = next(iter(my_tree))
        second_dict = my_tree[first_str]
        num_leafs = 0
        for child in second_dict.values():
            if isinstance(child, dict):
                num_leafs += self.get_num_leafs(child)
            else:
                num_leafs += 1
        return num_leafs

    def get_tree_depth(self, my_tree):
        """Return the number of decision levels on the longest root-to-leaf path.

        :param my_tree: single-key nested dict describing the tree.
        :return: 1 for a root whose children are all leaves, and so on.
        """
        first_str = next(iter(my_tree))
        second_dict = my_tree[first_str]
        max_depth = 0
        for child in second_dict.values():
            if isinstance(child, dict):
                this_depth = 1 + self.get_tree_depth(child)
            else:
                this_depth = 1
            if this_depth > max_depth:
                max_depth = this_depth
        return max_depth

    def plot_node(self, node_txt, center_pt, parent_pt, node_type):
        """Draw one boxed node at ``center_pt`` with an arrow from ``parent_pt``.

        :param node_txt: text shown inside the box.
        :param center_pt: (x, y) of the node, in axes-fraction coordinates.
        :param parent_pt: (x, y) of the parent node the arrow starts from.
        :param node_type: bbox style dict (``decisionNode`` or ``leafNode``).
        """
        self._ax.annotate(node_txt, xy=parent_pt, xycoords='axes fraction', xytext=center_pt,
                          textcoords='axes fraction', va="center", ha="center", bbox=node_type,
                          arrowprops=arrow_args)

    def plot_mid_text(self, cntr_pt, parent_pt, txt_string):
        """Write ``txt_string`` halfway between a node and its parent.

        :param cntr_pt: (x, y) of the child node.
        :param parent_pt: (x, y) of the parent node.
        :param txt_string: edge label (the feature value leading to the child).
        """
        x_mid = (parent_pt[0] - cntr_pt[0]) / 2.0 + cntr_pt[0]
        y_mid = (parent_pt[1] - cntr_pt[1]) / 2.0 + cntr_pt[1]
        self._ax.text(x_mid, y_mid, txt_string, va="center", ha="center", rotation=30)

    def plot_tree(self, my_tree, parent_pt, node_txt):
        """Recursively draw ``my_tree`` below ``parent_pt``.

        Relies on the layout state initialised by ``create_plot``.

        :param my_tree: single-key nested dict describing the (sub)tree.
        :param parent_pt: (x, y) of the parent node.
        :param node_txt: label for the edge from the parent to this subtree.
        """
        num_leafs = self.get_num_leafs(my_tree)
        # Centre this decision node above the span occupied by its own leaves.
        cntr_pt = (self._x_off + (1.0 + float(num_leafs)) / 2.0 / self._total_w, self._y_off)
        self.plot_mid_text(cntr_pt, parent_pt, node_txt)
        first_str = next(iter(my_tree))
        self.plot_node(first_str, cntr_pt, parent_pt, decisionNode)
        second_dict = my_tree[first_str]
        # Step one level down for the children ...
        self._y_off = self._y_off - 1.0 / self._total_d
        for key, child in second_dict.items():
            if isinstance(child, dict):
                self.plot_tree(child, cntr_pt, str(key))
            else:
                # Each leaf takes the next horizontal slot.
                self._x_off = self._x_off + 1.0 / self._total_w
                leaf_pt = (self._x_off, self._y_off)
                self.plot_node(child, leaf_pt, cntr_pt, leafNode)
                self.plot_mid_text(leaf_pt, cntr_pt, str(key))
        # ... and step back up once this subtree is finished.
        self._y_off = self._y_off + 1.0 / self._total_d

    def create_plot(self, in_tree):
        """Open a matplotlib figure and draw ``in_tree`` on it.

        :param in_tree: single-key nested dict describing the whole tree.
        """
        fig = plt.figure(1, facecolor='green')
        fig.clf()
        axprops = dict(xticks=[], yticks=[])
        self._ax = plt.subplot(111, frameon=False, **axprops)
        self._total_w = float(self.get_num_leafs(in_tree))
        self._total_d = float(self.get_tree_depth(in_tree))
        # Start half a slot to the left so the first leaf lands mid-slot.
        self._x_off = -0.5 / self._total_w
        self._y_off = 1.0
        self.plot_tree(in_tree, (0.5, 1.0), '')
        plt.show()

    def retrieve_tree(self, i):
        """Return one of two hard-coded sample trees.

        :param i: index (0 or 1) of the sample tree.
        :raises IndexError: if ``i`` is out of range.
        """
        list_of_trees = [
            {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}},
            {'no surfacing': {0: 'no', 1: {'flippers': {0: {'head': {0: 'no', 1: 'yes'}}, 1: 'no'}}}}
        ]
        return list_of_trees[i]

程式碼模組二、DescionTreeApp

# -*- coding:utf-8 -*-
__author__ = 'yangxin_ryan'

import operator
from math import log
from src.descion_tree.decision_tree_plot import DecisionTreePlot as dtPlot
import pickle
import copy


class DescionTreeApp(object):
    """Build, use and persist decision trees chosen by information gain.

    A data set is a list of rows; each row is a list whose last element is
    the class label and whose other elements are feature values. A tree is a
    nested dict ``{feature_label: {feature_value: subtree_or_class_label}}``.
    """

    def create_data_set(self):
        """Return a small hard-coded data set and its feature labels.

        :return: ``(data_set, labels)``.
        """
        data_set = [[1, 1, 'yes'],
                    [1, 1, 'yes'],
                    [1, 0, 'no'],
                    [0, 1, 'no'],
                    [0, 1, 'no']]
        labels = ['no surfacing', 'flippers']
        return data_set, labels

    def calc_shannon_ent(self, data_set):
        """Return the Shannon entropy (base 2) of the class labels.

        :param data_set: list of rows; the last element of a row is its class.
        :return: entropy as a float; ``0.0`` for an empty data set.
        """
        num_entries = len(data_set)
        label_counts = {}
        for feat_vec in data_set:
            current_label = feat_vec[-1]
            label_counts[current_label] = label_counts.get(current_label, 0) + 1
        shannon_ent = 0.0
        for count in label_counts.values():
            prob = float(count) / num_entries
            shannon_ent -= prob * log(prob, 2)
        return shannon_ent

    def split_data_set(self, data_set, index, value):
        """Return the rows whose column ``index`` equals ``value``, minus that column.

        :param data_set: list of rows.
        :param index: column to test and remove.
        :param value: feature value a row must have to be kept.
        :return: new list of new rows; ``data_set`` is not modified.
        """
        ret_data_set = []
        for feat_vec in data_set:
            if feat_vec[index] == value:
                reduced_feat_vec = feat_vec[:index]
                reduced_feat_vec.extend(feat_vec[index + 1:])
                ret_data_set.append(reduced_feat_vec)
        return ret_data_set

    def choose_best_feature_to_split(self, data_set):
        """Return the index of the feature with the largest information gain.

        :param data_set: non-empty list of rows.
        :return: column index, or ``-1`` when no feature has a gain above zero.
        """
        num_features = len(data_set[0]) - 1
        base_entropy = self.calc_shannon_ent(data_set)
        best_info_gain, best_feature = 0.0, -1
        for i in range(num_features):
            unique_vals = set(example[i] for example in data_set)
            # Entropy after the split: subset entropies weighted by subset size.
            new_entropy = 0.0
            for value in unique_vals:
                sub_data_set = self.split_data_set(data_set, i, value)
                prob = len(sub_data_set) / float(len(data_set))
                new_entropy += prob * self.calc_shannon_ent(sub_data_set)
            info_gain = base_entropy - new_entropy
            if info_gain > best_info_gain:
                best_info_gain = info_gain
                best_feature = i
        return best_feature

    def majority_cnt(self, class_list):
        """Return the most frequent class label in ``class_list``.

        On a tie, the label that appears first in ``class_list`` wins.

        :param class_list: non-empty list of class labels.
        """
        class_count = {}
        for vote in class_list:
            class_count[vote] = class_count.get(vote, 0) + 1
        # max() returns the first maximal key in insertion order, which matches
        # the previous stable descending sort.
        return max(class_count, key=class_count.get)

    def create_tree(self, data_set, labels):
        """Recursively build a decision tree for ``data_set``.

        :param data_set: non-empty list of rows (last element = class label).
        :param labels: feature names, one per feature column. The list is
            NOT modified, so it can be reused afterwards with ``classify``.
        :return: a nested-dict tree, or a bare class label when the data set
            needs no split.
        """
        class_list = [example[-1] for example in data_set]
        # All rows share one class: this branch is a leaf.
        if class_list.count(class_list[0]) == len(class_list):
            return class_list[0]
        # Only the class column is left: fall back to a majority vote.
        if len(data_set[0]) == 1:
            return self.majority_cnt(class_list)
        best_feat = self.choose_best_feature_to_split(data_set)
        # No feature separates the classes any further (-1). Splitting on
        # index -1 would split on the class column itself, so vote instead.
        if best_feat < 0:
            return self.majority_cnt(class_list)
        best_feat_label = labels[best_feat]
        # Build the reduced label list as a copy instead of deleting from the
        # caller's list.
        sub_labels = labels[:best_feat] + labels[best_feat + 1:]
        my_tree = {best_feat_label: {}}
        unique_vals = set(example[best_feat] for example in data_set)
        for value in unique_vals:
            my_tree[best_feat_label][value] = self.create_tree(
                self.split_data_set(data_set, best_feat, value), sub_labels)
        return my_tree

    def classify(self, input_tree, feat_labels, test_vec):
        """Return the class label the tree assigns to ``test_vec``.

        :param input_tree: nested-dict tree as returned by ``create_tree``.
        :param feat_labels: feature names in the column order of ``test_vec``.
        :param test_vec: feature values of the sample to classify.
        :raises ValueError: if a tree node's label is missing from ``feat_labels``.
        :raises KeyError: if the sample has a feature value the tree never saw.
        """
        first_str = list(input_tree.keys())[0]
        second_dict = input_tree[first_str]
        feat_index = feat_labels.index(first_str)
        key = test_vec[feat_index]
        value_of_feat = second_dict[key]
        if isinstance(value_of_feat, dict):
            class_label = self.classify(value_of_feat, feat_labels, test_vec)
        else:
            class_label = value_of_feat
        return class_label

    def store_tree(self, input_tree, filename):
        """Pickle ``input_tree`` to ``filename`` (written once, handle closed).

        :param input_tree: tree to persist.
        :param filename: path of the file to create or overwrite.
        """
        with open(filename, 'wb') as fw:
            pickle.dump(input_tree, fw)

    def grab_tree(self, filename):
        """Load and return a tree previously written by ``store_tree``.

        NOTE: unpickling can execute arbitrary code; only load files from a
        trusted source.

        :param filename: path of the pickle file.
        """
        with open(filename, 'rb') as fr:
            return pickle.load(fr)

    # Application test 2: decide the type of contact lenses.
    def app_contact_lenses(self, filename='lenses.txt'):
        """Build a tree from a tab-separated lenses file and plot it.

        :param filename: path of the data file; each line is one sample with
            tab-separated values, the last being the class. The code used to
            open an empty path, which can never succeed. The default here
            assumes the data file is named ``lenses.txt`` -- TODO confirm.
        """
        with open(filename) as fr:
            # Skip blank lines so they do not become one-column rows.
            lenses = [inst.strip().split('\t') for inst in fr if inst.strip()]
        lenses_labels = ['age', 'prescript', 'astigmatic', 'tearRate']
        lenses_tree = self.create_tree(lenses, lenses_labels)
        # create_plot is an instance method, so it needs an instance.
        dtPlot().create_plot(lenses_tree)


if __name__ == "__main__":
    # Script entry point: build and plot the contact-lenses decision tree.
    DescionTreeApp().app_contact_lenses()

Python實現決策樹應用之判斷隱形眼鏡的型別

程式碼模組一、DecisionTreePlot # -*- coding:utf-8 -*- __author__ = 'yangxin_ryan' import matplotlib.pyplot as plt """ 定義文字框和箭頭格式【 sawtooth 波浪方框, rou

python實現決策樹程式碼

資料圖片 from sklearn.feature_extraction import DictVectorizer import csv from sklearn import preprocessing from numpy import * import nu

python實現決策樹

# -*- coding: utf-8 -*- """ Created on Thu Sep 27 10:40:47 2018 @author: Administrator """ # de template # Importing the libraries impor

python實現決策樹演算法

1. #!/usr/bin/python3 import numpy as np from sklearn import tree from sklearn.metrics import precision_recall_curve from sklearn.metrics

Python實現決策樹對西瓜進行分類

使用的周志華老師書上的例子，因為習主席講過一切不給資料集的演算法都是耍流氓，所以我這裡先給出資料集： 0,色澤,根蒂,敲聲,紋理,臍部,觸感,密度,含糖率,好瓜 1,青綠,蜷縮,濁響,清晰,凹陷,硬滑,0.697,0.46,是 2,烏黑,蜷縮,沉悶,清晰,凹陷,硬滑

機器學習經典演算法詳解及Python實現--決策樹（Decision Tree）

（一）認識決策樹 1，決策樹分類原理決策樹是通過一系列規則對資料進行分類的過程。它提供一種在什麼條件下會得到什麼值的類似規則的方法。決策樹分為分類樹和迴歸樹兩種，分類樹對離散變數做決策樹，迴歸樹對連續變數做決策樹。近來的調查表明決策樹也是最經常使用的資料探勘演算法，它

Python實現決策樹並且使用Graphviz視覺化

一、什麼是決策樹（decision tree）——機器學習中的一個重要的分類演算法決策樹是一個類似於資料流程圖的樹結構：其中，每個內部節點表示一個屬性上的測試，每個分支代表一個屬性輸出，而每個樹葉結點代表類或者類的分佈，樹的最頂層是根結點根據天氣情況決定出遊與否的案例二、決策

詳解決策樹、python實現決策樹

決策樹模型定義決策過程決策樹學習特徵選擇資訊增益計算方法 ID3演算法決策樹模型定義分類決策樹模型是一種描述對例項進行分類的樹形結構。決策樹由節點（Node）和有向邊（directed edge）組成。節

python實現決策樹分類（三）

在上一篇文章中，我們已經構建了決策樹，接下來可以使用它用於實際的資料分類。在執行資料分類時，需要決策樹以及標籤向量。程式比較測試資料和決策樹上的數值，遞迴執行直到進入葉子節點。這篇文章主要使用決策樹分類器進行分類，資料集採用UCI資料庫中的紅酒，白酒資料，主要特徵包括12

Python實現——決策樹實例(離散數據/香農熵)

遍歷 values 最適比例刪除類型取值 val creat 決策樹的實現太...繁瑣了。如果只是接受他的原理的話還好說，但是要想用代碼去實現比較糟心，目前運用了《機器學習實戰》的代碼手打了一遍，決定在這裏一點點摸索一下該工程。實例的代碼在使用上運用了香農熵，並

【Python】決策樹的python實現

uia bmp say 不知道 times otto outlook lru bgm 【Python】決策樹的python實現 2016-12-08 數據分析師Nieson 1. 決策樹是什麽? 簡單地理解，就是根據一些 feature 進行分類，每個節點提一個問

Python建立決策樹—解決隱形眼鏡選擇問題

現在我們碰到這樣一個問題，一個人去醫院想配一副隱形眼鏡。我們需要通過問他4個問題，決定他需要帶眼鏡的型別。那麼如何解決這個問題呢？我們決定用決策樹。首先我們去下載一個隱形眼鏡資料集，資料來源於UCI資料庫。下載了lenses.data檔案，如下： 1 1 1 1 1 3 2 1 1

判斷點是否在多邊形內的Python實現及小應用（射線法）

判斷點是否在多邊形內的Python實現及小應用（射線法）轉 https://www.jianshu.com/p/ba03c600a557 判斷一個點是否在多邊形內是處理空間資料時經常面對的需求，例如GIS中的點選功能、根據多邊形邊界篩選出位於多邊形內的點、求交集、篩選不在多邊形內

python3.5《機器學習實戰》學習筆記（五）：決策樹演算法實戰之預測隱形眼鏡型別

一、使用決策樹預測隱形眼鏡型別在上一篇文章中，我們學習了決策樹演算法，接下來，讓我們通過一個例子講解決策樹如何預測患者需要佩戴的隱形眼鏡型別。隱形眼鏡資料集是非常著名的資料集，它包含了很多患者眼部狀況的觀察條件以及醫生推薦的隱形眼鏡型別。隱形眼鏡

基於Jupyter平臺通過python實現Spark的應用程式之wordCount

1、啟動spark平臺，介面如下： 2、啟動Jupyter，介面如下圖所示：如果你對以上啟動存在疑問的話，請看我的上一篇部落格，關於Jupyter配置Spark的。 3、功能分析 - 我們要實現的一個功能是統計詞頻 - 我們需要把統計的檔

python機器學習實戰2：實現決策樹

1.決策樹的相關知識在之前的接觸中決策樹直觀印象應該就是if-else的迴圈，if會怎麼樣，else之後再繼續if-else直至最終的結果。在上節講的kNN它其實已經可以完成很多工，但是它最大的缺點就是無法給資料集的內在含義，決策樹的主要優勢在於資料形式非常

用Python實現數據結構之二叉搜索樹

wke rmi 方法 list lov tid yii last pku 二叉搜索樹二叉搜索樹是一種特殊的二叉樹，它的特點是：對於任意一個節點p，存儲在p的左子樹的中的所有節點中的值都小於p中的值對於任意一個節點p，存儲在p的右子樹的中的所有

資料探勘入門系列教程（四）之基於scikit-learn實現決策樹

資料探勘入門系列教程（四）之基於scikit-learn決策樹處理Iris載入資料集資料特徵訓練隨機森林調參工程師結尾資料探勘入門系列教程（四）之基於scikit-learn決策樹處理Iris 在上一篇部落格，我們介紹了決策樹的一些知識。如果對決策樹還不是很瞭解的話，建議先閱讀上一篇部落格，再來學習這

雪飲者決策樹系列（二）決策樹應用

ssi 字符串長度 mes pla 選擇 font com vector nac 　　本篇以信息增益最大作為最優化策略來詳細介紹決策樹的決策流程。　　首先給定數據集，見下圖　　註：本數據來源於網絡本篇將以這些數據作為訓練數據（雖然少，但足以介紹清楚原理！），下圖是決

R_針對churn資料用id3、cart、C4.5和C5.0建立決策樹模型進行判斷哪種模型更合適

　　data(churn)匯入自帶的訓練集churnTrain和測試集churnTest 　　用id3、cart、C4.5和C5.0建立決策樹模型，並用交叉矩陣評估模型，針對churn資料，哪種模型更合適　　　　決策樹模型 ID3/C4.5/CART演算法比較　　　傳送門

Python實現決策樹應用之判斷隱形眼鏡的型別

相關推薦