1. 程式人生 > >決策樹演算法例項(基於ID3)

決策樹演算法例項(基於ID3)

本文以 ID3 決策樹演算法為例,使用 sklearn 函式庫實作,並可透過 graphviz 將訓練好的決策樹轉換為 PDF 檢視。

案例中用到的模擬資料如下:

############################################################################

RID,age,income,student,credit_rating,class_buys_computer
1,youth,high,no,fair,no
2,youth,high,no,excellent,no
3,middle_aged,high,no,fair,yes
4,senior,medium,no,fair,yes
5,senior,low,yes,fair,yes
6,senior,low,yes,excellent,no
7,middle_aged,low,yes,excellent,yes
8,youth,medium,no,fair,no
9,youth,low,yes,fair,yes
10,senior,medium,yes,fair,yes
11,youth,medium,yes,excellent,yes
12,middle_aged,medium,no,excellent,yes
13,middle_aged,high,yes,fair,yes
14,senior,medium,no,excellent,no

############################################################################

from sklearn.feature_extraction import DictVectorizer
import  csv
from sklearn import  tree
from  sklearn import  preprocessing
from sklearn.externals.six import StringIO
# Train an entropy-criterion (ID3-style) decision tree on the AllElectronics
# CSV, export the fitted tree to a Graphviz .dot file, and predict the class
# of one modified sample row. Written for Python 3.

# On Python 3 the csv module expects a text-mode file opened with newline='';
# the context manager closes the handle once every row has been consumed.
with open(r'E:\myAI\AllElectronics.csv', newline='') as allelectionicsData:
    reader = csv.reader(allelectionicsData)
    headers = next(reader)
    print(headers)

    featureList = []
    lableList = []
    for row in reader:
        # A blank line yields an empty row; skip it instead of raising
        # IndexError on row[-1].
        if not row:
            continue
        # The last column holds the class label.
        lableList.append(row[-1])
        # Every column between the first one (RID in the sample data) and
        # the label becomes a {header: value} feature entry.
        featureList.append(dict(zip(headers[1:-1], row[1:-1])))

print(featureList)

# One-hot encode the categorical feature dictionaries.
vec = DictVectorizer()
dummyX = vec.fit_transform(featureList).toarray()
print("dummyX: " + str(dummyX))

# get_feature_names() is absent from newer scikit-learn releases, which
# provide get_feature_names_out() instead; support both so the script does
# not depend on one exact library version.
if hasattr(vec, "get_feature_names_out"):
    featureNames = vec.get_feature_names_out().tolist()
else:
    featureNames = vec.get_feature_names()
print(featureNames)

print("Lablelist: " + str(lableList))

# Binarize the class labels.
lb = preprocessing.LabelBinarizer()
dummyY = lb.fit_transform(lableList)
print("dummyY: " + str(dummyY))

# criterion='entropy' selects splits by information gain.
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(dummyX, dummyY)
print("clf: " + str(clf))

# export_graphviz writes into the open handle; its return value is not
# needed, so the handle name is no longer rebound inside the with-block.
with open("allelectionicsData.dot", 'w') as f:
    tree.export_graphviz(clf, feature_names=featureNames, out_file=f)

oneRowX = dummyX[0, :]
print("oneRowX: " + str(oneRowX))

# Copy before editing: dummyX[0, :] is a view, so writing through it would
# silently alter the training matrix as well.
newoneRow = oneRowX.copy()
# NOTE(review): with DictVectorizer's default sorted feature order, indices
# 0 and 2 are presumably age=middle_aged and age=youth; confirm against the
# feature names printed above.
newoneRow[0] = 1
newoneRow[2] = 0
print("newoneRow : " + str(newoneRow))

# predict() expects a 2-D array of shape (n_samples, n_features); reshape the
# single sample to (1, n_features).
predictedY = clf.predict(newoneRow.reshape(1, -1))

print("predictedY: " + str(predictedY))