決策樹演算法例項(基於ID3)
阿新 • • 發佈:2019-02-05
基於ID3的決策樹演算法。文中使用的是sklearn庫,並可使用graphviz將決策樹轉換為pdf檢視。
案例中用到的模擬資料如下:
############################################################################
RID,age,income,student,credit_rating,class_buys_computer
1,youth,high,no,fair,no
2,youth,high,no,excellent,no
3,middle_aged,high,no,fair,yes
4,senior,medium,no,fair,yes
5,senior,low,yes,fair,yes
6,senior,low,yes,excellent,no
7,middle_aged,low,yes,excellent,yes
8,youth,medium,no,fair,no
9,youth,low,yes,fair,yes
10,senior,medium,yes,fair,yes
11,youth,medium,yes,excellent,yes
12,middle_aged,medium,no,excellent,yes
13,middle_aged,high,yes,fair,yes
14,senior,medium,no,excellent,no
############################################################################
# Decision-tree example: train an entropy-based DecisionTreeClassifier on the
# AllElectronics CSV, export the fitted tree as a Graphviz .dot file, and
# predict the class of one modified sample row.
import csv
# The original imported StringIO from sklearn.externals.six, which no longer
# exists in current scikit-learn releases; the stdlib io module provides the
# same name.
from io import StringIO  # noqa: F401  (kept so the name stays available)

from sklearn import preprocessing
from sklearn import tree
from sklearn.feature_extraction import DictVectorizer

featureList = []
lableList = []

# Text mode with newline='' is what the csv module expects on Python 3; the
# context manager closes the file once every row has been read.
with open(r'E:\myAI\AllElectronics.csv', newline='') as allelectionicsData:
    reader = csv.reader(allelectionicsData)
    headers = next(reader)
    print(headers)

    for row in reader:
        if not row:
            # Skip blank lines (e.g. a trailing newline at the end of the file).
            continue
        # The last column is the class label.
        lableList.append(row[-1])
        # Column 0 (the row id) and the last column (the label) are excluded;
        # everything in between becomes a {header: value} feature dict.
        featureList.append(
            {headers[i]: row[i] for i in range(1, len(row) - 1)}
        )

print(featureList)

# One-hot encode the categorical feature dicts into a dense 0/1 matrix.
vec = DictVectorizer()
dummyX = vec.fit_transform(featureList).toarray()
print("dummyX: " + str(dummyX))

# get_feature_names() was removed from recent scikit-learn releases in favour
# of get_feature_names_out(); fall back to the old method on old installs.
if hasattr(vec, "get_feature_names_out"):
    featureNames = [str(name) for name in vec.get_feature_names_out()]
else:
    featureNames = vec.get_feature_names()
print(featureNames)

print("Lablelist: " + str(lableList))

# Binarize the class labels.
lb = preprocessing.LabelBinarizer()
dummyY = lb.fit_transform(lableList)
print("dummyY: " + str(dummyY))

# criterion='entropy' selects information-gain based splitting.
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(dummyX, dummyY)
print("clf: " + str(clf))

# Write the fitted tree in Graphviz format. The .dot file can then be rendered
# with Graphviz, e.g.: dot -Tpdf allelectionicsData.dot -o output.pdf
with open("allelectionicsData.dot", 'w') as f:
    tree.export_graphviz(clf, feature_names=featureNames, out_file=f)

oneRowX = dummyX[0, :]
print("oneRowX: " + str(oneRowX))

# Copy before editing: dummyX[0, :] is a view, so modifying it in place would
# silently alter the training matrix as well.
newoneRow = oneRowX.copy()
# NOTE(review): presumably columns 0 and 2 are two of the one-hot "age"
# columns, so this switches the sample's age category -- confirm against the
# feature names printed above.
newoneRow[0] = 1
newoneRow[2] = 0
print("newoneRow : " + str(newoneRow))

# predict() expects a 2-D array of shape (n_samples, n_features), so the
# single sample is reshaped into a one-row matrix.
predictedY = clf.predict(newoneRow.reshape(1, -1))
print("predictedY: " + str(predictedY))