資訊熵計算(自己編寫的python程式碼,垃圾,高手繞道)
阿新 • • 發佈:2018-12-25
# -*- coding:utf-8 -*-
'''
Created on 2017年9月15日
@author: snow
'''
import csv;
import math;
# Default CSV file read by allData().
fileName = "AllElectronics.csv"
def allData(path=None):
    """Load a CSV table and split it into headers, rows, labels and features.

    The first non-empty row is treated as the header row. For every row after
    it, the last column is taken as the class label and the columns strictly
    between the first and the last become the feature values (the first
    column is dropped -- presumably a row id; confirm against the data file).

    Args:
        path: CSV file to read. When omitted, the module-level ``fileName``
            is used.

    Returns:
        A 4-tuple ``(headers, dataContent, labels, dataSet)``:
        ``headers`` is the header row, ``dataContent`` the complete data
        rows, ``labels`` the last column of each data row, and ``dataSet``
        each data row without its first and last column. All four are empty
        lists when the file contains no rows.
    """
    if path is None:
        path = fileName

    # newline='' lets the csv module handle line endings itself, and the
    # context manager guarantees the file handle is closed.
    with open(path, encoding='UTF-8', newline='') as csvFile:
        # Blank lines come back from csv.reader as empty lists; skip them so
        # that indexing the last column below cannot fail.
        fileContent = [row for row in csv.reader(csvFile) if row]

    if not fileContent:
        return [], [], [], []

    headers = fileContent[0]
    # Keep the whole row here: the feature slice below needs every column.
    dataContent = fileContent[1:]
    labels = [row[-1] for row in dataContent]
    dataSet = [row[1:-1] for row in dataContent]
    return headers, dataContent, labels, dataSet
# Load the table once at import time; the names below are module-level globals.
headers, dataContent, labels, dataSet = allData()
# Number of data rows, i.e. one label per row.
numEntries = len(labels)
def calEnt(labels):
    """Return the Shannon entropy, in bits, of the class labels in *labels*.

    The entropy is ``-sum(p * log2(p))`` over the distinct labels, where
    ``p`` is the fraction of entries carrying that label. The total is taken
    from *labels* itself, so the result is correct for any input, not just
    one particular data set.

    Args:
        labels: Iterable of hashable class labels.

    Returns:
        The entropy as a float; ``0.0`` when *labels* is empty.

    Side effects:
        Prints the count of each distinct label to stdout, in order of first
        appearance.
    """
    labelCounts = {}
    for label in labels:
        labelCounts[label] = labelCounts.get(label, 0) + 1

    # Derive the denominator from the counted labels rather than from any
    # module-level state.
    total = sum(labelCounts.values())

    shannonEnt = 0.0
    for count in labelCounts.values():
        # Diagnostic output: one line per distinct label count.
        print(count)
        prob = count / total
        shannonEnt -= prob * math.log2(prob)  # base-2 logarithm -> bits
    return shannonEnt
# Entropy of the class-label column of the loaded table.
res = calEnt(labels)
print(res)