以分治法實現分類的 Python 程式
阿新 • • 發佈:2019-02-16
# Number of integer attributes (a1..a9) stored in each patient tuple after
# the id and the diagnosis label.
NUM_ATTRIBUTES = 9


def makeDataSet(fileName):
    """Read a patient data set from a comma-separated text file.

    Every usable line must contain exactly 11 comma-separated fields:
    an id, nine integer attributes and a diagnosis code.  A diagnosis code
    of '4' is labelled 'm' (malignant); any other code is labelled 'b'
    (benign).  Lines containing a '?' are skipped, and so are blank lines.

    Args:
        fileName: Path of the data file to read.

    Returns:
        A list of tuples ``(id, 'm' | 'b', a1, ..., a9)`` where the id is
        kept as a string and the nine attributes are ints.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-skipped line does not have exactly 11 fields
            or an attribute is not an integer.
    """
    tSet = []
    # The context manager guarantees the file is closed, even on a bad line.
    with open(fileName) as dataFile:
        for rawLine in dataFile:
            record = rawLine.strip()  # drop the end-of-line character
            # Skip blank lines and patients with an unknown ('?') value.
            if not record or '?' in record:
                continue
            (patientId, a1, a2, a3, a4, a5,
             a6, a7, a8, a9, diag) = record.split(',')
            diagMorB = 'm' if diag == '4' else 'b'
            tSet.append((patientId, diagMorB,
                         int(a1), int(a2), int(a3), int(a4), int(a5),
                         int(a6), int(a7), int(a8), int(a9)))
    return tSet


def sumLists(list1, list2):
    """Return the element-by-element sum of the first 9 items of two lists.

    Raises:
        IndexError: If either sequence has fewer than 9 items.
    """
    return [list1[index] + list2[index] for index in range(NUM_ATTRIBUTES)]


def makeAverages(sumList, total):
    """Return the first 9 items of ``sumList``, each divided by ``total``.

    Raises:
        ZeroDivisionError: If ``total`` is zero.
        IndexError: If ``sumList`` has fewer than 9 items.
    """
    divisor = float(total)
    return [sumList[index] / divisor for index in range(NUM_ATTRIBUTES)]


def trainClassifier(trainingSet):
    """Build a classifier from the training set.

    The classifier is a list of 9 separator values: for each attribute, the
    midpoint between the benign average and the malignant average.

    Args:
        trainingSet: Iterable of patient tuples as built by ``makeDataSet``.

    Returns:
        A list of 9 floats, one separator value per attribute.

    Raises:
        ZeroDivisionError: If the training set has no benign patients or no
            malignant patients (an average cannot be computed).
    """
    benignSums = [0] * NUM_ATTRIBUTES
    benignCount = 0
    malignantSums = [0] * NUM_ATTRIBUTES
    malignantCount = 0

    for patientTup in trainingSet:
        attributes = patientTup[2:]
        if patientTup[1] == 'b':
            benignSums = sumLists(benignSums, attributes)
            benignCount += 1
        else:
            malignantSums = sumLists(malignantSums, attributes)
            malignantCount += 1

    benignAvgs = makeAverages(benignSums, benignCount)
    malignantAvgs = makeAverages(malignantSums, malignantCount)
    # Averaging the two per-class averages gives the midpoint separator.
    return makeAverages(sumLists(benignAvgs, malignantAvgs), float(2))


def classifyTestSet(testSet, classifier):
    """Run the classifier on every patient of the test set.

    Each attribute strictly greater than its separator value votes
    "malignant"; every other attribute votes "benign".

    Args:
        testSet: Iterable of patient tuples as built by ``makeDataSet``.
        classifier: The 9 separator values from ``trainClassifier``.

    Returns:
        A list of tuples ``(id, benignCount, malignantCount, diagnosis)``,
        where ``diagnosis`` is the actual label stored in the patient tuple.
    """
    results = []
    for patient in testSet:
        malignantCount = sum(
            1 for index in range(NUM_ATTRIBUTES)
            if patient[index + 2] > classifier[index]
        )
        benignCount = NUM_ATTRIBUTES - malignantCount
        # Record patient id, both counts, and the actual diagnosis.
        results.append((patient[0], benignCount, malignantCount, patient[1]))
    return results


def reportResults(results):
    """Print how many patients the classifier got wrong.

    A patient is predicted benign when benignCount > malignantCount and
    malignant otherwise.  A prediction counts as inaccurate when it is
    benign but the actual diagnosis is 'm', or malignant but the actual
    diagnosis is 'b'.

    Args:
        results: Iterable of tuples as returned by ``classifyTestSet``.
    """
    totalCount = 0
    inaccurateCount = 0
    for _patientId, benignCount, malignantCount, actual in results:
        totalCount += 1
        if benignCount > malignantCount:
            # We predict 'b'; wrong if the patient is actually malignant.
            if actual == 'm':
                inaccurateCount += 1
        elif actual == 'b':
            # We predict 'm'; wrong if the patient is actually benign.
            inaccurateCount += 1
    print("of %d" % totalCount,
          "patients,there were %d" % inaccurateCount, 'inaccuracies')


def main():
    """Train on the data file, classify the test file and report accuracy."""
    print("reading in training data")
    trainingFile = "breast-cancer-wisconsin.data"
    trainingSet = makeDataSet(trainingFile)
    print('Done reading training data.')

    print('Training classifier...')
    classifier = trainClassifier(trainingSet)
    print("Done training classifier.")
    print("The value of classifier:")
    print(classifier)

    print("Reading in test data...")
    # NOTE: the test file is the same file as the training file, so the
    # reported accuracy is measured on the data the classifier was built from.
    testFile = "breast-cancer-wisconsin.data"
    testSet = makeDataSet(testFile)
    print('Done reading test data.')

    print('Classifying records...')
    resultList = classifyTestSet(testSet, classifier)
    print('Done classifying.')

    reportResults(resultList)
    print('Program finished.')


if __name__ == "__main__":
    main()
執行指令和結果如下: