package com.gk.dmMethod;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

// NOTE(review): the original text started with the fragment
// "com.sun.corba.se.impl.encoding.OSFCodeSetRegistry.Entry;" (an import statement that lost its
// "import" keyword). Nothing in this file uses an unqualified Entry (only Map.Entry), so the
// fragment is left out here.

/**
 * Naive Bayes classifier; the attribute being predicted must be categorical.
 *
 * <p>Intended usage:
 * <ol>
 *   <li>Set the training-set file; the last column of every row is the class label.</li>
 *   <li>Generate the model.</li>
 *   <li>Set the test-set file.</li>
 *   <li>Predict; the scores are written to the console.</li>
 * </ol>
 *
 * <p>Rows are split on a single space character. A feature column is treated as numerical
 * (Gaussian) when its value in the first training row of a class parses as an integer, and as
 * categorical otherwise.
 */
public class NaiveBayesian {
    private boolean isSetTrainSet;
    private boolean isModelGenerted;
    private boolean isSetTestSet;
    private String trainSet;
    private String testSet;

    /** Number of columns in a training row (features plus label); -1 until the first row is read. */
    private int num = -1;

    /** Number of training rows read into the current model. */
    private int total_trainNum = 0;

    /** Class label -> number of training rows carrying that label. */
    Map<String, Double> mapProba = new HashMap<String, Double>();

    /** Class label -> per-column statistics, one entry per feature column. */
    private Map<String, ArrayList<AttributeS>> map = new HashMap<String, ArrayList<AttributeS>>();

    /** Creates a classifier with no training file, no test file and no model. */
    public NaiveBayesian() {
        // The original assigned isSetTestSet twice and never touched isSetTrainSet.
        isSetTrainSet = false;
        isModelGenerted = false;
        isSetTestSet = false;
    }

    /**
     * Selects the training file and invalidates any previously generated model.
     *
     * @param trainSetFile path of the training file
     * @throws Exception declared for compatibility with existing callers
     */
    public void setTrainSetFile(String trainSetFile) throws Exception {
        setTrainSet(trainSetFile);
        isSetTrainSet = true;
        isModelGenerted = false;
    }

    /**
     * Plain assignment of the training file path.
     *
     * @param trainSet path of the training file
     * @throws Exception declared for compatibility with existing callers
     */
    private void setTrainSet(String trainSet) throws Exception {
        this.trainSet = trainSet;
    }

    /**
     * Selects the file whose rows {@link #precdite()} will score.
     *
     * @param testSetFile path of the test file
     */
    public void setTestSetFile(String testSetFile) {
        setTestSet(testSetFile);
        isSetTestSet = true;
    }

    /** Plain assignment of the test file path. */
    private void setTestSet(String testSet) {
        this.testSet = testSet;
    }

    /**
     * Reads the training file and builds the per-class statistics.
     *
     * <p>Any statistics from an earlier call are discarded first, so the model always reflects
     * exactly one pass over the current training file. Blank lines are skipped.
     *
     * @throws IOException if the training file cannot be read
     * @throws Exception if no training file was set, the file has no rows, or a row has a
     *     different number of columns than the first row
     */
    public void generatePredictedModel() throws IOException, Exception {
        if (!isSetTrainSet) {
            throw new Exception("Haven't set the Train File");
        }
        System.out.println(trainSet);

        // Start from a clean slate; otherwise a second call would mix two training passes and
        // NumericalAttr would refuse new values once it had been queried.
        map.clear();
        mapProba.clear();
        num = -1;
        total_trainNum = 0;
        isModelGenerted = false;

        BufferedReader reader = new BufferedReader(new FileReader(trainSet));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().length() == 0) {
                    continue;
                }
                addToMap(line);
            }
        } finally {
            reader.close();
        }
        if (total_trainNum == 0) {
            throw new Exception("the file has some error : no training rows in " + trainSet);
        }
        isModelGenerted = true;
    }

    /** Prints every class label followed by the statistics of each of its feature columns. */
    public void getModel() {
        for (Map.Entry<String, ArrayList<AttributeS>> entry : map.entrySet()) {
            System.out.println(entry.getKey());
            for (AttributeS attr : entry.getValue()) {
                System.out.println(attr);
            }
            System.out.println();
        }
    }

    /**
     * Scores every row of the test file against every class and prints the results.
     *
     * <p>For each row and class the individual factors are printed, then the class row count,
     * and finally {@code label :\tscore} for every class, where
     * {@code score = product(P(feature | class)) * classCount / totalRows}.
     * A test row may hold either the feature columns only, or the feature columns followed by a
     * label column, which is ignored. Blank lines are skipped.
     *
     * @throws Exception if the model has not been generated, no test file was set, a row has the
     *     wrong number of columns, or a categorical value was never seen for some class
     */
    public void precdite() throws Exception {
        if (!isModelGenerted) {
            throw new Exception("Haven't Generaten The PredicatedModel");
        }
        if (!isSetTestSet) {
            throw new Exception("Haven't set the Test File");
        }

        final int featureCount = num - 1;
        BufferedReader reader = new BufferedReader(new FileReader(testSet));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().length() == 0) {
                    continue;
                }
                String[] tokens = line.split(" ");
                if (tokens.length != featureCount && tokens.length != num) {
                    throw new Exception("the file has some error :" + line);
                }

                Map<String, Double> posterior = scoreRow(tokens, featureCount);
                for (Map.Entry<String, Double> entry : posterior.entrySet()) {
                    System.out.println(entry.getKey() + " :\t" + entry.getValue());
                }
                System.out.println("===================");
                System.out.println();
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Computes the unnormalised posterior of every class for one test row, echoing the factors.
     *
     * <p>The result goes into a fresh map: the original wrote it back into {@code mapProba},
     * which destroyed the class counts and corrupted every row after the first.
     */
    private Map<String, Double> scoreRow(String[] tokens, int featureCount) throws Exception {
        Map<String, Double> posterior = new HashMap<String, Double>();
        for (Map.Entry<String, ArrayList<AttributeS>> entry : map.entrySet()) {
            List<AttributeS> columns = entry.getValue();
            double likelihood = 1;
            for (int i = 0; i < featureCount; i++) {
                double factor = columns.get(i).getP(tokens[i]);
                System.out.print(factor + " * ");
                likelihood *= factor;
            }
            System.out.println();

            double classCount = mapProba.get(entry.getKey());
            System.out.println(classCount);
            posterior.put(entry.getKey(), likelihood * classCount / total_trainNum);
        }
        return posterior;
    }

    /**
     * Adds one training row to the model.
     *
     * @param line one row of the training file; the last column is the class label
     * @throws Exception if the row's column count differs from the first row's, or a value in a
     *     numerical column is not an integer
     */
    private void addToMap(String line) throws Exception {
        String[] columns = line.split(" ");
        if (num == -1) {
            num = columns.length;
        }
        if (num != columns.length) {
            throw new Exception("the file has some error :" + line);
        }

        int featureCount = num - 1;
        String label = columns[featureCount];

        total_trainNum++;
        Double seen = mapProba.get(label);
        mapProba.put(label, seen == null ? 1.0 : seen + 1);

        ArrayList<AttributeS> stats = map.get(label);
        if (stats == null) {
            // First row of this class decides whether each column is numerical or categorical.
            stats = new ArrayList<AttributeS>();
            for (int i = 0; i < featureCount; i++) {
                if (isInteger(columns[i])) {
                    stats.add(new NumericalAttr());
                } else {
                    stats.add(new CategoricalAttr());
                }
            }
            map.put(label, stats);
        }
        for (int i = 0; i < featureCount; i++) {
            stats.get(i).add(columns[i]);
        }
    }

    /** Returns whether {@code text} parses as an {@code int}. */
    private static boolean isInteger(String text) {
        try {
            Integer.parseInt(text);
            return true;
        } catch (NumberFormatException notANumber) {
            return false;
        }
    }
}

/**
 * Statistics of one feature column within one class.
 *
 * <p>Implementations may compute derived values lazily, the first time they are needed.
 */
abstract class AttributeS {
    /** Records one training value of this column. */
    abstract void add(String attr) throws Exception;

    /** Returns the likelihood of {@code attribute} for this column given the class. */
    abstract double getP(String attribute) throws Exception;
}

/** Categorical column: likelihood is the relative frequency of the value within the class. */
class CategoricalAttr extends AttributeS {
    /** Number of values recorded so far. */
    int sum = 0;

    /** Value -> number of occurrences. */
    Map<String, Integer> map = new HashMap<String, Integer>();

    @Override
    void add(String attr) {
        sum++;
        Integer count = map.get(attr);
        map.put(attr, count == null ? 1 : count + 1);
    }

    /**
     * Returns {@code count(attribute) / sum}.
     *
     * @throws Exception if {@code attribute} was never recorded for this column
     */
    @Override
    double getP(String attribute) throws Exception {
        Integer count = map.get(attribute);
        if (count == null) {
            throw new Exception("Can't find the attribute : " + attribute);
        }
        return count * 1.0 / sum;
    }

    /** Returns the counts as {@code <value,count>,} pairs; no longer prints as a side effect. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            text.append("<").append(entry.getKey()).append(",").append(entry.getValue()).append(">,");
        }
        return text.toString();
    }
}

/** Integer-valued column: likelihood is the Gaussian density fitted to the recorded values. */
class NumericalAttr extends AttributeS {
    /**
     * Lower bound applied to the variance so that a column whose training values are all
     * identical does not cause a division by zero.
     */
    private static final double MIN_VARIANCE = 1e-9;

    /** Running sum of the recorded values. */
    int sum;

    /** Not used by this class. */
    int n;

    /** Mean of the recorded values; valid once {@code isComputed} is true. */
    double average;

    /** Population variance (floored at {@code MIN_VARIANCE}); valid once {@code isComputed} is true. */
    double vaiance;

    /** Standard deviation, the square root of {@code vaiance}. */
    double vaiance_standard;

    /** Running sum of the squares of the recorded values. */
    double temp_xi = 0;

    List<Integer> list = new ArrayList<Integer>();

    /** True once mean and variance have been derived; further {@code add} calls are rejected. */
    boolean isComputed = false;

    /**
     * Records one integer training value.
     *
     * @throws Exception if the statistics were already computed, or {@code attr} is not an integer
     */
    @Override
    void add(String attr) throws Exception {
        if (isComputed) {
            throw new Exception("the model has been Computed");
        }
        int value;
        try {
            value = Integer.parseInt(attr);
        } catch (NumberFormatException e) {
            throw new Exception("Some error in attribute", e);
        }
        list.add(value);
        temp_xi += (double) value * value; // widen first: int * int could overflow
        sum += value;
    }

    /**
     * Returns the Gaussian density {@code exp(-(x - mean)^2 / (2 * var)) / (sd * sqrt(2 * pi))}.
     *
     * <p>Mean and variance are computed on the first call. The original guarded that with an
     * inverted condition (so they were never computed) and used {@code Math.log} instead of
     * {@code Math.exp}, which made every result NaN.
     *
     * @throws NumberFormatException if {@code attribute} is not an integer
     * @throws IllegalStateException if no value has been recorded
     */
    @Override
    double getP(String attribute) {
        if (!isComputed) {
            compute();
            isComputed = true;
        }
        int x = Integer.parseInt(attribute);
        double deviation = x - average;
        return Math.exp(-(deviation * deviation) / (2 * vaiance))
                / (vaiance_standard * Math.sqrt(2 * Math.PI));
    }

    /** Derives mean, population variance and standard deviation from the running sums. */
    private void compute() {
        int count = list.size();
        if (count == 0) {
            throw new IllegalStateException("no training values recorded for this attribute");
        }
        average = sum * 1.0 / count;
        // sum((x - mean)^2) expanded as sum(x^2) - 2 * mean * sum(x) + count * mean^2
        double squaredDeviations = temp_xi - 2.0 * sum * average + average * average * count;
        vaiance = Math.max(squaredDeviations / count, MIN_VARIANCE);
        vaiance_standard = Math.sqrt(vaiance);
    }

    @Override
    public String toString() {
        return list.toString();
    }
}


### 樸素貝葉斯分類原理 對於給定的訓練資料集,首先基於特徵條件獨立假設學習輸入/輸出的聯合概率分佈;然後基於此模型,對給定的輸入$x$,利用貝葉斯定理求出後驗概率最大的輸出$y$。 **特徵獨立性假設**:在利用貝葉斯定理進行預測時,我們需要求解條件概率$P(x|y_k)=P(x_1,x_2,...,x_n|y_k)=\prod_{i=1}^{n}P(x_i|y_k)$



對於樸素貝葉斯演算法相信做資料探勘和推薦系統的小夥們都耳熟能詳了,演算法原理我就不囉嗦了。我主要想通過java程式碼實現樸素貝葉斯演算法,思想: 1. 用javabean +Arraylist 對於訓練資料儲存 2. 對於樣本資料訓練 具體的程式碼如下:package NB


實驗描述: 對指定資料集進行分類問題的分析,選擇適當的分類演算法,編寫程式實現,提交程式和結果報告 資料集: balance-scale.data(見附件一) ,已有資料集構建貝葉斯分類器。 資料包括四個屬性:五個屬性值 第一個屬性值表示樣本的類別號,其他四個屬性為四個不同

1.關於貝葉斯分類 bayes 是一種統計學分類方法,它基於貝葉斯定理,它假定一個屬性值對給定類的影響獨立於其它屬性點的值。該假定稱作類條件獨立。做此假定是為了簡化所需計算,並在此意義下稱為“樸素的”。 bayes分類的演算法大致如下: (1)對於屬性值是離散的,並且目