Machine Learning Knowledge Points (16): Ensemble Learning with AdaBoost, Implemented in Java
阿新 · Published 2019-01-05
To understand the AdaBoost algorithm described at http://blog.csdn.net/fjssharpsword/article/details/61913092, I work through a simple implementation found online.
1. Base classifier: implementing a simple classification rule
1) The data object class
package sk.adaboost;

public class Instance {
    public double[] dim;  // feature value in each dimension
    public int label;     // class label (+1 / -1)

    public Instance(double[] dim, int label) {
        this.dim = dim;
        this.label = label;
    }
}
2) The abstract classifier class
package sk.adaboost;

public abstract class Classifier {
    public double errorRate;   // weighted error rate on the training set
    public int errorNumber;    // number of misclassified training instances

    public abstract int classify(Instance instance);
}
3) The base learner algorithm
package sk.adaboost;

public class SimpleClassifier extends Classifier {
    double threshold;   // classification threshold
    int dimNum;         // which dimension to split on
    int fuhao = 1;      // sign: which side of the threshold is labelled +1

    public int classify(Instance instance) {
        if (instance.dim[dimNum] >= threshold) {
            return fuhao;
        } else {
            return -fuhao;
        }
    }

    /**
     * Learn the threshold and the sign (fuhao) for one dimension.
     * @param instances training instances
     * @param W         instance weights
     * @param dimNum    which dimension of the instances to train on
     */
    public void train(Instance[] instances, double[] W, int dimNum) {
        errorRate = Double.MAX_VALUE;
        this.dimNum = dimNum;
        double adaThreshold = 0;
        int adaFuhao = 0;
        for (Instance instance : instances) {
            threshold = instance.dim[dimNum];   // try each sample value as the threshold
            for (int fuhaoIt = 0; fuhaoIt < 2; fuhaoIt++) {
                fuhao = -fuhao;                 // try both orientations of the stump
                double error = 0;
                int errorNum = 0;
                for (int i = 0; i < instances.length; i++) {
                    if (classify(instances[i]) != instances[i].label) {
                        error += W[i];          // accumulate the weights of misclassified instances
                        errorNum++;
                    }
                }
                if (errorRate > error) {        // keep the stump with the lowest weighted error so far
                    errorRate = error;
                    errorNumber = errorNum;
                    adaThreshold = threshold;
                    adaFuhao = fuhao;
                }
            }
        }
        threshold = adaThreshold;
        fuhao = adaFuhao;
    }
}
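To see how this decision stump behaves on its own, here is a minimal usage sketch. It is not part of the original post: the class name StumpDemo is my own, it is placed in package sk.adaboost so the package-private fields threshold and fuhao are visible, and it uses only the first feature and the labels of the data from section 5, with uniform weights as in the first AdaBoost round.

package sk.adaboost;

// Hypothetical demo class (not in the original post): trains a single decision
// stump on one dimension with uniform weights and inspects what it learned.
public class StumpDemo {
    public static void main(String[] args) {
        // first feature and labels of the section-5 data, reduced to one dimension
        Instance[] data = {
            new Instance(new double[]{0}, 1),
            new Instance(new double[]{1}, 1),
            new Instance(new double[]{2}, 1),
            new Instance(new double[]{3}, -1),
            new Instance(new double[]{4}, -1),
            new Instance(new double[]{5}, -1),
            new Instance(new double[]{6}, 1),
            new Instance(new double[]{7}, 1),
            new Instance(new double[]{8}, 1),
            new Instance(new double[]{9}, -1)
        };
        double[] W = new double[data.length];
        for (int i = 0; i < W.length; i++) {
            W[i] = 1.0 / W.length;   // uniform weights, as in the first AdaBoost round
        }
        SimpleClassifier stump = new SimpleClassifier();
        stump.train(data, W, 0);     // search thresholds and signs on dimension 0
        System.out.println("threshold = " + stump.threshold
                + ", sign = " + stump.fuhao
                + ", weighted error = " + stump.errorRate);
    }
}

On this label pattern no single threshold stump can do better than three mistakes (weighted error 0.3), which is precisely why several stumps have to be boosted and combined.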
4) The AdaBoost ensemble learning algorithm
package sk.adaboost;

import java.util.ArrayList;
import java.util.List;

public class Adaboost {
    Instance[] instances;
    List<Classifier> classifierList = null; // the weak classifiers
    List<Double> alphaList = null;          // the weight of each weak classifier

    public Adaboost(Instance[] instances) {
        this.instances = instances;
    }

    public List<String> adaboost(int T) { // T base learners
        int len = this.instances.length;
        double[] W = new double[len];     // instance weights
        for (int i = 0; i < len; i++) {
            W[i] = 1.0 / len;             // initial uniform distribution
        }
        classifierList = new ArrayList<Classifier>();
        alphaList = new ArrayList<Double>();
        List<String> iHP = new ArrayList<String>();
        for (int t = 0; t < T; t++) {     // T rounds
            Classifier cf = getMinErrorRateClassifier(W);
            classifierList.add(cf);
            double errorRate = cf.errorRate;
            // compute the weight of this weak classifier
            double alpha = 0.5 * Math.log((1 - errorRate) / errorRate);
            alphaList.add(alpha);
            // update the instance weights
            double z = 0;
            for (int i = 0; i < W.length; i++) {
                W[i] = W[i] * Math.exp(-alpha * instances[i].label * cf.classify(instances[i]));
                z += W[i];
            }
            for (int i = 0; i < W.length; i++) { // divide by the normalisation factor
                W[i] /= z;
            }
            iHP.add(String.valueOf(getErrorCount())); // record the ensemble's training error count after this round
        }
        return iHP;
    }

    private int getErrorCount() {
        int count = 0;
        for (Instance instance : instances) {
            if (predict(instance) != instance.label) count++;
        }
        return count;
    }

    /**
     * Ensemble prediction: a weighted vote of the weak classifiers.
     */
    public int predict(Instance instance) {
        double p = 0;
        for (int i = 0; i < classifierList.size(); i++) {
            p += classifierList.get(i).classify(instance) * alphaList.get(i);
        }
        if (p > 0) return 1;
        return -1;
    }

    /**
     * Return the classifier with the lowest weighted error rate across all dimensions.
     */
    private Classifier getMinErrorRateClassifier(double[] W) {
        double errorRate = Double.MAX_VALUE;
        SimpleClassifier minErrorRateClassifier = null;
        int dimLength = instances[0].dim.length;
        for (int i = 0; i < dimLength; i++) {
            SimpleClassifier sc = new SimpleClassifier();
            sc.train(instances, W, i);  // train one base learner per dimension
            if (errorRate > sc.errorRate) {
                errorRate = sc.errorRate;
                minErrorRateClassifier = sc;
            }
        }
        return minErrorRateClassifier;
    }
}
A few key steps should be kept clear. In public List<String> adaboost(int T), the instance weights are first initialised uniformly and then T rounds are run, each of which trains a base learner and updates the weight distribution. private Classifier getMinErrorRateClassifier(double[] W) trains one base learner per feature dimension and returns the one with the lowest weighted error rate; that error rate is then used to compute the learner's weight alpha and to update the distribution over the training instances, as the sketch below works through with concrete numbers.
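The following self-contained sketch mirrors the arithmetic of a single round. It is not part of the original classes: the class name WeightUpdateDemo, the assumed error rate of 0.3, and the assumption that instances 0..2 are the misclassified ones are purely illustrative.

// Hypothetical illustration of one round of the AdaBoost weight update,
// mirroring the formulas used in adaboost(int T) above.
public class WeightUpdateDemo {
    public static void main(String[] args) {
        double errorRate = 0.3;  // assumed weighted error of the chosen base learner
        double alpha = 0.5 * Math.log((1 - errorRate) / errorRate);  // about 0.4236

        // ten instances with uniform weight 0.1; suppose three of them were misclassified
        double[] W = new double[10];
        boolean[] misclassified = new boolean[10];
        for (int i = 0; i < 10; i++) {
            W[i] = 0.1;
            misclassified[i] = (i < 3);  // assumption: instances 0..2 are the errors
        }

        // W[i] *= exp(-alpha * y_i * h(x_i)): exp(-alpha) if correct, exp(+alpha) if wrong
        double z = 0;
        for (int i = 0; i < W.length; i++) {
            W[i] *= Math.exp(misclassified[i] ? alpha : -alpha);
            z += W[i];
        }
        for (int i = 0; i < W.length; i++) {
            W[i] /= z;   // normalise so the weights sum to 1 again
        }

        // Misclassified instances end up with weight about 0.1667, the others about 0.0714,
        // so the next base learner concentrates on the points this one got wrong.
        System.out.println("alpha = " + alpha);
        for (double w : W) System.out.println(w);
    }
}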
5) Testing the AdaBoost algorithm: the ensemble prediction is produced by a weighted-voting combination strategy
package sk.adaboost;

import java.util.List;

public class AdaboostTest {
    public static void main(String[] args) {
        // synthetic data
        double[] ins1 = {0, 3};
        double[] ins2 = {1, 3};
        double[] ins3 = {2, 3};
        double[] ins4 = {3, 1};
        double[] ins5 = {4, 1};
        double[] ins6 = {5, 1};
        double[] ins7 = {6, 3};
        double[] ins8 = {7, 3};
        double[] ins9 = {8, 0};
        double[] ins10 = {9, 1};
        Instance instance1 = new Instance(ins1, 1);
        Instance instance2 = new Instance(ins2, 1);
        Instance instance3 = new Instance(ins3, 1);
        Instance instance4 = new Instance(ins4, -1);
        Instance instance5 = new Instance(ins5, -1);
        Instance instance6 = new Instance(ins6, -1);
        Instance instance7 = new Instance(ins7, 1);
        Instance instance8 = new Instance(ins8, 1);
        Instance instance9 = new Instance(ins9, 1);
        Instance instance10 = new Instance(ins10, -1);
        Instance[] instances = {instance1, instance2, instance3, instance4, instance5,
                                instance6, instance7, instance8, instance9, instance10};

        // ensemble learning: sequential, with strong dependencies between the base learners
        Adaboost ab = new Adaboost(instances);
        List<String> iHP = ab.adaboost(10);

        // training error count of the ensemble after each round
        for (int t = 0; t < iHP.size(); t++) {
            System.out.println("round " + (t + 1) + ": " + iHP.get(t) + " training errors");
        }

        // output the ensemble prediction, combined by a weighted vote over the base classifiers
        System.out.println("prediction for instance1: " + ab.predict(instance1));
    }
}
6) The code above is meant to help understand the algorithm itself; in practical applications the base learner can be replaced with other algorithms, as sketched below.
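As a minimal sketch of what swapping in another base learner might look like, the hypothetical class below (WeightedNearestMeanClassifier is my own name, not from the original post) extends Classifier, trains with the instance weights, and fills in errorRate and errorNumber just as SimpleClassifier does. Note that getMinErrorRateClassifier currently instantiates SimpleClassifier directly, so that method would also need a small change before this learner could actually be used inside Adaboost.

package sk.adaboost;

// Hypothetical alternative base learner (not in the original post): classifies a point
// by comparing its distance to the weighted means of the positive and negative classes.
public class WeightedNearestMeanClassifier extends Classifier {
    private double[] posMean;
    private double[] negMean;

    public void train(Instance[] instances, double[] W) {
        int d = instances[0].dim.length;
        posMean = new double[d];
        negMean = new double[d];
        double posW = 0, negW = 0;
        for (int i = 0; i < instances.length; i++) {
            double[] target = (instances[i].label == 1) ? posMean : negMean;
            for (int j = 0; j < d; j++) {
                target[j] += W[i] * instances[i].dim[j];   // weighted sum per class
            }
            if (instances[i].label == 1) posW += W[i]; else negW += W[i];
        }
        for (int j = 0; j < d; j++) {       // assumes both classes carry non-zero weight
            posMean[j] /= posW;
            negMean[j] /= negW;
        }
        // compute the weighted error, as SimpleClassifier does, so Adaboost can use it
        errorRate = 0;
        errorNumber = 0;
        for (int i = 0; i < instances.length; i++) {
            if (classify(instances[i]) != instances[i].label) {
                errorRate += W[i];
                errorNumber++;
            }
        }
    }

    public int classify(Instance instance) {
        double dPos = 0, dNeg = 0;
        for (int j = 0; j < instance.dim.length; j++) {
            dPos += Math.pow(instance.dim[j] - posMean[j], 2);
            dNeg += Math.pow(instance.dim[j] - negMean[j], 2);
        }
        return (dPos <= dNeg) ? 1 : -1;
    }
}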