opencv下LDA二分類
根據http://www.cnblogs.com/cfantaisie/archive/2011/03/25/1995849.html這是matlab版本下的LDA分類:
[model,k,ClassLabel]=LDATraining(traindata,trainlabel);
>> outputlabel=LDATesting(testdata,k,model,ClassLabel);
>> accurency=length(find(outputlabel==testlabel))/length(testlabel)這樣即可實現分類 得到測試樣本的準確率 很簡潔方便
測試部分其實就是利用訓練得到的model 然後看了下這個model到底是什麼怎麼來的:
我的是二分類 對於二分類而言 這個model是這樣的:
W是dimsX1的投影矩陣 means是traindata每一類經過投影W後的平均值
在OpenCV裡有封裝好的LDA類:我知道opencv裡有結合好的LDA+Fisher人臉識別的FaceRecognizer這個類 那個比較好弄 但如果不是用LDA做人臉 而是分類普通資料就麻煩點了
根據http://blog.csdn.net/cjc211322/article/details/26590027?utm_source=tuicool&utm_medium=referral
http://www.tuicool.com/articles/BvuQFr http://www.cnblogs.com/freedomshe/archive/2012/04/24/sift_kmeans_lda_img_classification.html#L6
//LDA http://blog.csdn.net/zhazhiqiang/article/details/21189415 http://www.cnblogs.com/cfantaisie/archive/2011/03/25/1995849.html
#include<opencv2/highgui/highgui.hpp>
#include<opencv2/ml/ml.hpp>
#include <iostream>
#include"contrib.hpp"
using namespace cv;
using namespace std;
// Demo: two-class LDA on a toy 6x2 data set using OpenCV's LDA class.
// Trains the LDA, prints the eigenvector (the projection direction W,
// dims x 1 for two classes) and the projected centre of each class.
int main()
{
    // sampledata: 6 samples, 2 features each; by construction the first
    // 3 rows belong to class 0 and the last 3 rows to class 1.
    double sampledata[6][2] = { { 0, 1 }, { 0, 2 }, { 2, 4 }, { 8, 0 }, { 8, 2 }, { 9, 4 } };
    Mat mat = Mat(6, 2, CV_64FC1, sampledata);

    // labels: 0 for the first half of the rows, 1 for the second half
    vector<int> labels;
    for (int i = 0; i < mat.rows; i++)
        labels.push_back(i < mat.rows / 2 ? 0 : 1);

    // do LDA (for 2 classes the result is a single dims x 1 eigenvector)
    LDA lda = LDA(mat, labels);

    // get the eigenvector
    Mat eivector = lda.eigenvectors().clone();
    cout << "The eigenvector is:" << endl;
    for (int i = 0; i < eivector.rows; i++)
    {
        for (int j = 0; j < eivector.cols; j++)
            cout << eivector.ptr<double>(i)[j] << " ";
        cout << endl;
    }

    // For the two-class problem, compute the mean of each class.
    const int classNum = 2;
    vector<Mat> classmean(classNum);
    vector<int> setNum(classNum);
    for (int i = 0; i < classNum; i++)
    {
        classmean[i] = Mat::zeros(1, mat.cols, mat.type());
        setNum[i] = 0;
    }
    for (int i = 0; i < mat.rows; i++)
    {
        Mat instance = mat.row(i);
        if (labels[i] == 0 || labels[i] == 1)
        {
            add(classmean[labels[i]], instance, classmean[labels[i]]);
            setNum[labels[i]]++;
        }
        // rows carrying any other label are ignored (as in the original)
    }
    for (int i = 0; i < classNum; i++)
        classmean[i].convertTo(classmean[i], CV_64FC1, 1.0 / static_cast<double>(setNum[i]));

    // Project each class mean onto the LDA direction.
    // BUG FIX: this must be the matrix product (1 x dims) * (dims x 1),
    // i.e. a dot product.  cv::multiply() is ELEMENT-WISE and produced a
    // 1 x dims matrix, so only its first element was printed -- not the
    // projected centre.  Use operator* for a true matrix multiplication.
    vector<Mat> cluster(classNum);
    for (int i = 0; i < classNum; i++)
        cluster[i] = classmean[i] * eivector;

    cout << "The project cluster center is:" << endl;
    for (int i = 0; i < classNum; i++)
        cout << cluster[i].at<double>(0) << endl;

    system("pause");
    return 0;
}
現在想用opencv的LDA來對自己的資料分類 根據http://blog.csdn.net/cjc211322/article/details/26590027?utm_source=tuicool&utm_medium=referral看到opencv的LDA類有建構函式、投影函式、計算特徵向量和特徵值的函式,可是沒有測試函式?然後應該可以根據matlab的自己寫測試部分:
#include<opencv2/highgui/highgui.hpp>
#include<opencv2/ml/ml.hpp>
#include <iostream>
#include"contrib.hpp"
using namespace cv;
using namespace std;
int main()
{
//read traindata trainlabel testdata testlabel and make them into Mat
CvMLData trainfeature, testfeature, trainlabelprimer, testlabelprimer;
trainfeature.read_csv("traindata.csv");
Mat traindata = Mat(trainfeature.get_values(), true);
testfeature.read_csv("testdata.csv");
Mat testdata = Mat(testfeature.get_values(), true);
trainlabelprimer.read_csv("trainlabel.csv");
Mat trainlabel = Mat(trainlabelprimer.get_values(), true);
testlabelprimer.read_csv("testlabel.csv");
Mat testlabel = Mat(testlabelprimer.get_values(), true);
int positivenum_train = 128, negativenum_train = 238;
//make trainlabel into vector<int>
vector<int> labelfortrain;
for (int i = 0; i < trainlabel.rows; ++i)
{
uchar* data = trainlabel.ptr<uchar>(i);
labelfortrain.push_back(data[0]);
}
//do LDA
//void LDA::compute(InputArrayOfArrays _src, InputArray _lbls) //第一個引數暫時只支援vector<Mat>,並且Mat必須是單通道的 //並且每個Mat的大小要相同,因為在實際操作中都是需要將Mat轉化為一個行向量 //compute函式的實現主要是根據第一個引數的型別呼叫LDA::lda來實現
//void LDA(const Mat& src, vector<int> labels) //這裡要求訓練樣本總數N要大於特徵的維數D
cout << "LDA training..." << endl;
LDA lda = LDA(traindata, labelfortrain);
cout << "LDA training done!" << endl;
//get the eigenvector //相當於matlab裡LDATraining所得model的W dimsX(num-1)
Mat eivector = lda.eigenvectors().clone();
//get the mean after projection //相當於matlab裡LDATraining所得model的means numX1
Mat projectionimg=lda.project(traindata);
Scalar mean,stddev;
meanStdDev(projectionimg, mean, stddev);
float getmean = mean[0];
cout << "2 classes mean:" << getmean << endl;
////////////////////
Mat myimg1(positivenum_train, projectionimg.cols, projectionimg.type()), myimg2(negativenum_train, projectionimg.cols, projectionimg.type());
for (int i = 0; i < positivenum_train; i++)
{
uchar* myrow = myimg1.ptr<uchar>(i);
uchar* row = projectionimg.ptr<uchar>(i);
for (int j = 0; j <projectionimg.cols; j++)
myrow[j] = row[j];
}
Scalar means1, means2;
Scalar stddev1, stddev2;
meanStdDev(myimg1, means1, stddev1);
float getmean1 = means1[0];
cout << getmean1 << endl;
/*
for (int i =0; i < negativenum_train; ++i)
{
uchar* myrow = myimg2.ptr<uchar>(i);
int ii = i + positivenum_train ;
uchar* row =projectionimg.ptr<uchar>(ii);
for (int j = 0; j <projectionimg.cols; j++)
myrow[j] = row[j];
}
meanStdDev(myimg2, means2, stddev2);
float getmean2 = means2[0];
cout << getmean2<<endl;
*/
return 0;
} //可以看到投影矩陣有 兩個類投影后的平均值有 就差每一類投影后的平均值了
得到兩個類投影后的projectionimg的平均值是0.804454 然而我要的是每一類投影后的平均值 可是第一類投影后的怎麼是個負數呢?難道是我把projectionimg裡的第一類的存在myimg1裡時候出錯了?發現自己太蠢了
直接用rowRange就好了 不過我覺得自己編得沒錯 資料怎麼傳不到myimg1裡去呢?
#include<opencv2/highgui/highgui.hpp>
#include<opencv2/ml/ml.hpp>
#include <iostream>
#include"contrib.hpp"
using namespace cv;
using namespace std;
int main()
{
//read traindata trainlabel testdata testlabel and make them into Mat
CvMLData trainfeature, testfeature, trainlabelprimer, testlabelprimer;
trainfeature.read_csv("traindata.csv");
Mat traindata = Mat(trainfeature.get_values(), true);
testfeature.read_csv("testdata.csv");
Mat testdata = Mat(testfeature.get_values(), true);
trainlabelprimer.read_csv("trainlabel.csv");
Mat trainlabel = Mat(trainlabelprimer.get_values(), true);
testlabelprimer.read_csv("testlabel.csv");
Mat testlabel = Mat(testlabelprimer.get_values(), true);
int positivenum_train = 128, negativenum_train = 238;
//make trainlabel into vector<int>
vector<int> labelfortrain;
for (int i = 0; i < trainlabel.rows; ++i)
{
uchar* data = trainlabel.ptr<uchar>(i);
labelfortrain.push_back(data[0]);
}
//do LDA
//這裡要求訓練樣本總數N要大於特徵的維數D
cout << "LDA training..." << endl;
LDA lda = LDA(traindata, labelfortrain);
cout << "LDA training done!" << endl;
//get the eigenvector //相當於matlab裡LDATraining所得model的W dimsX(num-1)
Mat eivector = lda.eigenvectors().clone();
//get the mean after projection //相當於matlab裡LDATraining所得model的means 1X2
Mat projectionimg=lda.project(traindata);
Scalar mean,stddev;
meanStdDev(projectionimg, mean, stddev);
float getmean = mean[0];
cout << "2 classes mean:" << getmean << endl;
//cout << projectionimg << endl;
Mat myimg1 = projectionimg.rowRange(0,positivenum_train).clone();
//cout << myimg1 << endl;
Mat myimg2 = projectionimg.rowRange(positivenum_train, projectionimg.rows).clone();
//cout << myimg2<< endl;
Scalar mean1, stddev1;
meanStdDev(myimg1, mean1, stddev1);
float getmean1 = mean1[0];
cout << "1th classes sample number:"<<myimg1.rows<<" and its mean:" << getmean1 << endl;
Scalar mean2, stddev2;
meanStdDev(myimg2, mean2, stddev2);
float getmean2 = mean2[0];
cout << "2th classes sample number:"<<myimg2.rows<<" and its mean " << getmean2 << endl;
return 0;
}
這樣就可以算出兩個類分別投影后的均值了。
這樣matlab的model在opencv裡就有了 接下來就根據測試函式寫opencv的測試函數了
% LDATesting: classify samples using a set of pairwise (one-vs-one) binary
% LDA models with majority voting.  Each binary model projects a sample
% with its W and votes for whichever class's projected mean is nearer;
% the class collecting the most votes wins.
function target=LDATesting(input,k,model,ClassLabel)
% input: n*d matrix,representing samples
% target: n*1 matrix,class label
% model: struct type(see codes below)
% k: the total class number
% ClassLabel: the class name of each class
[n,~]=size(input);
s=zeros(n,k);      % s(i,j): number of votes sample i received for class j
target=zeros(n,1);
for j=1:k*(k-1)/2  % one binary model per unordered class pair
a=model(j).a;      % class index of the pair's first class
b=model(j).b;      % class index of the pair's second class
w=model(j).W;      % projection matrix of this binary model
m=model(j).means;  % projected means: row 1 for class a, row 2 for class b
for i=1:n
sample=input(i,:);
tmp=sample*w;      % project the sample onto this model's LDA direction
% vote for whichever class's projected mean is closer to the projection
if norm(tmp-m(1,:))<norm(tmp-m(2,:))
s(i,a)=s(i,a)+1;
else
s(i,b)=s(i,b)+1;
end
end
end
% for each sample, pick the class with the maximum number of votes
for i=1:n
pos=1;
maxV=0;
for j=1:k
if s(i,j)>maxV
maxV=s(i,j);
pos=j;
end
end
target(i)=ClassLabel(pos);
end
發現opencv裡的LDA真是矯情,首先要求特徵維數要小於樣本個數 而matlab裡的LDA就沒這個規定 還有要求樣本標籤不能像matlab裡一樣nX1的向量 如[11111122222]之類的 也不能是nX2的矩陣 如[0 1]代表第二類 [1 0]代表第一類 我試過這兩種標籤都報錯然後改成用[0.9 0.1]代表第一類 [0.1 0.9]代表第二類 就不報錯了
#include<opencv2/highgui/highgui.hpp>
#include<opencv2/ml/ml.hpp>
#include <iostream>
#include"contrib.hpp"
using namespace cv;
using namespace std;
int main()
{
//read traindata trainlabel testdata testlabel and make them into Mat
CvMLData trainfeature, testfeature, trainlabelprimer, testlabelprimer;
trainfeature.read_csv("traindata.csv");
Mat traindata = Mat(trainfeature.get_values(), true);
testfeature.read_csv("testdata.csv");
Mat testdata = Mat(testfeature.get_values(), true);
trainlabelprimer.read_csv("trainlabel.csv"); //1th :[1 0] 2th:[0 1]
Mat trainlabel = Mat(trainlabelprimer.get_values(), true);
testlabelprimer.read_csv("testlabel.csv");
Mat testlabel = Mat(testlabelprimer.get_values(), true);
int positivenum_train = 128, negativenum_train = 238;
//make trainlabel into vector<int>
vector<int> labelfortrain;
for (int i = 0; i < trainlabel.rows; ++i)
{
uchar* data = trainlabel.ptr<uchar>(i);
labelfortrain.push_back(data[0]);
}
//do LDA
//這裡要求訓練樣本總數N要大於特徵的維數D
cout << "LDA training..." << endl;
LDA lda = LDA(traindata, labelfortrain);
cout << "LDA training done!" << endl;
//get the eigenvector //相當於matlab裡LDATraining所得model的W dimsX(num-1)
Mat eivector = lda.eigenvectors().clone();
//get the mean after projection //相當於matlab裡LDATraining所得model的means 1X2
Mat projectionimg=lda.project(traindata);
//Scalar mean,stddev;
//meanStdDev(projectionimg, mean, stddev);
//float getmean = mean[0];
//cout << "2 classes mean:" << getmean << endl;
//cout << projectionimg << endl;
Mat myimg1 = projectionimg.rowRange(0,positivenum_train).clone();
//cout << myimg1 << endl;
Mat myimg2 = projectionimg.rowRange(positivenum_train, projectionimg.rows).clone();
//cout << myimg2<< endl;
Scalar mean1, stddev1;
meanStdDev(myimg1, mean1, stddev1);
float getmean1 = mean1[0];
cout << "1th classes sample number:"<<myimg1.rows<<" and its mean:" << getmean1 << endl;
Scalar mean2, stddev2;
meanStdDev(myimg2, mean2, stddev2);
float getmean2 = mean2[0];
cout << "2th classes sample number:"<<myimg2.rows<<" and its mean " << getmean2 << endl;
//////////////////////////////////////test
cout << "LDA Testing..." << endl;
Mat testlabelout(testlabel.size(), testlabel.type()); //for LDA output label //matlab:target
Mat s(testlabel.rows, 2, CV_32F,cv::Scalar(0));
int a = 1, b = 2;
for (int i = 0; i < testdata.rows; i++)
{
float *currentrow = s.ptr<float>(i);
Mat sample = testdata.rowRange(i, i+1).clone();
Mat tmp = lda.project(sample);
if (norm(tmp - getmean1)<norm(tmp - getmean2))
currentrow[a] = currentrow[a] + 1;
else
currentrow[b] = currentrow[b] + 1;
}
//cout << s << endl;
cout << "LDA Test done!" << endl;
///////////////////////////accurency
int correct = 0;
float accurency;
for (int i = 0; i < s.rows; ++i)
{
float* p = s.ptr<float>(i);
float* being = testlabel.ptr<float>(i);
//if (p[0] > p[1])
//cout << 0.9 << " " << 0.1 << " " << being[0] << " " << being[1] << endl;
//else
//cout << 0.1 << " " << 0.9 << " " << being[0] << " " << being[1] << endl;
if (((p[0] > p[1]) && (being[0] > being[1])) || ((p[0] < p[1]) && (being[0] < being[1])))
++correct;
}
cout << endl;
accurency = (float)correct / testlabel.rows;
cout << "whole accurency= " << accurency << endl;
///////////////////////////////////////////////////////////////
int alluseless = 0, uselesserror = 0, alluseful = 0, usefulerror = 0;
float stoneerror = 0, kuangerror = 0;
for (int i = 0; i < s.rows; ++i)
{
float* p = s.ptr<float>(i);
float* being = testlabel.ptr<float>(i);
if (being[0] == (float)0.1)
{
++alluseless;
if (p[0] > p[1])
++uselesserror;
}
}
cout << alluseless << " " << uselesserror << endl;
stoneerror = (float)uselesserror / alluseless;
cout << " 1th class error= " << stoneerror << endl;
for (int i = 0; i < s.rows; ++i)
{
float* p = s.ptr<float>(i);
float* being = testlabel.ptr<float>(i);
if (being[0] == (float)0.9)
{
++alluseful;
if (p[0] < p[1])
++usefulerror;
}
}
cout << alluseful << " " << usefulerror << endl;
kuangerror = (float)usefulerror / alluseful;
cout << " 2th class error= " << kuangerror << endl;
return 0;
}
我隨便自創的特徵矩陣 所以出來準確率低。。。不過沒關係 重點不是這個 目前是不報錯 看起來opencv的LDA初步是這樣了。 。。可是這個第一類120錯45張 第二類64錯22張 總準確率不應該是15% 哦我知道哪裡錯了 出來的s竟然有[0 0]和[1 1]的!!!matlab下面的就沒有 matlab下面的只有[0 1]和[1 0]兩種情況 奇怪 改成下面這樣就可以算準確率了:
///////////////////////////accurency
Mat s2(testlabel.rows, 2, CV_32F, cv::Scalar(0));
int correct = 0;
float accurency;
for (int i = 0; i < s.rows; ++i)
{
float* p = s.ptr<float>(i);
float* being = testlabel.ptr<float>(i);
float* p2 = s2.ptr<float>(i);
if (p[0] > p[1])
{
//cout << 0.9 << " " << 0.1 << " " << being[0] << " " << being[1] << endl;
p2[0] = float(0.9);
p2[1] = float(0.1);
}
else
{
//cout << 0.1 << " " << 0.9 << " " << being[0] << " " << being[1] << endl;
p2[0] = float(0.1);
p2[1] = float(0.9);
}
}
for (int i = 0; i < s2.rows; ++i)
{
float* p2 = s2.ptr<float>(i);
float* being = testlabel.ptr<float>(i);
if (((p2[0] > p2[1]) && (being[0] > being[1])) || ((p2[0] < p2[1]) && (being[0] < being[1])))
++correct;
}
cout <<testlabel.rows<<" "<<correct<< endl;
accurency = (float)correct / testlabel.rows;
cout << "whole accurency= " << accurency << endl;
///////////////////////////////////////////////////////////////
int alluseless = 0, uselesserror = 0, alluseful = 0, usefulerror = 0;
float stoneerror = 0, kuangerror = 0;
for (int i = 0; i < s2.rows; ++i)
{
float* p = s2.ptr<float>(i);
float* being = testlabel.ptr<float>(i);
if (being[0] == (float)0.1)
{
++alluseless;
if (p[0] > p[1])
++uselesserror;
}
}
cout << alluseless << " " << uselesserror << endl;
stoneerror = (float)uselesserror / alluseless;
cout << " 1th class error= " << stoneerror << endl;
for (int i = 0; i < s2.rows; ++i)
{
float* p = s2.ptr<float>(i);
float* being = testlabel.ptr<float>(i);
if (being[0] == (float)0.9)
{
++alluseful;
if (p[0] < p[1])
++usefulerror;
}
}
cout << alluseful << " " << usefulerror << endl;
kuangerror = (float)usefulerror / alluseful;
cout << " 2th class error= " << kuangerror << endl;
return 0;
120-45+(64-52)正好等於87 對了。。。
可是我用同樣的特徵矩陣 去matlab下的LDA
均值和opencv下得到的不一樣??W和opencv下的eivector都是170X1 我沒看它們是否一樣 但均值起碼就不一樣了??LDA測試時得到的標籤也不一樣??我的應該還是存在問題的吧?可問題在哪裡呢