Train C4: Real-time pedestrian detection models——C4行人檢測演算法訓練過程
1.樣本的準備
樣本可以使用之前訓練的模型,通過OpenCV的imwrite截圖儲存,然後再人工篩選,這個功能我在C4-Real-time-pedestrian-detection工程裡面已經實現。也可以自己寫一個程式,手動截圖。將正樣本都裁剪成只包含一個人的影象,並歸一化到108*36解析度大小(高108、寬36畫素,與程式碼中的height = 108、width = 36對應);負樣本同樣要歸一化到108*36大小。
把已經準備好的樣本,檔名依序命名(如1.jpg、2.jpg、3.jpg……),然後將正樣本放入C4_SVM_Train_Data\PositiveSamples工程目錄下,負樣本放入C4_SVM_Train_Data\NegativeSamples工程目錄下,修改工程檔案C4_SVM_Train_Data.cpp中main函式裡正負樣本引數PosSampleNum、NegSampleNum,設定為實際使用的數量。(注意:下面程式碼中實際讀取樣本的路徑是..\pos-2\與..\neg-2\,請依照自己存放樣本的目錄修改。)編譯執行程式即可生成訓練所需的樣本檔案samples.txt。程式碼是參考Jianxin Wu的論文實現的,如下所示:
// C4_SVM_Train_Data.cpp
//
// Builds the training file "samples.txt" (sparse "label index:value ..." text
// rows) from cropped pedestrian / background images. Each image is described
// by CENTRIST (census-transform histogram) features computed over a 36x108
// detection window.

#include "stdafx.h"
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>
#include <cstdio>
#include <iostream>
#include <sstream>
#include <vector>
#include <string>
#include <fstream>
#include <bitset>

using namespace cv;
using namespace std;

void calcFeatures(const Mat &imgSrc, vector<float> &features);
void ComputeSobel(const Mat &gray_image, Mat &sobel_image);
void ComputeCT(const Mat &sobel_image, Mat &CT_image);
void generate_sample_list(int posNum, int negNum);
void generateTrainingData(int nClass, int nDims, int posNum, int negNum);

// Program entry point: configure the sample counts and generate samples.txt.
int _tmain(int argc, _TCHAR* argv[])
{
    int PosSampleNum = 5103;  // number of positive samples
    int NegSampleNum = 3102;  // number of negative samples
    int nDims = 6144;         // feature dimension: 24 super-blocks * 256 bins
    int nClass = 2;           // number of classes

    generateTrainingData(nClass, nDims, PosSampleNum, NegSampleNum);

    // Kept from the original program; presumably intended to keep the console
    // open. NOTE(review): confirm whether this blocks when no HighGUI window
    // has been created.
    waitKey(0);
    return 0;
}

// Appends the CENTRIST feature vector of one detection window to `features`.
//
// imgSrc   : input image; only its top-left 36x108 region is used.
// features : output vector; 24 * 256 = 6144 floats are appended on success.
//
// On invalid input (empty image, or smaller than 36x108) the function prints
// "Invalid Input!" and appends nothing, so callers can detect failure by
// comparing features.size() before and after the call.
void calcFeatures(const Mat &imgSrc, vector<float> &features)
{
    // The detection window is 36x108 and is divided into 4x9 blocks of 9x12
    // pixels. Every 2x2 group of adjacent blocks forms one super-block (18x24
    // pixels). Super-blocks slide by 9 pixels horizontally (3 positions) and
    // by 12 pixels vertically (8 positions), giving (4-1)*(9-1) = 24
    // super-blocks. Each super-block contributes a 256-bin histogram of its
    // census-transform codes, for a total of 256 * 24 = 6144 features.
    const int width = 36;
    const int height = 108;
    const int xdiv = 9;
    const int ydiv = 12;

    // The size check prevents the super-block ROIs below from leaving the
    // image, which would make OpenCV throw.
    if (imgSrc.empty() || imgSrc.cols < width || imgSrc.rows < height)
    {
        cout << "Invalid Input!" << endl;
        return;
    }

    Mat gray_image;
    if (imgSrc.channels() == 1)
    {
        gray_image = imgSrc;  // already single-channel, no conversion needed
    }
    else
    {
        cvtColor(imgSrc, gray_image, CV_BGR2GRAY);
    }

    Mat sobel_image;
    ComputeSobel(gray_image, sobel_image);

    Mat CT_feature_image;
    ComputeCT(sobel_image, CT_feature_image);

    // Histogram setup: 256 unit-width bins. calcHist treats the upper bound
    // as exclusive, so it must be 256 (not 255) for code 255 to be counted
    // and for code v to land in bin v.
    int channels[1] = { 0 };
    int hist_size[1] = { 256 };
    float hranges[2] = { 0.0f, 256.0f };
    const float *ranges[1] = { hranges };
    MatND hist;

    features.reserve(features.size() + 24 * 256);

    for (int top = 0; top < height - ydiv; top += ydiv)
    {
        for (int left = 0; left < width - xdiv; left += xdiv)
        {
            const Rect super_block_rect(left, top, 2 * xdiv, 2 * ydiv);
            Mat super_block_image = CT_feature_image(super_block_rect);
            calcHist(&super_block_image, 1, channels, Mat(), hist, 1,
                     hist_size, ranges, true, false);
            for (int bin = 0; bin < 256; bin++)
            {
                features.push_back(hist.at<float>(bin));
            }
        }
    }
}

// Computes the squared Sobel gradient magnitude (Gx*Gx + Gy*Gy) per pixel.
//
// gray_image  : 8-bit single-channel input.
// sobel_image : output, CV_32FC1, same size as the input. The 1-pixel border,
//               where the 3x3 kernel does not fit, is set to 0 so the result
//               never contains uninitialised memory.
void ComputeSobel(const Mat &gray_image, Mat &sobel_image)
{
    CV_Assert(gray_image.type() == CV_8UC1);

    sobel_image.create(gray_image.size(), CV_32FC1);
    sobel_image.setTo(Scalar(0));

    for (int i = 1; i < gray_image.rows - 1; i++)
    {
        const uchar *up = gray_image.ptr<uchar>(i - 1);
        const uchar *mid = gray_image.ptr<uchar>(i);
        const uchar *down = gray_image.ptr<uchar>(i + 1);
        float *out = sobel_image.ptr<float>(i);

        for (int j = 1; j < gray_image.cols - 1; j++)
        {
            // Row below minus row above, weighted 1-2-1.
            const int Gx = (down[j - 1] + 2 * down[j] + down[j + 1])
                         - (up[j - 1] + 2 * up[j] + up[j + 1]);
            // Column to the right minus column to the left, weighted 1-2-1.
            const int Gy = (up[j + 1] + 2 * mid[j + 1] + down[j + 1])
                         - (up[j - 1] + 2 * mid[j - 1] + down[j - 1]);
            out[j] = static_cast<float>(Gx * Gx + Gy * Gy);
        }
    }
}

// Computes the 8-bit census transform (CT) code of every interior pixel.
//
// sobel_image : CV_32FC1 input (the squared gradient magnitude).
// CT_image    : output, CV_32FC1, same size; each value is an integer code in
//               [0, 255]. The 2-pixel border, for which no code is computed,
//               is set to 0.
//
// A bit is set when the centre value is <= the corresponding neighbour. The
// neighbours are visited row by row from top-left to bottom-right; the first
// one maps to the most significant bit (0x80) and the last to 0x01. All eight
// comparisons are independent of each other.
void ComputeCT(const Mat &sobel_image, Mat &CT_image)
{
    CV_Assert(sobel_image.type() == CV_32FC1);

    static const int dRow[8] = { -1, -1, -1,  0, 0,  1, 1, 1 };
    static const int dCol[8] = { -1,  0,  1, -1, 1, -1, 0, 1 };

    CT_image.create(sobel_image.size(), CV_32FC1);
    CT_image.setTo(Scalar(0));

    for (int i = 2; i < sobel_image.rows - 2; i++)
    {
        for (int j = 2; j < sobel_image.cols - 2; j++)
        {
            const float centre = sobel_image.at<float>(i, j);
            int code = 0;
            for (int k = 0; k < 8; k++)
            {
                code <<= 1;
                if (centre <= sobel_image.at<float>(i + dRow[k], j + dCol[k]))
                {
                    code |= 1;
                }
            }
            CT_image.at<float>(i, j) = static_cast<float>(code);
        }
    }
}

// Writes the names "1.jpg" .. "<count>.jpg", one per CRLF-terminated line,
// to the file `listPath`. Reports an error and returns if the file cannot be
// opened.
static void writeNumberedImageList(const char *listPath, int count)
{
    FILE *fp = fopen(listPath, "wb+");
    if (fp == NULL)
    {
        cout << "Cannot open " << listPath << " for writing!" << endl;
        return;
    }
    for (int i = 1; i <= count; i++)
    {
        fprintf(fp, "%d.jpg\r\n", i);
    }
    fclose(fp);
}

// Generates the file-name lists of the positive and negative samples
// ("PositiveSamplesList.txt" and "NegativeSamplesList.txt").
void generate_sample_list(int posNum, int negNum)
{
    writeNumberedImageList("PositiveSamplesList.txt", posNum);
    writeNumberedImageList("NegativeSamplesList.txt", negNum);
}

// Reads up to `maxCount` image names from `listFile`, loads each image from
// `dir`, and appends its features to `features` and `label` to `labels`.
// Images that cannot be read, or that do not yield exactly `nDims` features,
// are skipped so that features and labels always stay aligned.
static void appendSamples(const char *listFile, const string &dir, int maxCount,
                          int label, int nDims,
                          vector<float> &features, vector<int> &labels)
{
    ifstream fin(listFile);
    string name;
    for (int i = 0; i < maxCount && getline(fin, name); i++)
    {
        // The list is written with CRLF line endings; drop a trailing '\r'
        // in case the stream did not translate it.
        if (!name.empty() && name[name.size() - 1] == '\r')
        {
            name.erase(name.size() - 1);
        }
        if (name.empty())
        {
            continue;
        }

        const string path = dir + name;
        const Mat input_image = imread(path);

        const size_t before = features.size();
        calcFeatures(input_image, features);
        if (features.size() - before != static_cast<size_t>(nDims))
        {
            features.resize(before);  // discard any partial feature vector
            cout << "Skipping sample (feature extraction failed): " << path << endl;
            continue;
        }
        labels.push_back(label);
    }
}

// Extracts features for all positive and negative samples and writes them to
// "samples.txt", one sample per line:
//     <label> 1:<v1> 2:<v2> ... <nDims>:<vnDims>
// with label 1 for positive samples and -1 for negative samples.
//
// nClass : number of classes (not used by this function).
// nDims  : number of features per sample; must equal what calcFeatures
//          produces (6144).
// posNum : number of positive samples to read from ..\pos-2\ .
// negNum : number of negative samples to read from ..\neg-2\ .
void generateTrainingData(int nClass, int nDims, int posNum, int negNum)
{
    (void)nClass;

    vector<float> features;  // all feature vectors, concatenated
    vector<int> labels;      // one label per successfully processed sample

    generate_sample_list(posNum, negNum);  // create both file-name lists

    appendSamples("PositiveSamplesList.txt", "..\\pos-2\\", posNum, 1, nDims,
                  features, labels);
    cout << "Finished processing positive samlpes !" << endl;

    appendSamples("NegativeSamplesList.txt", "..\\neg-2\\", negNum, -1, nDims,
                  features, labels);
    cout << "Finished processing negative samlpes !" << endl;

    // Write the feature data in the sparse text format expected by liblinear.
    // "wb+" truncates any existing samples.txt before writing.
    FILE *fp = fopen("samples.txt", "wb+");
    if (fp == NULL)
    {
        cout << "Cannot open samples.txt for writing!" << endl;
        return;
    }

    const size_t dims = static_cast<size_t>(nDims);
    for (size_t m = 0; m < labels.size(); m++)
    {
        fprintf(fp, "%d ", labels[m]);
        for (size_t n = 0; n < dims; n++)
        {
            fprintf(fp, "%d:%f ", static_cast<int>(n + 1), features[m * dims + n]);
        }
        fprintf(fp, "\r\n");
    }
    fclose(fp);  // flush and release the file

    cout << "Generate Training Data Complete!" << endl << endl;
}
2.訓練工具的準備
liblinear-2.20、libHIK-2.07、OpenCV 2.4.9。
liblinear-2.20直接解壓縮就可以使用。
libHIK-2.07解壓縮之後需要編譯,編譯過程可以參考libHIK-2.07\libHIK目錄中的libHIK_v2.pdf,在3.3 Installation in Windows 中有具體的說明。
首先,用記事本開啟libHIK-2.07\libHIK中的Makefile.win檔案,將第13行OPENCV_INCLUDE的條目分別修改為本機的(下面是我的)
D:\Program Files (x86)\opencv\build\include\opencv
D:\Program Files (x86)\opencv\build\include
然後,修改第14行的OPENCV_LIB條目為本機的(下面是我的)
D:\Program Files (x86)\opencv\build\x64\vc10\lib
接著,修改第80行的opencv_highgui231.lib為opencv_highgui249.lib
修改第80行的opencv_core231.lib為opencv_core249.lib
修改第80行的opencv_imgproc231.lib為opencv_imgproc249.lib
最後,開啟Visual Studio x64 Win64命令提示(2010),在命令列將目錄切換到libHIK-2.07\libHIK下,執行命令WinMake.bat 即可完成編譯。
3.訓練過程
首先,將步驟1中生成的samples.txt複製到liblinear-2.20\windows目錄中,然後在cmd命令列裡輸入命令:
train.exe -s 2 -e 0.000001 -B 1 samples.txt combined.txt.model
這樣就生成了第1個行人檢測模型。
然後,將步驟1中生成的samples.txt複製到libHIK-2.07\libHIK\windows目錄中,然後在cmd命令列裡輸入命令:
train_HIK.exe -s 1 -u 256 -e 0.000001 -B 1 samples.txt combined2.txt.model
這樣就生成了第2個行人檢測模型。
最後,將訓練好的2個模型複製到行人檢測演算法的工程資料夾下面,就可使用了。