OpenCV 2.4.9 Source Code Analysis: SURF
SURF (Speeded Up Robust Features) is a robust local feature detection algorithm. It was first proposed by Herbert Bay et al. in 2006 and refined in 2008; the algorithm was in fact part of Bay's doctoral research and was published as part of his PhD thesis.
SURF draws part of its inspiration from SIFT, but, as its name suggests, besides a detector with high repeatability and a highly distinctive descriptor, it offers strong robustness and much higher speed: according to Bay, SURF is at least three times faster than SIFT, and its overall performance is better. Like SIFT, SURF is patented in the United States.
As with my previous SIFT article, the full write-up consists of three parts: theory, source code analysis, and application examples. Since the article is long and pasting all the formulas and figures here is cumbersome, I have also uploaded the full text to Baidu Wenku and CSDN for easier reading:
http://download.csdn.net/detail/zhaocj/8347849
http://wenku.baidu.com/view/2f1e4d8ef705cc1754270945.html
Here I reproduce only the source code analysis. I suggest reading the theory and the source code together, which makes everything easier to understand, so the full article is the better read.
The SURF algorithm is implemented in sources/modules/nonfree/src/surf.cpp
The constructors of the SURF class are:
SURF::SURF()
SURF::SURF(double hessianThreshold, int nOctaves=4, int nOctaveLayers=2, bool extended=true,
bool upright=false )
The parameters are:
hessianThreshold is the threshold on the determinant response of the Hessian matrix
nOctaves is the number of octaves in the image stack
nOctaveLayers is the number of intermediate layers per octave; this value plus 2 equals the number of layers contained in each octave
extended selects between the 128- and the 64-dimensional descriptor; true means the 128-dimensional descriptor
upright selects the U-SURF algorithm; true means U-SURF is used
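As a quick usage sketch before diving into the source (a minimal example, assuming an OpenCV 2.4.x build with the nonfree module; the image file name is hypothetical):

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/nonfree/features2d.hpp>
using namespace cv;

int main()
{
    Mat img = imread("test.jpg", CV_LOAD_IMAGE_GRAYSCALE); // hypothetical input image
    SURF surf(1000, 4, 2, false, false);      // threshold 1000, 64-D descriptor, rotation invariant
    std::vector<KeyPoint> keypoints;
    Mat descriptors;
    surf(img, Mat(), keypoints, descriptors); // detection + description in one call
    return 0;
}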
The overloaded operator() of the SURF class:
void SURF::operator()(InputArray _img, InputArray _mask, CV_OUT vector<KeyPoint>& keypoints, OutputArray _descriptors, bool useProvidedKeypoints) const
//_img is the input 8-bit grayscale image
//_mask is the mask matrix
//keypoints is the vector of keypoints
//_descriptors holds the keypoint descriptors
//useProvidedKeypoints controls whether detection is performed; true means detection is skipped and descriptors are computed only for the keypoints passed in
{
    //define and initialize the matrices: sum is the integral image of the input, mask1 and msum are needed when a mask is used
    Mat img = _img.getMat(), mask = _mask.getMat(), mask1, sum, msum;
    //doDescriptors indicates whether descriptors have to be computed
    bool doDescriptors = _descriptors.needed();
    //validate the input image
    CV_Assert(!img.empty() && img.depth() == CV_8U);
    if( img.channels() > 1 )
        cvtColor(img, img, COLOR_BGR2GRAY);
    //validate the remaining inputs
    CV_Assert(mask.empty() || (mask.type() == CV_8U && mask.size() == img.size()));
    CV_Assert(hessianThreshold >= 0);
    CV_Assert(nOctaves > 0);
    CV_Assert(nOctaveLayers > 0);
    //compute the integral image of the input
    integral(img, sum, CV_32S);
    // Compute keypoints only if we are not asked for evaluating the descriptors at some given locations:
    //useProvidedKeypoints == false means detection is performed
    if( !useProvidedKeypoints )
    {
        if( !mask.empty() ) //mask handling
        {
            cv::min(mask, 1, mask1);
            integral(mask1, msum, CV_32S); //msum is the integral image of the mask
        }
        //Fast-Hessian keypoint detection; fastHessianDetector is explained in detail below
        fastHessianDetector( sum, msum, keypoints, nOctaves, nOctaveLayers, (float)hessianThreshold );
    }
    //N is the number of detected keypoints
    int i, j, N = (int)keypoints.size();
    if( N > 0 ) //keypoints were detected
    {
        Mat descriptors; //descriptor matrix
        bool _1d = false;
        //extended selects between the 128- and the 64-dimensional descriptor
        int dcols = extended ? 128 : 64;
        //storage size of one descriptor
        size_t dsize = dcols*sizeof(float);
        if( doDescriptors ) //descriptors are requested
        {
            //determine the layout of the descriptor output
            _1d = _descriptors.kind() == _InputArray::STD_VECTOR && _descriptors.type() == CV_32F;
            if( _1d )
            {
                //the descriptors of all keypoints are concatenated into one vector
                _descriptors.create(N*dcols, 1, CV_32F);
                descriptors = _descriptors.getMat().reshape(1, N);
            }
            else
            {
                //one descriptor per keypoint, N descriptors in total
                _descriptors.create(N, dcols, CV_32F);
                descriptors = _descriptors.getMat();
            }
        }
        // we call SURFInvoker in any case, even if we do not need descriptors,
        // since it computes orientation of each feature.
        //orientation assignment and descriptor generation; SURFInvoker is explained in detail below
        parallel_for_(Range(0, N), SURFInvoker(img, sum, keypoints, descriptors, extended, upright) );
        // remove keypoints that were marked for deletion
        //i indexes the keypoints before deletion, j after deletion
        for( i = j = 0; i < N; i++ )
        {
            if( keypoints[i].size > 0 ) //size > 0 marks a valid keypoint
            {
                if( i > j ) //i > j means some keypoints before i were deleted
                {
                    //shift the later keypoints forward to fill the gaps left by deleted ones
                    keypoints[j] = keypoints[i];
                    //move the corresponding descriptors as well
                    if( doDescriptors )
                        memcpy( descriptors.ptr(j), descriptors.ptr(i), dsize);
                }
                j++; //count the surviving keypoints
            }
        }
        //N is the total before deletion, j the total after; N > j means some keypoints were deleted
        if( N > j )
        {
            //update the keypoint total
            N = j;
            keypoints.resize(N);
            if( doDescriptors )
            {
                //shrink the descriptor output according to its layout
                Mat d = descriptors.rowRange(0, N);
                if( _1d )
                    d = d.reshape(1, N*dcols);
                d.copyTo(_descriptors);
            }
        }
    }
}
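Note the two output layouts handled above: passing a std::vector<float> as _descriptors yields all descriptors concatenated into one flat vector, while passing a Mat yields an N×dcols CV_32F matrix. A minimal sketch (assuming a SURF object surf and a grayscale Mat img are already set up):

std::vector<KeyPoint> kpts;
std::vector<float> flat;   // N*64 (or N*128) floats, concatenated: the _1d == true branch
surf(img, Mat(), kpts, flat);
Mat table;                 // N x 64 (or N x 128) CV_32F matrix: the _1d == false branch
surf(img, Mat(), kpts, table);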
The SURF keypoint detection function fastHessianDetector:
static void fastHessianDetector( const Mat& sum, const Mat& mask_sum, vector<KeyPoint>& keypoints, int nOctaves, int nOctaveLayers, float hessianThreshold )
{
/* Sampling step along image x and y axes at first octave. This is doubled
for each additional octave. WARNING: Increasing this improves speed,
however keypoint extraction becomes unreliable. */
//To reduce the amount of computation and speed things up, the code uses sampling steps: in the first octave every pixel of every layer is filtered with the box-filter templates, in the second octave the layers are filtered at every other pixel (sampling step 2^1 = 2), the third octave uses a sampling step of 2^2 = 4, and so on for the remaining octaves.
//initial sampling step, i.e. the step used by the layers of the first octave
const int SAMPLE_STEP0 = 1;
//total number of layers, equal to the number of octaves times the layers per octave
int nTotalLayers = (nOctaveLayers+2)*nOctaves;
//number of middle layers, i.e. the layers on which non-maximum suppression is performed
int nMiddleLayers = nOctaveLayers*nOctaves;
//various vectors: dets holds the Hessian determinant matrices, traces the Hessian trace matrices, sizes the box-filter sizes, sampleSteps the sampling step of each layer, and middleIndices the indices of the middle layers (the layers on which non-maximum suppression is performed)
vector<Mat> dets(nTotalLayers);
vector<Mat> traces(nTotalLayers);
vector<int> sizes(nTotalLayers);
vector<int> sampleSteps(nTotalLayers);
vector<int> middleIndices(nMiddleLayers);
//clear the keypoint vector
keypoints.clear();
// Allocate space and calculate properties of each layer
//index indexes nTotalLayers, middleIndex indexes nMiddleLayers, step is the sampling step
int index = 0, middleIndex = 0, step = SAMPLE_STEP0;
//allocate memory and compute the per-layer data
//because of the sampling steps, the number of determinant and trace samples differs from layer to layer, so each layer needs its own amount of storage
for( int octave = 0; octave < nOctaves; octave++ )
{
for( int layer = 0; layer < nOctaveLayers+2; layer++ )
{
/* The integral image sum is one pixel bigger than the source image*/
dets[index].create( (sum.rows-1)/step, (sum.cols-1)/step, CV_32F );
traces[index].create( (sum.rows-1)/step, (sum.cols-1)/step, CV_32F );
//SURF_HAAR_SIZE0 = 9 is the box-filter size used in the first layer of the first octave
//SURF_HAAR_SIZE_INC = 6 is the number of pixels by which the box-filter size grows
//formula 11
sizes[index] = (SURF_HAAR_SIZE0 + SURF_HAAR_SIZE_INC*layer) << octave;
sampleSteps[index] = step;
if( 0 < layer && layer <= nOctaveLayers )
middleIndices[middleIndex++] = index; //record which layer of the image stack this middle layer corresponds to
index++; //advance the layer index
}
step *= 2; //double the sampling step
}
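//Worked example (assuming the default nOctaves = 4, nOctaveLayers = 2): from sizes = (9 + 6*layer) << octave, layer = 0..3, the template sizes are
//octave 0 (step 1): 9, 15, 21, 27
//octave 1 (step 2): 18, 30, 42, 54
//octave 2 (step 4): 36, 60, 84, 108
//octave 3 (step 8): 72, 120, 168, 216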
// Calculate hessian determinant and trace samples in each layer
//compute the Hessian determinant and trace of every layer; the actual work is done by SURFBuildInvoker, explained in detail below
//parallel_for_ means these computations can run in parallel (provided OpenCV was built with TBB)
parallel_for_( Range(0, nTotalLayers),
SURFBuildInvoker(sum, sizes, sampleSteps, dets, traces) );
// Find maxima in the determinant of the hessian
//detect the keypoints; the actual work is done by SURFFindInvoker, explained in detail below
parallel_for_( Range(0, nMiddleLayers),
SURFFindInvoker(sum, mask_sum, dets, traces, sizes,
sampleSteps, middleIndices, keypoints,
nOctaveLayers, hessianThreshold) );
//sort the keypoints
std::sort(keypoints.begin(), keypoints.end(), KeypointGreater());
}
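The comparator KeypointGreater used by the sort orders keypoints by decreasing Hessian response. A sketch of what the 2.4.9 source does (reproduced from memory, so treat the exact tie-breaking rules as approximate):

struct KeypointGreater
{
    inline bool operator()(const KeyPoint& kp1, const KeyPoint& kp2) const
    {
        if( kp1.response > kp2.response ) return true;   // primary key: response, descending
        if( kp1.response < kp2.response ) return false;
        if( kp1.size > kp2.size ) return true;           // then size, descending
        if( kp1.size < kp2.size ) return false;
        if( kp1.octave > kp2.octave ) return true;       // then octave, descending
        if( kp1.octave < kp2.octave ) return false;
        if( kp1.pt.y < kp2.pt.y ) return false;          // finally image position
        return kp1.pt.x < kp2.pt.x;
    }
};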
The class that computes the determinants and traces:
// Multi-threaded construction of the scale-space pyramid
struct SURFBuildInvoker : ParallelLoopBody
{
//constructor
SURFBuildInvoker( const Mat& _sum, const vector<int>& _sizes,
const vector<int>& _sampleSteps,
vector<Mat>& _dets, vector<Mat>& _traces )
{
sum = &_sum;
sizes = &_sizes;
sampleSteps = &_sampleSteps;
dets = &_dets;
traces = &_traces;
}
//overloaded operator()
void operator()(const Range& range) const
{
//iterate over the assigned layers and compute the determinant and trace of each; calcLayerDetAndTrace is described in detail below
for( int i=range.start; i<range.end; i++ )
calcLayerDetAndTrace( *sum, (*sizes)[i], (*sampleSteps)[i], (*dets)[i], (*traces)[i] );
}
//member variables of the class
const Mat *sum;
const vector<int> *sizes;
const vector<int> *sampleSteps;
vector<Mat>* dets;
vector<Mat>* traces;
};
The calcLayerDetAndTrace function, which computes the determinant and trace of one layer:
/*
* Calculate the determinant and trace of the Hessian for a layer of the
* scale-space pyramid
*/
static void calcLayerDetAndTrace( const Mat& sum, int size, int sampleStep,
Mat& det, Mat& trace )
{
//NX, NY and NXY are the numbers of lobes in the Dxx, Dyy and Dxy templates, i.e. the variable N of formula 5
const int NX=3, NY=3, NXY=4;
//dx_s, dy_s and dxy_s describe the 9×9 Dxx, Dyy and Dxy templates as 2-D arrays: the first dimension enumerates the lobes of the template, the second holds each lobe's coordinates and weight. With the template's top-left corner as origin, the first two of the five elements are the top-left corner of the lobe, the next two are its bottom-right corner, and the last one is the lobe's weight
const int dx_s[NX][5] = { {0, 2, 3, 7, 1}, {3, 2, 6, 7, -2}, {6, 2, 9, 7, 1} };
const int dy_s[NY][5] = { {2, 0, 7, 3, 1}, {2, 3, 7, 6, -2}, {2, 6, 7, 9, 1} };
const int dxy_s[NXY][5] = { {1, 1, 4, 4, 1}, {5, 1, 8, 4, -1}, {1, 5, 4, 8, -1}, {5, 5, 8, 8, 1} };
//Dx, Dy and Dxy hold the template data after resizing
SurfHF Dx[NX], Dy[NY], Dxy[NXY];
//check whether the box-filter size exceeds the bounds of the input image
if( size > sum.rows-1 || size > sum.cols-1 )
return;
//generate box-filter templates of other sizes from the 9×9 initial templates; resizeHaarPattern is explained in detail below
resizeHaarPattern( dx_s , Dx , NX , 9, size, sum.cols );
resizeHaarPattern( dy_s , Dy , NY , 9, size, sum.cols );
resizeHaarPattern( dxy_s, Dxy, NXY, 9, size, sum.cols );
/* The integral image 'sum' is one pixel bigger than the source image */
//compute the numbers of row and column samples from this layer's sampling step
int samples_i = 1+(sum.rows-1-size)/sampleStep;
int samples_j = 1+(sum.cols-1-size)/sampleStep;
/* Ignore pixels where some of the kernel is outside the image */
//compute the image border that must be left blank because of the template size and the sampling step
int margin = (size/2)/sampleStep;
//iterate over all sampled pixels of this layer
for( int i = 0; i < samples_i; i++ )
{
//pointer to the start of the current row in the integral image
const int* sum_ptr = sum.ptr<int>(i*sampleStep);
//pointer to the start of the current determinant row, past the reserved border
float* det_ptr = &det.at<float>(i+margin, margin);
//pointer to the start of the current trace row, past the reserved border
float* trace_ptr = &trace.at<float>(i+margin, margin);
for( int j = 0; j < samples_j; j++ )
{
//compute the responses of the three box filters with formula 5; calcHaarPattern is simple, and a short sketch of it is given after this function
float dx = calcHaarPattern( sum_ptr, Dx , 3 );
float dy = calcHaarPattern( sum_ptr, Dy , 3 );
float dxy = calcHaarPattern( sum_ptr, Dxy, 4 );
//advance to the next sampled pixel of this row in the integral image
sum_ptr += sampleStep;
//compute the determinant with formula 6
det_ptr[j] = dx*dy - 0.81f*dxy*dxy;
//compute the trace with formula 8
trace_ptr[j] = dx + dy;
}
}
}
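For reference, the SurfHF structure used above stores the four integral-image offsets and the weight of one box lobe, and calcHaarPattern simply evaluates the weighted box sums through the integral image. In the source they are essentially (quoted from memory):

struct SurfHF
{
    int p0, p1, p2, p3; // offsets of the four box corners within the integral image
    float w;            // weight of the lobe, i.e. w_n/s_n of formula 5
    SurfHF(): p0(0), p1(0), p2(0), p3(0), w(0) {}
};

inline float calcHaarPattern( const int* origin, const SurfHF* f, int n )
{
    double d = 0;
    for( int k = 0; k < n; k++ )
        //box sum via the integral image (A + D - B - C in figure 1), multiplied by the weight
        d += (origin[f[k].p0] + origin[f[k].p3] - origin[f[k].p1] - origin[f[k].p2])*f[k].w;
    return (float)d;
}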
The resizeHaarPattern function, which generates box-filter templates of other sizes from the 9×9 initial template:
// src is the source template, i.e. the 9×9 template
// dst receives the data of the new template
// n is the number of lobes in the template
// oldSize is the size of the source template src, i.e. 9
// newSize is the size of the new template dst
// widthStep is the row step of the integral image, i.e. the row step of the input image
static void
resizeHaarPattern( const int src[][5], SurfHF* dst, int n, int oldSize, int newSize, int widthStep )
{
//ratio of the new template size to the old one
float ratio = (float)newSize/oldSize;
//iterate over all lobes of the template
for( int k = 0; k < n; k++ )
{
//compute the top-left and bottom-right corners of the new lobe from the ratio
int dx1 = cvRound( ratio*src[k][0] );
int dy1 = cvRound( ratio*src[k][1] );
int dx2 = cvRound( ratio*src[k][2] );
int dy2 = cvRound( ratio*src[k][3] );
//convert the lobe corners to positions in the integral image, i.e. the four points A, B, C and D of figure 1
dst[k].p0 = dy1*widthStep + dx1;
dst[k].p1 = dy2*widthStep + dx1;
dst[k].p2 = dy1*widthStep + dx2;
dst[k].p3 = dy2*widthStep + dx2;
//w_n/s_n of formula 5
dst[k].w = src[k][4]/((float)(dx2-dx1)*(dy2-dy1));
}
}
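As a worked example, scaling the first lobe {0, 2, 3, 7, 1} of the 9×9 Dxx template to newSize = 15 gives ratio = 15/9 ≈ 1.67, so the corners become (0, 3) and (5, 12) and the weight becomes w = 1/((5-0)×(12-3)) = 1/45; dividing by the lobe area keeps the filter response comparable across template sizes.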
The class that detects keypoints:
// Multi-threaded search of the scale-space pyramid for keypoints
struct SURFFindInvoker : ParallelLoopBody
{
//constructor
SURFFindInvoker( const Mat& _sum, const Mat& _mask_sum,
const vector<Mat>& _dets, const vector<Mat>& _traces,
const vector<int>& _sizes, const vector<int>& _sampleSteps,
const vector<int>& _middleIndices, vector<KeyPoint>& _keypoints,
int _nOctaveLayers, float _hessianThreshold )
{
sum = &_sum; //integral image
mask_sum = &_mask_sum; //integral image of the mask
dets = &_dets; //determinants
traces = &_traces; //traces
sizes = &_sizes; //template sizes
sampleSteps = &_sampleSteps; //sampling steps
middleIndices = &_middleIndices; //middle layers
keypoints = &_keypoints; //keypoints
nOctaveLayers = _nOctaveLayers; //layers per octave
hessianThreshold = _hessianThreshold; //determinant threshold
}
//member function, explained in detail below
static void findMaximaInLayer( const Mat& sum, const Mat& mask_sum,
const vector<Mat>& dets, const vector<Mat>& traces,
const vector<int>& sizes, vector<KeyPoint>& keypoints,
int octave, int layer, float hessianThreshold, int sampleStep );
//overloaded operator()
void operator()(const Range& range) const
{
//iterate over the middle layers
for( int i=range.start; i<range.end; i++ )
{
int layer = (*middleIndices)[i]; //layer of the image stack that this middle layer corresponds to
int octave = i / nOctaveLayers; //octave index of this layer
//detect keypoints
findMaximaInLayer( *sum, *mask_sum, *dets, *traces, *sizes,
*keypoints, octave, layer, hessianThreshold,
(*sampleSteps)[layer] );
}
}
//member variables
const Mat *sum;
const Mat *mask_sum;
const vector<Mat>* dets;
const vector<Mat>* traces;
const vector<int>* sizes;
const vector<int>* sampleSteps;
const vector<int>* middleIndices;
vector<KeyPoint>* keypoints;
int nOctaveLayers;
float hessianThreshold;
static Mutex findMaximaInLayer_m;
};
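Since findMaximaInLayer appends keypoints from several threads at once, the class carries the static mutex findMaximaInLayer_m, taken through cv::AutoLock at the push_back. If I remember the source correctly, the static member is defined outside the class body as:

Mutex SURFFindInvoker::findMaximaInLayer_m;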
The findMaximaInLayer function, which detects the keypoints:
/*
* Find the maxima in the determinant of the Hessian in a layer of the
* scale-space pyramid
*/
void SURFFindInvoker::findMaximaInLayer( const Mat& sum, const Mat& mask_sum,
const vector<Mat>& dets, const vector<Mat>& traces,
const vector<int>& sizes, vector<KeyPoint>& keypoints,
int octave, int layer, float hessianThreshold, int sampleStep )
{
// Wavelet Data
//the following variables are used when a mask is supplied
const int NM=1;
const int dm[NM][5] = { {0, 0, 9, 9, 1} };
SurfHF Dm;
//size of the filter template used in this layer
int size = sizes[layer];
// The integral image 'sum' is one pixel bigger than the source image
//numbers of row and column samples of this layer, derived from the sampling step
int layer_rows = (sum.rows-1)/sampleStep;
int layer_cols = (sum.cols-1)/sampleStep;
// Ignore pixels without a 3x3x3 neighbourhood in the layer above
//within an octave, the layer above uses a larger filter template than the layer below, so the blank border reserved for boundary effects is wider; when doing the 3×3×3 neighbourhood comparison, this extra area has to be excluded
int margin = (sizes[layer+1]/2)/sampleStep+1;
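//Worked example (assuming the defaults nOctaves = 4, nOctaveLayers = 2): for the first middle layer of octave 0, sizes[layer+1] = 21 and sampleStep = 1, so margin = (21/2)/1 + 1 = 11 samples are skipped on each border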
//if a mask is used, its template has to be resized as well
if( !mask_sum.empty() )
resizeHaarPattern( dm, &Dm, NM, 9, size, mask_sum.cols );
//row step of this layer's determinant matrix
int step = (int)(dets[layer].step/dets[layer].elemSize());
//iterate over all sampled pixels of this layer
for( int i = margin; i < layer_rows - margin; i++ )
{
//pointer to the current row of the determinant matrix
const float* det_ptr = dets[layer].ptr<float>(i);
//pointer to the current row of the trace matrix
const float* trace_ptr = traces[layer].ptr<float>(i);
for( int j = margin; j < layer_cols-margin; j++ )
{
float val0 = det_ptr[j]; //current determinant sample
if( val0 > hessianThreshold ) //the determinant must exceed the chosen threshold
{
/* Coordinates for the start of the wavelet in the sum image. There
is some integer division involved, so don't try to simplify this
(cancel out sampleStep) without checking the result is the same */
//position of this sample in the input image
int sum_i = sampleStep*(i-(size/2)/sampleStep);
int sum_j = sampleStep*(j-(size/2)/sampleStep);
/* The 3x3x3 neighbouring samples around the maxima.
The maxima is included at N9[1][4] */
const float *det1 = &dets[layer-1].at<float>(i, j); //layer below
const float *det2 = &dets[layer].at<float>(i, j); //current layer
const float *det3 = &dets[layer+1].at<float>(i, j); //layer above
//determinant values of all 27 samples of the 3×3×3 region
float N9[3][9] = { { det1[-step-1], det1[-step], det1[-step+1],
det1[-1] , det1[0] , det1[1],
det1[step-1] , det1[step] , det1[step+1] },
{ det2[-step-1], det2[-step], det2[-step+1],
det2[-1] , det2[0] , det2[1],
det2[step-1] , det2[step] , det2[step+1] },
{ det3[-step-1], det3[-step], det3[-step+1],
det3[-1] , det3[0] , det3[1],
det3[step-1] , det3[step] , det3[step+1] } };
/* Check the mask - why not just check the mask at the center of the wavelet? */
//discard pixels that the mask does not cover
if( !mask_sum.empty() )
{
const int* mask_ptr = &mask_sum.at<int>(sum_i, sum_j);
float mval = calcHaarPattern( mask_ptr, &Dm, 1 );
if( mval < 0.5 )
continue;
}
/* Non-maxima suppression. val0 is at N9[1][4]*/
//non-maximum suppression: compare the current sample against its 26 neighbours; val0 is N9[1][4]
if( val0 > N9[0][0] && val0 > N9[0][1] && val0 > N9[0][2] &&
val0 > N9[0][3] && val0 > N9[0][4] && val0 > N9[0][5] &&
val0 > N9[0][6] && val0 > N9[0][7] && val0 > N9[0][8] &&
val0 > N9[1][0] && val0 > N9[1][1] && val0 > N9[1][2] &&
val0 > N9[1][3] && val0 > N9[1][5] &&
val0 > N9[1][6] && val0 > N9[1][7] && val0 > N9[1][8] &&
val0 > N9[2][0] && val0 > N9[2][1] && val0 > N9[2][2] &&
val0 > N9[2][3] && val0 > N9[2][4] && val0 > N9[2][5] &&
val0 > N9[2][6] && val0 > N9[2][7] && val0 > N9[2][8] )
{
/* Calculate the wavelet center coordinates for the maxima */
//needed for the interpolation: the centre coordinate is offset by 0.5
float center_i = sum_i + (size-1)*0.5f;
float center_j = sum_j + (size-1)*0.5f;
//create the candidate keypoint
KeyPoint kpt( center_j, center_i, (float)sizes[layer],
-1, val0, octave, CV_SIGN(trace_ptr[j]) );
/* Interpolate maxima location within the 3x3x3 neighbourhood */
//ds is the template-size increment, i.e. the difference between the current layer's size and the layer below's
int ds = size - sizes[layer-1];
//interpolate; interpolateKeypoint is explained in detail below
int interp_ok = interpolateKeypoint( N9, sampleStep, sampleStep, ds, kpt );
/* Sometimes the interpolation step gives a negative size etc. */
//if the interpolation result is reasonable, keep the keypoint
if( interp_ok )
{
/*printf( "KeyPoint %f %f %d\n", point.pt.x, point.pt.y, point.size );*/
cv::AutoLock lock(findMaximaInLayer_m);
keypoints.push_back(kpt); //the current sample is a keypoint; store it
}
}
}
}
}
}
The interpolateKeypoint function, which performs the interpolation:
/*
* Maxima location interpolation as described in "Invariant Features from
* Interest Point Groups" by Matthew Brown and David Lowe. This is performed by
* fitting a 3D quadratic to a set of neighbouring samples.
*
* The gradient vector and Hessian matrix at the initial keypoint location are
* approximated using central differences. The linear system Ax = b is then
* solved, where A is the Hessian, b is the negative gradient, and x is the
* offset of the interpolated maxima coordinates from the initial estimate.
* This is equivalent to an iteration of Newton's optimisation algorithm.
*
* N9 contains the samples in the 3x3x3 neighbourhood of the maxima
* dx is the sampling step in x
* dy is the sampling step in y
* ds is the sampling step in size
* point contains the keypoint coordinates and scale to be modified
*
* Return value is 1 if interpolation was successful, 0 on failure.
*/
//N9 is the 3×3×3 neighbourhood containing the extremum
//dx, dy and ds are the step sizes along x, y and scale (dx and dy are the sampling step)
static int
interpolateKeypoint( float N9[3][9], int dx, int dy, int ds, KeyPoint& kpt )
{
//b is the negative of formula 15
Vec3f b(-(N9[1][5]-N9[1][3])/2, // Negative 1st deriv with respect to x
-(N9[1][7]-N9[1][1])/2, // Negative 1st deriv with respect to y
-(N9[2][4]-N9[0][4])/2); // Negative 1st deriv with respect to s
//A is formula 14
Matx33f A(
N9[1][3]-2*N9[1][4]+N9[1][5], // 2nd deriv x, x
(N9[1][8]-N9[1][6]-N9[1][2]+N9[1][0])/4, // 2nd deriv x, y
(N9[2][5]-N9[2][3]-N9[0][5]+N9[0][3])/4, // 2nd deriv x, s
(N9[1][8]-N9[1][6]-N9[1][2]+N9[1][0])/4, // 2nd deriv x, y
N9[1][1]-2*N9[1][4]+N9[1][7], // 2nd deriv y, y
(N9[2][7]-N9[2][1]-N9[0][7]+N9[0][1])/4, // 2nd deriv y, s
(N9[2][5]-N9[2][3]-N9[0][5]+N9[0][3])/4, // 2nd deriv x, s
(N9[2][7]-N9[2][1]-N9[0][7]+N9[0][1])/4, // 2nd deriv y, s
N9[0][4]-2*N9[1][4]+N9[2][4]); // 2nd deriv s, s
//x = A^(-1) b, the result of formula 13, i.e. the offset
Vec3f x = A.solve(b, DECOMP_LU);
//check that the offset is reasonable: no component may exceed 1, otherwise we would drift onto a different keypoint
bool ok = (x[0] != 0 || x[1] != 0 || x[2] != 0) &&
std::abs(x[0]) <= 1 && std::abs(x[1]) <= 1 && std::abs(x[2]) <= 1;
if( ok ) //the offset is reasonable
{
//compute the displaced position with formula 16
kpt.pt.x += x[0]*dx;
kpt.pt.y += x[1]*dy;
//size here is really the box-filter template size, but since template size and image scale are related by formula 9, storing the size is fine
kpt.size = (float)cvRound( kpt.size + x[2]*ds );
}
return ok;
}
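In the notation of the full text (formulas 13 to 16), the code above performs one Newton step:

\hat{x} = A^{-1} b, \qquad
A = \begin{pmatrix} D_{xx} & D_{xy} & D_{xs} \\ D_{xy} & D_{yy} & D_{ys} \\ D_{xs} & D_{ys} & D_{ss} \end{pmatrix}, \qquad
b = -\begin{pmatrix} D_x \\ D_y \\ D_s \end{pmatrix}

where all derivatives are central differences of N9, and the keypoint is moved by \hat{x} scaled by the step sizes (dx, dy, ds) only if every component of \hat{x} lies within [-1, 1].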
The class that assigns orientations and computes the descriptors:
struct SURFInvoker : ParallelLoopBody
{
// ORI_RADIUS is the sampling radius of the 6s circular neighbourhood, ORI_WIN is the opening angle of the sliding sector window, PATCH_SZ is the sampling side length of the 20s square neighbourhood
enum { ORI_RADIUS = 6, ORI_WIN = 60, PATCH_SZ = 20 };
//constructor
SURFInvoker( const Mat& _img, const Mat& _sum,
vector<KeyPoint>& _keypoints, Mat& _descriptors,
bool _extended, bool _upright )
{
keypoints = &_keypoints; //keypoints
descriptors = &_descriptors; //descriptors
img = &_img; //input image
sum = &_sum; //integral image
extended = _extended; //128- or 64-dimensional descriptor
upright = _upright; //whether to use the U-SURF algorithm
// Simple bound for number of grid points in circle of radius ORI_RADIUS
//number of samples taken at interval s inside the circle of radius 6s around the keypoint
//nOriSampleBound is the number of pixels of the enclosing square; it is used here to bound the actual number of samples
const int nOriSampleBound = (2*ORI_RADIUS+1)*(2*ORI_RADIUS+1);
// Allocate arrays
//allocate the arrays
apt.resize(nOriSampleBound);
aptw.resize(nOriSampleBound); //aptw holds the Gaussian weights of the 6s circular neighbourhood
DW.resize(PATCH_SZ*PATCH_SZ); //DW holds the Gaussian weights of the 20s square neighbourhood
/* Coordinates and weights of samples used to calculate orientation */
//coefficients of the Gaussian weighting (sigma = 2.5s, different from Bay's original choice) for the 6s circular neighbourhood; SURF_ORI_SIGMA = 2.5f
Mat G_ori = getGaussianKernel( 2*ORI_RADIUS+1, SURF_ORI_SIGMA, CV_32F );
//actual number of samples in the 6s circular neighbourhood
nOriSamples = 0;
//precompute the Gaussian weights of all samples in the 6s circular neighbourhood
for( int i = -ORI_RADIUS; i <= ORI_RADIUS; i++ )
{
for( int j = -ORI_RADIUS; j <= ORI_RADIUS; j++ )
{
if( i*i + j*j <= ORI_RADIUS*ORI_RADIUS )
{
apt[nOriSamples] = cvPoint(i,j); //position relative to the circle centre
//Gaussian weight
aptw[nOriSamples++] = G_ori.at<float>(i+ORI_RADIUS,0) * G_ori.at<float>(j+ORI_RADIUS,0);
}
}
}
//make sure the actual number of samples does not exceed the bound
CV_Assert( nOriSamples <= nOriSampleBound );
/* Gaussian used to weight descriptor samples */
//coefficients of the Gaussian weighting (sigma = 3.3s) for the 20s square neighbourhood; SURF_DESC_SIGMA = 3.3f
Mat G_desc = getGaussianKernel( PATCH_SZ, SURF_DESC_SIGMA, CV_32F );
//precompute the Gaussian weights of all samples in the 20s square neighbourhood
for( int i = 0; i < PATCH_SZ; i++ )
{
for( int j = 0; j < PATCH_SZ; j++ )
DW[i*PATCH_SZ+j] = G_desc.at<float>(i,0) * G_desc.at<float>(j,0);
}
}
//overloaded operator()
void operator()(const Range& range) const
{
/* X and Y gradient wavelet data */
//numbers of lobes of the Haar wavelets along x and y
const int NX=2, NY=2;
//dx_s and dy_s have the same meaning as for the box-filter templates: the first four of the five elements are the top-left and bottom-right corners of a lobe, the fifth is its weight
const int dx_s[NX][5] = {{0, 0, 2, 4, -1}, {2, 0, 4, 4, 1}};
const int dy_s[NY][5] = {{0, 0, 4, 2, 1}, {0, 2, 4, 4, -1}};
// Optimisation is better using nOriSampleBound than nOriSamples for
// array lengths. Maybe because it is a constant known at compile time
//number of samples taken at interval s inside the circle of radius 6s around the keypoint
//nOriSampleBound is really the number of pixels of the enclosing square, which is larger than the circle, so it is a safe array length for the variables defined below
const int nOriSampleBound =(2*ORI_RADIUS+1)*(2*ORI_RADIUS+1);
//array X holds the x gradients (Haar responses) of the 6s circular neighbourhood, Y the y gradients, angle the angles
float X[nOriSampleBound], Y[nOriSampleBound], angle[nOriSampleBound];
//2-D array for the 20s square neighbourhood
uchar PATCH[PATCH_SZ+1][PATCH_SZ+1];
//DX and DY hold dx and dy of the samples of the 20s square neighbourhood
float DX[PATCH_SZ][PATCH_SZ], DY[PATCH_SZ][PATCH_SZ];
//matX, matY and _angle all have rows == 1, so these three matrices are really vectors; their data are the X, Y and angle arrays
CvMat matX = cvMat(1, nOriSampleBound, CV_32F, X);
CvMat matY = cvMat(1, nOriSampleBound, CV_32F, Y);
CvMat _angle = cvMat(1, nOriSampleBound, CV_32F, angle);
//matrix variable for the 20s square neighbourhood
Mat _patch(PATCH_SZ+1, PATCH_SZ+1, CV_8U, PATCH);
//choose between the 128- and the 64-dimensional descriptor
int dsize = extended ? 128 : 64;
//k1 is 0 and k2 is N, the total number of keypoints
int k, k1 = range.start, k2 = range.end;
//maximum scale over all keypoints
float maxSize = 0;
//iterate over all keypoints to find the maximum scale (really the maximum box-filter template size)
for( k = k1; k < k2; k++ )
{
maxSize = std::max(maxSize, (*keypoints)[k].size);
}
//largest pixel size of the 20s square neighbourhood, i.e. with the maximum scale s plugged in
int imaxSize = std::max(cvCeil((PATCH_SZ+1)*maxSize*1.2f/9.0f), 1);
//allocate one buffer for the 20s square neighbourhood, large enough for every possible s
Ptr<CvMat> winbuf = cvCreateMat( 1, imaxSize*imaxSize, CV_8U );
//iterate over all keypoints
for( k = k1; k < k2; k++ )
{
int i, j, kk, nangle;
float* vec;
SurfHF dx_t[NX], dy_t[NY];
KeyPoint& kp = (*keypoints)[k]; //current keypoint
float size = kp.size; //keypoint scale, really the box-filter template size
Point2f center = kp.pt; //keypoint position
/* The sampling intervals and wavelet sized for selecting an orientation
and building the keypoint descriptor are defined relative to 's' */
//convert the template size to the image scale with formula 9 to obtain the true scale
float s = size*1.2f/9.0f;
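//e.g. the smallest template, size = 9, gives s = 9*1.2/9 = 1.2, and size = 27 (octave 0, top layer) gives s = 3.6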
/* To find the dominant orientation, the gradients in x and y are
sampled in a circle of radius 6s using wavelets of size 4s.
We ensure the gradient wavelet size is even to ensure the
wavelet pattern is balanced and symmetric around its center */
//size of the Haar wavelet used for orientation assignment, i.e. 4s
int grad_wav_size = 2*cvRound( 2*s );
//if the Haar wavelet is larger than the input image, enter the if branch
if( sum->rows < grad_wav_size || sum->cols < grad_wav_size )
{
/* when grad_wav_size is too big,
* the sampling of gradient will be meaningless
* mark keypoint for deletion. */
//reaching this branch means the Haar wavelet is too large and any computation at this size would be meaningless, so mark the keypoint as invalid; the code discussed earlier deletes it
kp.size = -1;
continue; //move on to the next keypoint
}
float descriptor_dir = 360.f - 90.f;
if (upright == 0) //the U-SURF algorithm is not used
{
//resize the two Haar wavelet templates
resizeHaarPattern( dx_s, dx_t, NX, 4, grad_wav_size, sum->cols );
resizeHaarPattern( dy_s, dy_t, NY, 4, grad_wav_size, sum->cols );
//iterate over all samples of the 6s circular neighbourhood
for( kk = 0, nangle = 0; kk < nOriSamples; kk++ )
{
//x and y are the position of this sample in the input image
int x = cvRound( center.x + apt[kk].x*s - (float)(grad_wav_size-1)/2 );
int y = cvRound( center.y + apt[kk].y*s - (float)(grad_wav_size-1)/2 );
//check whether the position falls outside the image
if( y < 0 || y >= sum->rows - grad_wav_size ||
x < 0 || x >= sum->cols - grad_wav_size )
continue; //outside: move on to the next sample
//pixel of the integral image
const int* ptr = &sum->at<int>(y, x);
//Haar wavelet responses along x and y
float vx = calcHaarPattern( ptr, dx_t, 2 );
float vy = calcHaarPattern( ptr, dy_t, 2 );
//apply the Gaussian weights to the Haar responses
X[nangle] = vx*aptw[kk];
Y[nangle] = vy*aptw[kk];
nangle++; //count the samples actually taken
}
if( nangle == 0 ) // nangle == 0 means no sample fell inside the 6s circular neighbourhood
{
// No gradient could be sampled because the keypoint is too
// near too one or more of the sides of the image. As we
// therefore cannot find a dominant direction, we skip this
// keypoint and mark it for later deletion from the sequence.
//this means the keypoint is too close to the image border, so it is deleted as well
kp.size = -1;
continue; //move on to the next keypoint
}
//shrink matX, matY and _angle to the number of samples actually taken in the 6s circular neighbourhood
matX.cols = matY.cols = _angle.cols = nangle;
//use cvCartToPolar (Cartesian-to-polar conversion) to compute the sample angles in the gradient coordinate system; the result goes into the angle array behind _angle
cvCartToPolar( &matX, &matY, 0, &_angle, 1 );
float bestx = 0, besty = 0, descriptor_mod = 0;
//SURF_ORI_SEARCH_INC = 5
//sweep the full 360 degrees in 5-degree steps
for( i = 0; i < 360; i += SURF_ORI_SEARCH_INC )
{
//sumx and sumy are the sums of the x and y responses of all samples inside the sliding sector window, temp_mod is the squared magnitude
float sumx = 0, sumy = 0, temp_mod;
//iterate over all samples
for( j = 0; j < nangle; j++ )
{
//angle of the current sample relative to the rotating sector
int d = std::abs(cvRound(angle[j]) - i);
//check whether d lies within ±30 degrees, i.e. whether the sample falls inside the sliding sector window
if( d < ORI_WIN/2 || d > 360-ORI_WIN/2 )
{
//accumulate the responses of the samples inside the window
sumx += X[j];
sumy += Y[j];
}
}
//formula 17: squared magnitude of the window
temp_mod = sumx*sumx + sumy*sumy;
//check whether this window's magnitude is the largest so far
if( temp_mod > descriptor_mod )
{
//update the maximum magnitude and the accumulated responses
descriptor_mod = temp_mod;
bestx = sumx;
besty = sumy;
}
}
//formulas 18 and 19: the keypoint orientation
descriptor_dir = fastAtan2( -besty, bestx );
}
//assign the orientation; with the U-SURF algorithm it is always 270 degrees
kp.angle = descriptor_dir;
//if no descriptors are needed, skip everything after this if
if( !descriptors || !descriptors->data )
continue;
//descriptor generation starts here
/* Extract a window of pixels around the keypoint of size 20s */
// win_size is the pixel side length of the 20s square neighbourhood, (PATCH_SZ+1)*s, with one extra row and column so that the wavelet differences can be taken
int win_size = (int)((PATCH_SZ+1)*s);
//make sure the 20s square neighbourhood does not exceed the preallocated buffer
CV_Assert( winbuf->cols >= win_size*win_size );
//matrix for the 20s square neighbourhood
Mat win(win_size, win_size, CV_8U, winbuf->data.ptr);
if( !upright ) //the U-SURF algorithm is not used
{
//convert the keypoint orientation from degrees to radians
descriptor_dir *= (float)(CV_PI/180);
//sine and cosine of the orientation
float sin_dir = -std::sin(descriptor_dir);
float cos_dir = std::cos(descriptor_dir);
/* Subpixel interpolation version (slower). Subpixel not required since
the pixels will all get averaged when we scale down to 20 pixels */
/*
float w[] = { cos_dir, sin_dir, center.x,
-sin_dir, cos_dir , center.y };
CvMat W = cvMat(2, 3, CV_32F, w);
cvGetQuadrangleSubPix( img, &win, &W );
*/
float win_offset = -(float)(win_size-1)/2;
//top-left corner of the rotation-corrected 20s square neighbourhood
float start_x = center.x + win_offset*cos_dir + win_offset*sin_dir;
float start_y = center.y - win_offset*sin_dir + win_offset*cos_dir;
//pointer to the first pixel of the 20s square neighbourhood
uchar* WIN = win.data;
#if 0
// Nearest neighbour version (faster)
for( i = 0; i < win_size; i++, start_x += sin_dir, start_y += cos_dir )
{
float pixel_x = start_x;
float pixel_y = start_y;
for( j = 0; j < win_size; j++, pixel_x += cos_dir, pixel_y -= sin_dir )
{
int x = std::min(std::max(cvRound(pixel_x), 0), img->cols-1);
int y = std::min(std::max(cvRound(pixel_y), 0), img->rows-1);
WIN[i*win_size + j] = img->at<uchar>(y, x);
}
}
#else
int ncols1 = img->cols-1, nrows1 = img->rows-1;
size_t imgstep = img->step;
//collect all pixels of the 20s square neighbourhood
for( i = 0; i < win_size; i++, start_x += sin_dir, start_y += cos_dir )
{
//first pixel coordinate of this row of the rotation-corrected 20s square neighbourhood
double pixel_x = start_x;
double pixel_y = start_y;
for( j = 0; j < win_size; j++, pixel_x += cos_dir, pixel_y -= sin_dir )
{
//pixel coordinate in the rotation-corrected 20s square neighbourhood
int ix = cvFloor(pixel_x), iy = cvFloor(pixel_y);
//check whether the coordinate exceeds the image border
if( (unsigned)ix < (unsigned)ncols1 &&
(unsigned)iy < (unsigned)nrows1 ) //inside the image
{
//fractional offsets
float a = (float)(pixel_x - ix), b = (float)(pixel_y - iy);
//pointer to the image intensity
const uchar* imgptr = &img->at<uchar>(iy, ix);
//bilinear interpolation of the intensity for the 20s square neighbourhood
WIN[i*win_size + j] = (uchar)
cvRound(imgptr[0]*(1.f - a)*(1.f - b) +
imgptr[1]*a*(1.f - b) +
imgptr[imgstep]*(1.f - a)*b +
imgptr[imgstep+1]*a*b);
}
else //outside the image border
{
//use the intensity at the border for the part of the 20s square neighbourhood that falls outside
int x = std::min(std::max(cvRound(pixel_x), 0), ncols1);
int y = std::min(std::max(cvRound(pixel_y), 0), nrows1);
WIN[i*win_size + j] = img->at<uchar>(y, x);
}
}
}
#endif
}
else //the U-SURF algorithm is used
{
// extract rect - slightly optimized version of the code above
// TODO: find faster code, as this is simply an extract rect operation,
// e.g. by using cvGetSubRect, problem is the border processing
// descriptor_dir == 90 grad
// sin_dir == 1
// cos_dir == 0
float win_offset = -(float)(win_size-1)/2;
//top-left corner of the 20s square neighbourhood
int start_x = cvRound(center.x + win_offset);
int start_y = cvRound(center.y - win_offset);
uchar* WIN = win.data;
//collect all pixels of the 20s square neighbourhood
for( i = 0; i < win_size; i++, start_x++ )
{
//first pixel coordinate of this row
int pixel_x = start_x;
int pixel_y = start_y;
for( j = 0; j < win_size; j++, pixel_y-- )
{
//x and y are the pixel coordinates
int x = MAX( pixel_x, 0 );
int y = MAX( pixel_y, 0 );
//clamp the coordinates to the image border
x = MIN( x, img->cols-1 );
y = MIN( y, img->rows-1 );
WIN[i*win_size + j] = img->at<uchar>(y, x); //copy the intensity
}
}
}
// Scale the window to size PATCH_SZ so each pixel's size is s. This
// makes calculating the gradients with wavelets of size 2s easy
//use area relation (INTER_AREA) to shrink the square of side 20s to a square of side 20; the scale s is always greater than 1, so this is a reduction, never an enlargement, and it is equivalent to sampling the 20s square at interval s
resize(win, _patch, _patch.size(), 0, 0, INTER_AREA);
// Calculate gradients in x and y with wavelets of size 2s
//iterate over all samples of the 20s square neighbourhood
for( i = 0; i < PATCH_SZ; i++ )
for( j = 0; j < PATCH_SZ; j++ )
{
float dw = DW[i*PATCH_SZ + j]; //Gaussian weight
//weighted dx and dy
float vx = (PATCH[i][j+1] - PATCH[i][j] + PATCH[i+1][j+1] - PATCH[i+1][j])*dw;
float vy = (PATCH[i+1][j] - PATCH[i][j] + PATCH[i+1][j+1] - PATCH[i][j+1])*dw;
DX[i][j] = vx;
DY[i][j] = vy;
}
// Construct the descriptor
//descriptor vector
vec = descriptors->ptr<float>(k);
//zero the descriptor vector
for( kk = 0; kk < dsize; kk++ )
vec[kk] = 0;
double square_mag = 0; //squared magnitude
if( extended ) //128-dimensional descriptor
{
// 128-bin descriptor
//iterate over the 4×4 subregions of the 20s square neighbourhood
for( i = 0; i < 4; i++ )
for( j = 0; j < 4; j++ )
{
//iterate over the 25 samples of the subregion
for(int y = i*5; y < i*5+5; y++ )
{
for(int x = j*5; x < j*5+5; x++ )
{
float tx = DX[y][x], ty = DY[y][x]; //dx and dy
if( ty >= 0 ) //dy >= 0
{
vec[0] += tx; //accumulate dx
vec[1] += (float)fabs(tx); //accumulate |dx|
} else { //dy < 0
vec[2] += tx; //accumulate dx
vec[3] += (float)fabs(tx); //accumulate |dx|
}
if ( tx >= 0 ) //dx >= 0
{
vec[4] += ty; //accumulate dy
vec[5] += (float)fabs(ty); //accumulate |dy|
} else { //dx < 0
vec[6] += ty; //accumulate dy
vec[7] += (float)fabs(ty); //accumulate |dy|
}
}
}
for( kk = 0; kk < 8; kk++ )
square_mag += vec[kk]*vec[kk];
vec += 8;
}
}
else //64-dimensional descriptor
{
// 64-bin descriptor
//iterate over the 4×4 subregions of the 20s square neighbourhood
for( i = 0; i < 4; i++ )
for( j = 0; j < 4; j++ )
{
//iterate over the 25 samples of the subregion
for(int y = i*5; y < i*5+5; y++ )
{
for(int x = j*5; x < j*5+5; x++ )
{
float tx = DX[y][x], ty = DY[y][x]; //dx and dy
//accumulate dx, dy, |dx| and |dy| over the subregion
vec[0] += tx; vec[1] += ty;
vec[2] += (float)fabs(tx); vec[3] += (float)fabs(ty);
}
}
for( kk = 0; kk < 4; kk++ )
square_mag += vec[kk]*vec[kk];
vec+=4;
}
}
// unit vector is essential for contrast invariance
//normalize the descriptor to achieve contrast invariance, i.e. to remove the influence of illumination changes
vec = descriptors->ptr<float>(k);
float scale = (float)(1./(sqrt(square_mag) + DBL_EPSILON));
for( kk = 0; kk < dsize; kk++ )
vec[kk] *= scale; //formula 21
}
}
// Parameters
//member variables
const Mat* img;
const Mat* sum;
vector<KeyPoint>* keypoints;
Mat* descriptors;
bool extended;
bool upright;
// Pre-calculated values
int nOriSamples;
vector<Point> apt;
vector<float> aptw;
vector<float> DW;
};
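To close, a minimal end-to-end sketch of how the pieces analyzed above are typically used in 2.4.x (assuming the nonfree module is available; the image file names are hypothetical):

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/features2d/features2d.hpp>
#include <opencv2/nonfree/features2d.hpp>
using namespace cv;

int main()
{
    Mat img1 = imread("scene1.jpg", CV_LOAD_IMAGE_GRAYSCALE); // hypothetical inputs
    Mat img2 = imread("scene2.jpg", CV_LOAD_IMAGE_GRAYSCALE);

    SURF surf(400); // hessianThreshold = 400, the other parameters at their defaults
    std::vector<KeyPoint> kpts1, kpts2;
    Mat desc1, desc2;
    surf(img1, Mat(), kpts1, desc1); // runs fastHessianDetector + SURFInvoker internally
    surf(img2, Mat(), kpts2, desc2);

    // SURF descriptors are float vectors, so brute-force matching uses the L2 norm
    BFMatcher matcher(NORM_L2);
    std::vector<DMatch> matches;
    matcher.match(desc1, desc2, matches);

    Mat vis;
    drawMatches(img1, kpts1, img2, kpts2, matches, vis);
    imwrite("matches.jpg", vis);
    return 0;
}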