1. 程式人生 > >最大穩定極值區域MSERs

最大穩定極值區域MSERs

引言簡述

Harris興趣點檢測器是一種產生平移旋轉不變結果的演算法。當噪聲和離散化的影響可以忽略時,Harris檢測器作用於經過旋轉或平移的影象,其輸出是一組相應旋轉或平移的點集。然而,如果影象被縮放或經過射影變換,則Harris檢測器的輸出也會劇烈變化。最大穩定極值區域(Maximally Stable Extremal Regions,MSERs)是一種影象結構,不僅是在平移和旋轉後,即便是經歷相似變換或仿射變換,它仍可被重複檢測出來。

基本理論

MSER檢測過程:假設對一幅灰度圖 $I$ 取所有可能的閾值,比如通用的範圍 $S = [0, 1, 2, \ldots, 255]$。對於低於閾值的畫素,其稱為黑色畫素,高於閾值的則稱為白色畫素。如果把閾值化的影象 $I_t$ 依次排成一段連續播放的影像,其中第 $t$ 幀對應於閾值 $t$,那麼首張為一幅白色影象。隨後,對應於區域性強度最小值的黑色點開始出現並增長。在某一點處,對應於兩個區域性極小值的區域將合併。最後一幅影象將為黑色。在很多影象下,我們可以觀察到,在特定區域大範圍的閾值內,區域性二值化很穩定。概括起來,MSER檢測演算法為:

演算法:遍歷極值區域
輸入:影象 I
輸出:巢狀的極值區域列表
1. 對按強度排序的所有畫素:
   1)在影象中放置畫素
   2)更新連通分量結構
   3)更新影響到的連通分量的面積
2. 對所有連通區域:
   連通分量面積的變化率區域性極小值即為穩定的閾值

該過程產生了一個數據結構,其中儲存了每個連通分量的面積,作為閾值的函式。兩個分量的合併可視為較小分量的消亡,然後小分量的所有畫素全部加入大的分量。最後,面積函式變化率的區域性極小值的強度級別,即被選作閾值。在輸出中,每個MSER都由一個區域性強度極小值和一個閾值來表示。此演算法的結構和一種高效的分水嶺演算法是一樣的。然而,兩種演算法的輸出結構不同。在分水嶺計算中,著重點在於區域合併以及分水嶺盆接觸的閾值。這種閾值非常不穩定,在一次合併後,區域的面積突然變化;而在MSER檢測中,要找的是一組閾值,使得分水嶺盆在這段閾值範圍內有效地保持不變。

參考程式碼

參考程式碼貼出相關部分的MSER.H標頭檔案和MSER.CPP檔案。如下程式碼所示(僅作參考):

MSER.H標頭檔案

#ifndef _MSER_H_
#define _MSER_H_

#include <vector>
.........
// MSER (Maximally Stable Extremal Regions) detector.
// An instance is configured once through the constructor and then applied to
// 8-bit image buffers through operator(), which fills a vector of Region.
// NOTE: the "....." lines below are elision markers from the article; parts of
// the class are not shown here.
class MSER
{
public:
	// A Maximally Stable Extremal Region.
	struct Region
	{
		// Level of the region; MSER::processStack compares it against pixel
		// grey levels. The constructor default (256) is one above the 8-bit range.
		int level_; 
		// Presumably the linear index of the pixel the region was created
		// from (second constructor argument) -- TODO confirm.
		int pixel_; 
		// Presumably the number of pixels accumulated so far -- TODO confirm.
		int area_; 

		// Five accumulated moments; presumably x, y, x^2, xy, y^2 -- TODO confirm
		// against accumulate(), whose body is not shown here.
		double moments_[5]; 
		// Presumably the stability (area variation) measure -- TODO confirm.
		double variation_; 		
		// Creates a region for the given level and starting pixel.
		Region(int level = 256, int pixel = 0);
	private:
		// Presumably set when the region passes the stability test -- TODO confirm.
		bool stable_; 
		// Links to other regions stored in MSER::pool_; MSER::doublePool
		// re-bases all three when the pool is reallocated.
		Region * parent_; 
		Region * child_; 
		Region * next_; 
		
		// Called by MSER::operator() with the (x, y) coordinates of each
		// pixel that is added to this region.
		void accumulate(int x, int y);
		// Called by MSER::processStack to fold a popped region into this one.
		void merge(Region * child);
		// Called once by MSER::operator() on the last stack entry when no
		// boundary pixel is left. minArea / maxArea arrive here as absolute
		// pixel counts (fraction * width * height); detected regions are
		// appended to `regions`.
		void detect(int delta, int minArea, int maxArea, double maxVariation,  
                             double minDiversity,std::vector<Region> & regions);
		// Helpers of detect(); their bodies are not shown in this excerpt.
		void process(int delta, int minArea, int maxArea, double maxVariation);
		bool check(double variation, int area) const;
		void save(double minDiversity, std::vector<Region> & regions);
		
		// MSER manipulates the private links and helpers directly.
		friend class MSER;
	};
	
	// Parameters (ranges are those asserted by the constructor):
	//   delta        > 0
	//   minArea      >= 0, < maxArea; fraction of the image area
	//   maxArea      <= 1;            fraction of the image area
	//   maxVariation > 0
	//   minDiversity in [0, 1)
	//   eight        true selects 8-neighbour connectivity, false 4-neighbour
	MSER(int delta = 2, double minArea = 0.0001, double maxArea = 0.5, 
             double maxVariation = 0.5,	double minDiversity = 0.33, bool eight = false);
	// Runs the detector on a width * height buffer of 8-bit grey values
	// (pixel p is at x = p % width, y = p / width) and stores the result in
	// `regions`.
	void operator()(const uint8_t * bits,int width, int  height,
                        std::vector <Region> & regions);
	..........
private:
	// Merges entries of the region stack until its top is consistent with the
	// grey level of the newly reached pixel.
	void processStack(int newPixelGreyLevel,int pixel,std::vector<Region*>&regionStack);
	// Doubles pool_ and re-bases every pointer into it (region links and the
	// entries of regionStack); returns the byte offset between old and new storage.
        std::ptrdiff_t doublePool(std::vector<Region *>&regionStack);
	
	// Parameters
	int delta_;
	double minArea_;
	double maxArea_;
	double maxVariation_;
	double minDiversity_;
	bool eight_;
	
	// Memory pool of regions for faster allocation
	std::vector<Region> pool_;
	// Index of the next unused slot in pool_; reset to 0 at the end of operator().
	std::size_t poolIndex_;
        ..........
};
#endif //_MSER_H_

MSER.CPP

#include "mser.h"

#include <algorithm>
#include <cassert>
#include <limits>

// Stores the detector parameters and pre-allocates the region pool.
//
//   delta         must be > 0
//   minArea       fraction of the image area, >= 0 and < maxArea
//   maxArea       fraction of the image area, <= 1
//   maxVariation  must be > 0
//   minDiversity  must lie in [0, 1)
//   eight         true: 8-neighbour connectivity, false: 4-neighbour
//
// The ranges are enforced with assert only, i.e. not at all when NDEBUG is defined.
MSER::MSER(int delta, double minArea, double maxArea, double maxVariation,
           double minDiversity, bool eight)
	// Listed in declaration order: that is the order members are actually
	// initialised in, whatever the order written here.
	: delta_(delta),
	  minArea_(minArea),
	  maxArea_(maxArea),
	  maxVariation_(maxVariation),
	  minDiversity_(minDiversity),
	  eight_(eight),
	  pool_(256),
	  poolIndex_(0)
{
	// Parameter check
	assert(delta > 0);
	assert(minArea >= 0.0);
	assert(maxArea <= 1.0);
	assert(minArea < maxArea);
	assert(maxVariation > 0.0);
	assert(minDiversity >= 0.0);
	assert(minDiversity < 1.0);
}

// Detects regions in a grey-level image.
//
//   bits     width * height 8-bit grey values; pixel p lies at
//            x = p % width, y = p / width
//   width    image width in pixels
//   height   image height in pixels
//   regions  receives the detected regions (filled by Region::detect)
//
// Returns without touching `regions` when the image is empty or bits is null.
void MSER::operator()(const uint8_t * bits, int width, int height, vector<Region> & regions)
{
	// Nothing to scan; also keeps bits[0] / accessible[0] below in bounds.
	if (!bits || width <= 0 || height <= 0)
		return;

	// 1. Set up the visited mask, one stack of boundary pixels per grey level
	// (priority == 256 means "all stacks empty") and the region stack, whose
	// bottom entry is a dummy region at the default level 256.
	vector<bool> accessible(width * height);
	vector<int> boundaryPixels[256];
	int priority = 256;
	vector<Region *> regionStack;
	
	regionStack.push_back(new (&pool_[poolIndex_++]) Region);
	
	// 2. Start from pixel 0 with its first edge.
	int curPixel = 0;
	int curEdge = 0;
	int curLevel = bits[0];
	accessible[0] = true;
	
	// 3. Push an empty region for the current level. Re-entered through the
	// goto below each time a lower neighbour is found.
step_3:
	regionStack.push_back(new (&pool_[poolIndex_++]) Region(curLevel, curPixel));
	
	// Grow the pool as soon as it is full so that the next placement-new is valid.
	if (poolIndex_ == pool_.size())
		doublePool(regionStack);

	for (;;) {
		const int x = curPixel % width;
		const int y = curPixel / width;
		
		// 4. Explore the remaining edges of the current pixel.
		for (; curEdge < (eight_ ? 8 : 4); ++curEdge) {
			// Stays equal to curPixel when the neighbour would lie outside the image.
			int neighborPixel = curPixel;
			
			if (eight_) {
				switch (curEdge) {
					case 0: if (x < width - 1)
						neighborPixel = curPixel + 1; break;
					case 1: if ((x < width - 1) && (y > 0))
						neighborPixel = curPixel - width + 1; break;
					case 2: if (y > 0)
						neighborPixel = curPixel - width; break;
					case 3: if ((x > 0) && (y > 0))
						neighborPixel = curPixel - width - 1; break;
					case 4: if (x > 0)
						neighborPixel = curPixel - 1; break;
					case 5: if ((x > 0) && (y < height - 1))
						neighborPixel = curPixel + width - 1; break;
					case 6: if (y < height - 1)
						neighborPixel = curPixel + width; break;
					default: if ((x < width - 1) && (y < height - 1))
						neighborPixel = curPixel + width + 1; break;
				}
			}
			else {
				switch (curEdge) {
					case 0: if (x < width - 1)
						neighborPixel = curPixel + 1; break;
					case 1: if (y < height - 1)
						neighborPixel = curPixel + width; break;
					case 2: if (x > 0)
						neighborPixel = curPixel - 1; break;
					default: if (y > 0)
						neighborPixel = curPixel - width; break;
				}
			}
			
			if (neighborPixel != curPixel && !accessible[neighborPixel]) {
				const int neighborLevel = bits[neighborPixel];
				accessible[neighborPixel] = true;
				
				if (neighborLevel >= curLevel) {
					// Not lower: queue the neighbour for later. Entries pack the
					// pixel index in the high bits and the next edge in the low 4.
					boundaryPixels[neighborLevel].push_back(neighborPixel << 4);
					
					if (neighborLevel < priority)
						priority = neighborLevel;
				}
				else {
					// Lower neighbour: queue the current pixel (resuming at the
					// next edge) and descend into the neighbour.
					boundaryPixels[curLevel].push_back((curPixel << 4)|(curEdge + 1));
					
					if (curLevel < priority)
						priority = curLevel;
					
					curPixel = neighborPixel;
					curEdge = 0;
					curLevel = neighborLevel;
					
					goto step_3;
				}
			}
		}
		
		// 5. All edges explored: add the pixel to the region on top of the stack.
		regionStack.back()->accumulate(x, y);

		// 6. No boundary pixel left: run the detection and release the pool.
		if (priority == 256) {
			regionStack.back()->detect(delta_, minArea_ * width * height,
			                           maxArea_ * width * height, maxVariation_,
			                           minDiversity_, regions);
			poolIndex_ = 0;
			return;
		}
		
		// 7. Resume from the queued pixel with the lowest grey level.
		curPixel = boundaryPixels[priority].back() >> 4;
		curEdge = boundaryPixels[priority].back() & 15;
		
		boundaryPixels[priority].pop_back();
		
		// Bound check first: priority may reach 256, which is not a valid index.
		while ((priority < 256) && boundaryPixels[priority].empty())
			++priority;
		
		const int newPixelGreyLevel = bits[curPixel];
		
		// 8. The level changed: merge stack entries until the top matches it.
		if (newPixelGreyLevel != curLevel) {
			curLevel = newPixelGreyLevel;
			processStack(newPixelGreyLevel, curPixel, regionStack);
		}
	}
}

void MSER::processStack(int newPixelGreyLevel,int pixel,vector<Region *> &regionStack)
{
	do {
		Region * top = regionStack.back();
		
		regionStack.pop_back();
		if (newPixelGreyLevel < regionStack.back()->level_) {
		    regionStack.push_back(new(&pool_[poolIndex_++]);
                    Region(newPixelGreyLevel,pixel));
		    if (poolIndex_ == pool_.size())
			 top = reinterpret_cast<Region *>(reinterpret_cast<char *>(top) +
				doublePool(regionStack));
		    regionStack.back()->merge(top);
		    return;
		}
		regionStack.back()->merge(top);
	}
	while (newPixelGreyLevel > regionStack.back()->level_);
}

// Doubles the capacity of pool_ and re-bases every pointer that refers to it.
//
//   regionStack  stack of pointers into pool_; each entry is shifted in place
//
// Returns the byte distance from the old storage to the new one, so that the
// caller can shift any further pointer it still holds.
ptrdiff_t MSER::doublePool(vector<Region *> & regionStack)
{
	assert(!pool_.empty());

	const size_t oldCount = pool_.size();

	vector<Region> grown(oldCount * 2);
	copy(pool_.begin(), pool_.end(), grown.begin());

	char * const oldBase = reinterpret_cast<char *>(&pool_[0]);
	char * const newBase = reinterpret_cast<char *>(&grown[0]);
	const ptrdiff_t shift = newBase - oldBase;

	// The copied regions still link to the old storage: move every non-null link.
	for (size_t r = 0; r < oldCount; ++r) {
		Region & region = grown[r];
		Region ** const links[3] = { &region.parent_, &region.child_, &region.next_ };

		for (size_t k = 0; k < 3; ++k) {
			Region *& link = *links[k];

			if (link)
				link = reinterpret_cast<Region *>(reinterpret_cast<char *>(link) + shift);
		}
	}

	// Stack entries are shifted unconditionally.
	for (vector<Region *>::iterator entry = regionStack.begin(); entry != regionStack.end(); ++entry)
		*entry = reinterpret_cast<Region *>(reinterpret_cast<char *>(*entry) + shift);

	pool_.swap(grown);

	return shift;
}
......... 

Python版MSERS

import numpy as np
import cv2
import video

if __name__ == '__main__':
    import sys

    # Video source: first command-line argument if given, otherwise device 0.
    try:
        video_src = sys.argv[1]
    except IndexError:
        video_src = 0

    cam = video.create_capture(video_src)
    # NOTE(review): cv2.MSER() looks like the OpenCV 2.x Python API; newer
    # releases expose cv2.MSER_create() instead -- confirm the target version.
    mser = cv2.MSER()
    while True:
        ret, img = cam.read()
        if not ret:
            # No frame delivered (end of stream or grab failure): img is not
            # usable, so stop instead of crashing in cvtColor.
            break
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        vis = img.copy()

        # Each detected region is a point set; draw its convex hull in green.
        regions = mser.detect(gray, None)
        hulls = [cv2.convexHull(p.reshape(-1, 1, 2)) for p in regions]
        cv2.polylines(vis, hulls, 1, (0, 255, 0))

        cv2.imshow('img', vis)
        # Quit on ESC (key code 27).
        if 0xFF & cv2.waitKey(5) == 27:
            break
    cv2.destroyAllWindows()

測試輸出


經測試輸出視訊影象分析所得:MSER檢測有時也與閾值化有關,每個極值區域是閾值化影象的一個連通分量。然而,並不需要全域性或"最優"的閾值,而是測試所有的閾值,並評估連通分量在各閾值下的穩定性。最終,分水嶺是輸入影象的劃分,而如果某些影象部分仍有穩定的閾值存在的話,MSER就可能是巢狀的。

參考文獻

[1] J. Matas, O. Chum, M. Urban, and T. Pajdla. Robust wide baseline stereo from maximally stable extremal regions. Proc. of British Machine Vision Conference, pages 384-396, 2002.

關於Image Engineering & Computer Vision的更多討論與交流,敬請關注本博和新浪微博songzi_tea.