1. 程式人生 > >基於圖的影象分割方法(Graph-Based Image Segmentation)原始碼閱讀筆記

基於圖的影象分割方法(Graph-Based Image Segmentation)原始碼閱讀筆記

這個方法被應用於深度學習目標檢測的經典之作selective search方法中(Selective Search for Object Recognition),用於初始化分割區域。論文題目:《Efficient Graph-Based Image Segmentation》

查閱了許多部落格,後來感覺,對於這個方法整體還是一知半解,於是花了一個下午閱讀了原始碼,做一個筆記,如有錯誤,希望大家指正

原始碼輸入5個引數,示意如下:

 sigma: Used to smooth the input image before segmenting it.
 k: Value for the threshold function.
 min: Minimum component size enforced by post-processing.
 input: Input image.
 output: Output image.

大意為,sigma:使用的平滑引數,k:給出的初始化閾值,min:最小分割塊大小(用於合併小塊),輸入圖片(input)和輸出圖片(output)(都要為pnm格式)

引數傳入segment.cpp中,如下

image<rgb> *seg = segment_image(input, sigma, k, min_size, &num_ccs); 

呼叫segment-image.h中的 image<rgb> *segment_image,函式大概註釋如下:

/*
Copyright (C) 2006 Pedro Felzenszwalb

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
*/
#ifndef SEGMENT_IMAGE #define SEGMENT_IMAGE #include <cstdlib> #include <image.h> #include <misc.h> #include <filter.h> #include <stdio.h> #include "segment-graph.h" // random color rgb random_rgb(){ rgb c; double r; c.r = (uchar)random(); c.g = (uchar)random(); c.b = (uchar)random(); return c; } // dissimilarity measure between pixels // 衡量不相似度,用rgb距離 static inline float diff(image<float> *r, image<float> *g, image<float> *b, int x1, int y1, int x2, int y2) { return sqrt(square(imRef(r, x1, y1)-imRef(r, x2, y2)) + square(imRef(g, x1, y1)-imRef(g, x2, y2)) + square(imRef(b, x1, y1)-imRef(b, x2, y2))); } /* * Segment an image * * Returns a color image representing the segmentation. * * im: image to segment. * sigma: to smooth the image. * c: constant for treshold function. * min_size: minimum component size (enforced by post-processing stage). * num_ccs: number of connected components in the segmentation. */ image<rgb> *segment_image(image<rgb> *im, float sigma, float c, int min_size, int *num_ccs) { //影象的寬和高 int width = im->width(); int height = im->height(); // 初始化了三個通道的片 image<float> *r = new image<float>(width, height); image<float> *g = new image<float>(width, height); image<float> *b = new image<float>(width, height); // smooth each color channel //平滑色彩通道 for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { imRef(r, x, y) = imRef(im, x, y).r; imRef(g, x, y) = imRef(im, x, y).g; imRef(b, x, y) = imRef(im, x, y).b; } } image<float> *smooth_r = smooth(r, sigma); image<float> *smooth_g = smooth(g, sigma); image<float> *smooth_b = smooth(b, sigma); delete r; delete g; delete b; // build graph //構建圖,邊的陣列是原來的4倍,就是四個方向,計算出了全部的點和點之間的相似性 //這裡是初始化了width*height*4大小的陣列,其實用不了這麼多 edge *edges = new edge[width*height*4]; int num = 0; for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { //對每個邊的左右點計算權重 if (x < width-1) { edges[num].a = y * width + x; edges[num].b = y * width + (x+1); edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x+1, y); 
num++; } if (y < height-1) { edges[num].a = y * width + x; edges[num].b = (y+1) * width + x; edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x, y+1); num++; } if ((x < width-1) && (y < height-1)) { edges[num].a = y * width + x; edges[num].b = (y+1) * width + (x+1); edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x+1, y+1); num++; } if ((x < width-1) && (y > 0)) { edges[num].a = y * width + x; edges[num].b = (y-1) * width + (x+1); edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x+1, y-1); num++; } } } // printf("%d %d", width*height*4, num); // printf("width is:%d height is:%d", width, height); delete smooth_r; delete smooth_g; delete smooth_b; // segment //分割步驟,。。。。跳出 universe *u = segment_graph(width*height, num, edges, c); // post process small components //合併最小的塊,如果邊兩邊的塊小於min_size就合併 for (int i = 0; i < num; i++) { int a = u->find(edges[i].a); int b = u->find(edges[i].b); if ((a != b) && ((u->size(a) < min_size) || (u->size(b) < min_size))) u->join(a, b); } delete [] edges; *num_ccs = u->num_sets(); //寫入新的pnm圖片 image<rgb> *output = new image<rgb>(width, height); // pick random colors for each component rgb *colors = new rgb[width*height]; for (int i = 0; i < width*height; i++) colors[i] = random_rgb(); for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { int comp = u->find(y * width + x); imRef(output, x, y) = colors[comp]; } } delete [] colors; delete u; return output; } #endif

其中 segment_graph(width*height, num, edges, c); 這一步跳到 segment-graph.h中

/*
Copyright (C) 2006 Pedro Felzenszwalb

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
*/

#ifndef SEGMENT_GRAPH
#define SEGMENT_GRAPH

#include <algorithm>
#include <cmath>
#include "disjoint-set.h"

// threshold function: c divided by the component size.
// Both arguments are parenthesized so that expression arguments such as
// THRESHOLD(n + 1, c) divide by the whole expression.
#define THRESHOLD(size, c) ((c)/(size))

// An edge of the graph: w is its weight, a and b are the indices of the
// two vertices it connects.
struct edge {
  float w;
  int a, b;
};

// Orders edges by ascending weight, so std::sort puts the smallest weight
// first. Marked inline because it is defined in a header.
inline bool operator<(const edge &a, const edge &b) {
  return a.w < b.w;
}

/*
 * Segment a graph
 *
 * Returns a disjoint-set forest representing the segmentation. The forest
 * is allocated with new and is not freed by this function.
 *
 * num_vertices: number of vertices in graph.
 * num_edges: number of edges in graph
 * edges: array of edges (sorted in place by this function).
 * c: constant for threshold function.
 *
 * Marked inline because it is defined in a header.
 */
inline universe *segment_graph(int num_vertices, int num_edges, edge *edges, 
            float c) { 
  // sort edges by weight, smallest first
  std::sort(edges, edges + num_edges);

  // make a disjoint-set forest; every vertex starts as its own component
  universe *u = new universe(num_vertices);

  // init thresholds
  // Every component starts with size 1, so its threshold is c / 1.
  float *threshold = new float[num_vertices];
  for (int i = 0; i < num_vertices; i++)
    threshold[i] = THRESHOLD(1,c);

  // for each edge, in non-decreasing weight order...
  for (int i = 0; i < num_edges; i++) {
    edge *pedge = &edges[i];

    // components connected by this edge (their current root indices)
    int a = u->find(pedge->a);
    int b = u->find(pedge->b);
    if (a != b) {
      // Merge only when the edge weight does not exceed the threshold of
      // either component.
      if ((pedge->w <= threshold[a]) &&
          (pedge->w <= threshold[b])) {
        u->join(a, b);
        // Thresholds are looked up by root index, so only the entry of the
        // merged component's root needs to be updated.
        a = u->find(a);
        threshold[a] = pedge->w + THRESHOLD(u->size(a), c);
      }
    }
  }

  // free up
  // threshold was allocated with new[], so it must be released with delete[].
  delete [] threshold;
  return u;
}

#endif

下面是 universe *u = new universe(num_vertices); 的定義。。。在disjoint-set.h中

/*
Copyright (C) 2006 Pedro Felzenszwalb

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
*/

#ifndef DISJOINT_SET
#define DISJOINT_SET

// disjoint-set forests using union-by-rank and path compression (sort of).

// One element of the disjoint-set forest.
typedef struct {
  int rank;  // compared by join() to decide which root becomes the parent
  int p;     // index of the parent element; a root has p equal to its own index
  int size;  // element count of the set; join() keeps it up to date on the surviving root
} uni_elt;

// Disjoint-set forest over the elements 0 .. elements-1.
class universe {
public:
  // Creates `elements` singleton sets.
  universe(int elements);
  ~universe();
  // Returns the root index of the set containing x.
  int find(int x);  
  // Merges the sets rooted at x and y. Callers in this code base pass two
  // distinct roots obtained from find().
  void join(int x, int y);
  // Size stored on element x; meaningful when x is a root.
  int size(int x) const { return elts[x].size; }
  // Number of sets currently in the forest.
  int num_sets() const { return num; }

private:
  // Not copyable: the object owns elts, and a member-wise copy would make
  // two objects delete the same array. Declared but intentionally left
  // undefined.
  universe(const universe &);
  universe &operator=(const universe &);

  uni_elt *elts;
  int num;
};

// The member functions below are marked inline because they are defined in
// a header.

inline universe::universe(int elements) {
  elts = new uni_elt[elements];
  num = elements;
  // Every element starts as the root of its own one-element set.
  for (int i = 0; i < elements; i++) {
    elts[i].rank = 0;
    elts[i].size = 1;
    elts[i].p = i;
  }
}

inline universe::~universe() {
  delete [] elts;
}

inline int universe::find(int x) {
  // Follow parent links until reaching an element that is its own parent.
  int y = x;
  while (y != elts[y].p)
    y = elts[y].p;
  // Point x directly at the root; the other elements on the path are left
  // as they were.
  elts[x].p = y;
  return y;
}

inline void universe::join(int x, int y) {
  // The root with the larger rank becomes the parent; on a tie y becomes
  // the parent and its rank is incremented.
  if (elts[x].rank > elts[y].rank) {
    elts[y].p = x;
    elts[x].size += elts[y].size;
  } else {
    elts[x].p = y;
    elts[y].size += elts[x].size;
    if (elts[x].rank == elts[y].rank)
      elts[y].rank++;
  }
  num--;
}

#endif

關於如何平滑(smooth)引數,在filter.h裡,就不貼了(裡面涉及一些數學公式,不看程式碼會看的很累)

個人覺得,大概思路是:先把每個點各自作為一個塊,將邊按權重由小到大排序,然後依序遍歷所有的邊;若一條邊連接兩個不同的塊,且其權重不大於這兩個塊各自的閾值,就合併這兩個塊。把邊全部遍歷過之後,主要的合併就完成了,最後再把小於min_size的塊與相鄰的塊融合起來。

最後,貼上一段原始論文中的描述,供大家參考

Algorithm 1 Segmentation algorithm.
The input is a graph $G = (V, E)$, with $n$ vertices and $m$ edges. The output is a
segmentation of $V$ into components $S = (C_1, \ldots, C_r)$.
0. Sort $E$ into $\pi = (o_1, \ldots, o_m)$, by non-decreasing edge weight.
1. Start with a segmentation $S^0$, where each vertex $v_i$ is in its own component.
2. Repeat step 3 for $q = 1, \ldots, m$.
3. Construct $S^q$ given $S^{q-1}$ as follows. Let $v_i$ and $v_j$ denote the vertices connected
by the $q$-th edge in the ordering, i.e., $o_q = (v_i, v_j)$. If $v_i$ and $v_j$ are in disjoint
components of $S^{q-1}$ and $w(o_q)$ is small compared to the internal difference of
both those components, then merge the two components otherwise do nothing.
More formally, let $C_i^{q-1}$ be the component of $S^{q-1}$ containing $v_i$ and $C_j^{q-1}$ the
component containing $v_j$. If $C_i^{q-1} \neq C_j^{q-1}$ and $w(o_q) \le MInt(C_i^{q-1}, C_j^{q-1})$ then
$S^q$ is obtained from $S^{q-1}$ by merging $C_i^{q-1}$ and $C_j^{q-1}$. Otherwise $S^q = S^{q-1}$.
4. Return $S = S^m$.

最後的最後,程式碼和論文如下:這裡