Caffe原始碼（六）: pooling_layer 分析

阿新 • • 發佈：2019-01-01

簡單介紹

PoolingLayer（pooling_layer）是 Layer 的子類，實現了 pooling 操作。其主要函式有 LayerSetUp、Reshape、Forward_cpu、Backward_cpu。

主要函式

1.LayerSetUp 函式：

// Reads the layer's PoolingParameter, validates it through the CHECK macros,
// and caches the kernel, padding and stride settings in the matching members.
//
// bottom[0] is consulted only when global pooling is requested, in which case
// the kernel takes its full height and width. `top` is not used here.
template <typename Dtype>
void PoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const PoolingParameter& pool_param = this->layer_param_.pooling_param();

  // Kernel specification: with global pooling no kernel field may be set;
  // otherwise exactly one of kernel_size or the (kernel_h, kernel_w) pair.
  if (!pool_param.global_pooling()) {
    CHECK(!pool_param.has_kernel_size() !=
      !(pool_param.has_kernel_h() && pool_param.has_kernel_w()))
      << "Filter size is kernel_size OR kernel_h and kernel_w; not both";
    CHECK(pool_param.has_kernel_size() ||
      (pool_param.has_kernel_h() && pool_param.has_kernel_w()))
      << "For non-square filters both kernel_h and kernel_w are required.";
  } else {
    CHECK(!(pool_param.has_kernel_size() ||
      pool_param.has_kernel_h() || pool_param.has_kernel_w()))
      << "With Global_pooling: true Filter size cannot specified";
  }

  // Padding and stride: either the per-axis pair is given (and the square
  // shorthand is not), or neither per-axis field is given.
  CHECK((!pool_param.has_pad() && pool_param.has_pad_h()
      && pool_param.has_pad_w())
      || (!pool_param.has_pad_h() && !pool_param.has_pad_w()))
      << "pad is pad OR pad_h and pad_w are required.";
  CHECK((!pool_param.has_stride() && pool_param.has_stride_h()
      && pool_param.has_stride_w())
      || (!pool_param.has_stride_h() && !pool_param.has_stride_w()))
      << "Stride is stride OR stride_h and stride_w are required.";

  // Kernel size: the whole input plane for global pooling, otherwise the
  // user-supplied square or rectangular kernel.
  global_pooling_ = pool_param.global_pooling();
  if (global_pooling_) {
    kernel_h_ = bottom[0]->height();
    kernel_w_ = bottom[0]->width();
  } else if (pool_param.has_kernel_size()) {
    kernel_w_ = pool_param.kernel_size();
    kernel_h_ = kernel_w_;
  } else {
    kernel_h_ = pool_param.kernel_h();
    kernel_w_ = pool_param.kernel_w();
  }
  CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero.";
  CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero.";

  // Padding: per-axis values when pad_h is present, else the shared `pad`.
  if (pool_param.has_pad_h()) {
    pad_h_ = pool_param.pad_h();
    pad_w_ = pool_param.pad_w();
  } else {
    pad_w_ = pool_param.pad();
    pad_h_ = pad_w_;
  }

  // Stride: per-axis values when stride_h is present, else the shared `stride`.
  if (pool_param.has_stride_h()) {
    stride_h_ = pool_param.stride_h();
    stride_w_ = pool_param.stride_w();
  } else {
    stride_w_ = pool_param.stride();
    stride_h_ = stride_w_;
  }

  if (global_pooling_) {
    CHECK(pad_h_ == 0 && pad_w_ == 0 && stride_h_ == 1 && stride_w_ == 1)
      << "With Global_pooling: true; only pad = 0 and stride = 1";
  }

  // The remaining checks only concern padded pooling.
  if (pad_h_ == 0 && pad_w_ == 0) {
    return;
  }
  CHECK(this->layer_param_.pooling_param().pool()
      == PoolingParameter_PoolMethod_AVE
      || this->layer_param_.pooling_param().pool()
      == PoolingParameter_PoolMethod_MAX)
      << "Padding implemented only for average and max pooling.";
  // The padding must be smaller than the kernel along each axis.
  CHECK_LT(pad_h_, kernel_h_);
  CHECK_LT(pad_w_, kernel_w_);
}

2.Reshape 函式：

// Derives the pooled output size from the shape of bottom[0] and the cached
// kernel/pad/stride members, then reshapes the top blob(s) and the index
// buffers used by max and stochastic pooling.
template <typename Dtype>
void PoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, "
      << "corresponding to (num, channels, height, width)";
  channels_ = bottom[0]->channels();
  height_ = bottom[0]->height();
  width_ = bottom[0]->width();
  if (global_pooling_) {
    // Global pooling: the kernel follows the current input plane.
    kernel_h_ = height_;
    kernel_w_ = width_;
  }

  // Output height and width after pooling: ceil(extent / stride) + 1, where
  // extent is the padded input size minus the kernel size.
  const float extent_h =
      static_cast<float>(height_ + 2 * pad_h_ - kernel_h_);
  const float extent_w =
      static_cast<float>(width_ + 2 * pad_w_ - kernel_w_);
  pooled_height_ = static_cast<int>(ceil(extent_h / stride_h_)) + 1;
  pooled_width_ = static_cast<int>(ceil(extent_w / stride_w_)) + 1;

  if (pad_h_ || pad_w_) {
    // With padding, the last pooling window must start strictly inside the
    // image (not in the padding); if it does not, drop that last window.
    if ((pooled_height_ - 1) * stride_h_ >= height_ + pad_h_) {
      --pooled_height_;
    }
    if ((pooled_width_ - 1) * stride_w_ >= width_ + pad_w_) {
      --pooled_width_;
    }
    CHECK_LT((pooled_height_ - 1) * stride_h_, height_ + pad_h_);
    CHECK_LT((pooled_width_ - 1) * stride_w_, width_ + pad_w_);
  }

  // Shape of the output blob; an optional second top blob mirrors it.
  top[0]->Reshape(bottom[0]->num(), channels_, pooled_height_,
      pooled_width_);
  if (top.size() > 1) {
    top[1]->ReshapeLike(*top[0]);
  }

  const bool is_max_pooling =
      this->layer_param_.pooling_param().pool() ==
      PoolingParameter_PoolMethod_MAX;
  const bool is_stochastic_pooling =
      this->layer_param_.pooling_param().pool() ==
      PoolingParameter_PoolMethod_STOCHASTIC;

  // Max pooling needs the position of every selected maximum for the backward
  // pass. When there is exactly one top blob, max_idx_ holds those indices and
  // is given the same shape as top[0].
  if (is_max_pooling && top.size() == 1) {
    max_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,
        pooled_width_);
  }
  // Stochastic pooling keeps its random indices in rand_idx_, shaped the
  // same way.
  if (is_stochastic_pooling) {
    rand_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,
      pooled_width_);
  }
}

3.Forward_cpu 函式：

// CPU forward pass: fills top[0] with the pooled values of bottom[0].
//
// MAX pooling also records, for every output element, the index of the
// selected input element within the current channel plane (h * width_ + w).
// The index is written to top[1] (stored as Dtype) when a second top blob
// exists, otherwise to the internal max_idx_ buffer.
// AVE pooling divides each window sum by the window size measured on the
// padded input. STOCHASTIC pooling is not implemented here.
//
// Note: FLT_MAX requires <cfloat> to be included by the translation unit.
template <typename Dtype>
void PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const int top_count = top[0]->count();
  // We'll output the mask to top[1] if it's of size >1.
  const bool use_top_mask = top.size() > 1;
  int* mask = NULL;  // suppress warnings about uninitialized variables
  Dtype* top_mask = NULL;
  // Different pooling methods. We explicitly do the switch outside the for
  // loop to save time, although this results in more code.
  switch (this->layer_param_.pooling_param().pool()) {
  case PoolingParameter_PoolMethod_MAX:
    // Initialize: every index starts at -1 (nothing selected yet) ...
    if (use_top_mask) {
      top_mask = top[1]->mutable_cpu_data();
      caffe_set(top_count, Dtype(-1), top_mask);
    } else {
      mask = max_idx_.mutable_cpu_data();
      caffe_set(top_count, -1, mask);
    }
    // ... and every output starts at the most negative finite float, so the
    // first input value compared against it always replaces it.
    caffe_set(top_count, Dtype(-FLT_MAX), top_data);
    // The main loop
    for (int n = 0; n < bottom[0]->num(); ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {
          for (int pw = 0; pw < pooled_width_; ++pw) {
            // Corners of the pooling window in the (unpadded) input plane,
            // clipped to the image bounds.
            int hstart = ph * stride_h_ - pad_h_;
            int wstart = pw * stride_w_ - pad_w_;
            int hend = min(hstart + kernel_h_, height_);
            int wend = min(wstart + kernel_w_, width_);
            hstart = max(hstart, 0);
            wstart = max(wstart, 0);
            // Blob data is stored as a flat array: (ph, pw) of the output
            // plane maps to this linear index.
            const int pool_index = ph * pooled_width_ + pw;
            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
                // Linear index of (h, w) within the input plane.
                const int index = h * width_ + w;
                if (bottom_data[index] > top_data[pool_index]) {
                  // Keep the running maximum of the window ...
                  top_data[pool_index] = bottom_data[index];
                  // ... and remember where in bottom it came from.
                  if (use_top_mask) {
                    top_mask[pool_index] = static_cast<Dtype>(index);
                  } else {
                    mask[pool_index] = index;
                  }
                }
              }
            }
          }
        }
        // compute offset: advance all pointers to the next channel plane
        bottom_data += bottom[0]->offset(0, 1);
        top_data += top[0]->offset(0, 1);
        if (use_top_mask) {
          top_mask += top[0]->offset(0, 1);
        } else {
          mask += top[0]->offset(0, 1);
        }
      }
    }
    break;
  case PoolingParameter_PoolMethod_AVE:
    // Sums are accumulated with +=, so start every output at zero.
    for (int i = 0; i < top_count; ++i) {
      top_data[i] = 0;
    }
    // The main loop
    for (int n = 0; n < bottom[0]->num(); ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {
          for (int pw = 0; pw < pooled_width_; ++pw) {
            int hstart = ph * stride_h_ - pad_h_;
            int wstart = pw * stride_w_ - pad_w_;
            int hend = min(hstart + kernel_h_, height_ + pad_h_);
            int wend = min(wstart + kernel_w_, width_ + pad_w_);
            // Number of elements in the window, counted on the padded input
            // (before the bounds are clipped to the real image below).
            int pool_size = (hend - hstart) * (wend - wstart);
            hstart = max(hstart, 0);
            wstart = max(wstart, 0);
            hend = min(hend, height_);
            wend = min(wend, width_);
            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
                // Sum the input elements covered by the window.
                top_data[ph * pooled_width_ + pw] +=
                    bottom_data[h * width_ + w];
              }
            }
            // Turn the sum into an average.
            top_data[ph * pooled_width_ + pw] /= pool_size;
          }
        }
        // compute offset: advance to the next channel plane
        bottom_data += bottom[0]->offset(0, 1);
        top_data += top[0]->offset(0, 1);
      }
    }
    break;
  case PoolingParameter_PoolMethod_STOCHASTIC:
    NOT_IMPLEMENTED;
    break;
  default:
    LOG(FATAL) << "Unknown pooling method.";
  }
}

4.Backward_cpu 函式

關於pooling的求導可以參考這裡

// CPU backward pass: computes the gradient with respect to bottom[0] from
// the gradient of top[0]. Does nothing when propagate_down[0] is false.
//
// MAX pooling routes each output gradient to the input element whose index
// was recorded during the forward pass (in top[1] when a second top blob
// exists, otherwise in max_idx_). AVE pooling spreads each output gradient
// evenly over its window, dividing by the window size measured on the padded
// input. STOCHASTIC pooling is not implemented here.
template <typename Dtype>
void PoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  // Gradients are accumulated with += below, so start bottom_diff at zero.
  caffe_set(bottom[0]->count(), Dtype(0), bottom_diff);
  // We'll read the mask from top[1] if top is of size >1.
  const bool use_top_mask = top.size() > 1;
  const int* mask = NULL;  // suppress warnings about uninitialized variables
  const Dtype* top_mask = NULL;
  // Different pooling methods. We explicitly do the switch outside the for
  // loop to save time, although this results in more code.
  switch (this->layer_param_.pooling_param().pool()) {
  case PoolingParameter_PoolMethod_MAX:
    if (use_top_mask) {
      top_mask = top[1]->cpu_data();
    } else {
      mask = max_idx_.cpu_data();
    }
    // The main loop
    for (int n = 0; n < top[0]->num(); ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {
          for (int pw = 0; pw < pooled_width_; ++pw) {
            const int index = ph * pooled_width_ + pw;
            // Read the recorded input index from whichever buffer is in use.
            // An explicit branch (rather than ?: over a Dtype and an int
            // operand) keeps the int mask from being converted to Dtype and
            // back, which would round large indices when Dtype is float.
            int bottom_index;
            if (use_top_mask) {
              bottom_index = static_cast<int>(top_mask[index]);
            } else {
              bottom_index = mask[index];
            }
            // Forward_cpu initializes the mask to -1 and only overwrites it
            // when a window element is selected; a negative value therefore
            // means there is no input element to receive this gradient.
            if (bottom_index < 0) {
              continue;
            }
            // Route the output gradient to the element that was the maximum.
            bottom_diff[bottom_index] += top_diff[index];
          }
        }
        // Advance all pointers to the next channel plane.
        bottom_diff += bottom[0]->offset(0, 1);
        top_diff += top[0]->offset(0, 1);
        if (use_top_mask) {
          top_mask += top[0]->offset(0, 1);
        } else {
          mask += top[0]->offset(0, 1);
        }
      }
    }
    break;
  case PoolingParameter_PoolMethod_AVE:
    // The main loop
    for (int n = 0; n < top[0]->num(); ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {
          for (int pw = 0; pw < pooled_width_; ++pw) {
            int hstart = ph * stride_h_ - pad_h_;
            int wstart = pw * stride_w_ - pad_w_;
            int hend = min(hstart + kernel_h_, height_ + pad_h_);
            int wend = min(wstart + kernel_w_, width_ + pad_w_);
            // Window size on the padded input, computed before clipping to
            // the real image bounds (same definition as the forward pass).
            int pool_size = (hend - hstart) * (wend - wstart);
            hstart = max(hstart, 0);
            wstart = max(wstart, 0);
            hend = min(hend, height_);
            wend = min(wend, width_);
            // Walk the pooling window.
            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
                // Each element of the window receives an equal share of the
                // output gradient.
                bottom_diff[h * width_ + w] +=
                    top_diff[ph * pooled_width_ + pw] / pool_size;
              }
            }
          }
        }
        // offset: advance to the next channel plane
        bottom_diff += bottom[0]->offset(0, 1);
        top_diff += top[0]->offset(0, 1);
      }
    }
    break;
  case PoolingParameter_PoolMethod_STOCHASTIC:
    NOT_IMPLEMENTED;
    break;
  default:
    LOG(FATAL) << "Unknown pooling method.";
  }
}

Caffe原始碼（六）: pooling_layer 分析

目錄簡單介紹 pooling_layer 是Layer 的子類，實現了pooling操作。其主要函式有LayerSetUp，Reshape，Forward_cpu， Backward_cpu。主要函式 1.LayerSetUp 函

Caffe原始碼（四）：math_functions 分析

轉自：https://blog.csdn.net/seven_first/article/details/47378697#1-caffecpugemm-%E5%87%BD%E6%95%B0 主要函式 math_function 定義了caffe 中用到的一些矩陣操作和數值計算的一些函式，這

Caffe原始碼（五）：conv_layer 分析

目錄簡單介紹首先要明確的一點是：ConvolutionLayer 是 BaseConvolutionLayer的子類，BaseConvolutionLayer 是 Layer 的子類。ConvolutionLayer 除了繼承了相應的成員變數和

Caffe原始碼（三）：layer 分析

目錄簡單介紹 Caffe 中的 layer 是組成 net 的 component。layer 必須執行向前傳播函式，即輸入 Blob bottom，產生一個輸出 Blob top。絕大多數layer 要執行反向傳播函式，即計算關於輸入

rocketmq之原始碼分析netty實現原始碼（六）

netty的服務端核心屬性 public class NettyRemotingServer extends NettyRemo

Giraph源代碼分析（六）——Edge 分析

available align put and mark lin cer mutable oba HamaWhite 原創，轉載請註明出處。歡迎大家增加Giraph 技術交流群： 228591158 歡迎訪問：

NOIP複賽複習（六）演算法分析與排序模板

演算法分析演算法分析的目的是預測演算法所需的資源，如計算時間（CPU 消耗）、記憶體空間（RAM 消耗）、通訊時間（頻寬消耗）等，以及預測演算法的執行時間，即在給定輸入規模時，所執行的基本運算元量，或者稱為演算法複雜度。演算法的執行時間取決於輸入的資料特徵，輸入

【移動開發】關於一對一視訊交友技術原始碼（六）現代播放器原理

在上一篇延遲優化中，我們分享了不少簡單實用的調優技巧。本篇是《一對一視訊直播技術詳解》系列之六：現代播放器原理。近年來，多平臺適配需求的增長導致了流媒體自適應位元速率播放的興起，這迫使 Web 和移動開發者們必須重新思考視訊技術的相關邏輯。首先，巨頭們分分發布了 HLS、HDS 和 Smooth Stre

系統學習NLP（六）--語義分析

轉自：https://www.jianshu.com/p/7463267b0106 對於不同的語言單位，語義分析的任務各不相同。在詞的層次上，語義分析的基本任務是進行詞義消歧（WSD），在句子層面上是語義角色標註（SRL），在篇章層面上是指代消歧，也稱共指消解。詞義消歧由於詞是能夠獨

Zookeeper 原始碼（六）Leader-Follower-Observer

Zookeeper 原始碼（六）Leader-Follower-Observer 上一節介紹了 Leader 選舉的全過程，本節講解一下 Leader-Follower-Observer 伺服器的三種角色。經過 Leader 選舉後各伺服器都能確定自己的角色，下一步就是初始化各自的角色。先回顧一下【Qu

java B2B2C springmvc mybatis電子商務平臺原始碼-（六）分散式配置中心(Spring Cloud Config)

一、簡介在分散式系統中，由於服務數量巨多，為了方便服務配置檔案統一管理，實時更新，所以需要分散式配置中心元件。在Spring Cloud中，有分散式配置中心元件spring cloud config ，它支援配置服務放在配置服務的記憶體中（即本地），也支援放在遠端Git倉庫中。在spring

MFC/Qt下呼叫caffe原始碼（一）---將caffe原始碼生成動態連結庫dll

本人研一，最近想將用caffe訓出的模型，通過MFC做出一個介面，扔進一張圖片，點選預測，即可呼叫預測分類函式完成測試，並且通過MessageBox彈出最終分類的資訊。首先通過查資料總結出兩種方法，第一：直接呼叫編譯好的caffe原始碼；（本次用到的原始碼是classif

MFC/Qt下呼叫caffe原始碼（二）---MFC下呼叫caffe的動態連線庫dll檔案

首先，先看下最後的效果吧。 win7 vs2013 新建一個MFC 工程注：MFC中如何最簡便的方法將圖片顯示到對話方塊中？大家可以搜下，很多種方法，但是我採用的是最簡單的方法，即將opencv視窗繫結到MFC的PIcture control上。在:

白話Spring原始碼（六）：BeanDefinition的註冊過程

上一篇部落格講了bean的建立過程。這次跟大家分享BeanDefinition的註冊過程。一、什麼是BeanDefinition BeanDefinition：就是bean的定義資訊，比如bean的名稱，對應的class，bean的屬性值，bean是否是單列等等，一般是通過xml來定義的，

vue原始碼（六）Vue 選項的合併

本文是學習vue原始碼，之所以轉載過來是方便自己隨時檢視，在這裡要感謝HcySunYang大神，提供的開源vue原始碼解析，寫的非常非常好，簡單易懂，比自己看要容易多了，他的文章連結地址是http://hcysun.me/vue-design/art/ 上一章節我們瞭解了 Vue

菜鳥讀caffe原始碼（一）：protoc.exe的使用

caffe感覺都是別人玩剩下的東西，但是結合目前專案和以後對工作的設想，caffe原始碼還是有很好的學習價值。參考入門書籍《深度學習21天實戰caffe-趙永科》學習protobuffer相關知識，瞭解如何讀取proto.txt等相關引數檔案。路線下載protobu

Caffe學習（六）損失層及其引數設定

機器學習的目的就是通過對訓練樣本輸出與真實值不一致的進行懲罰，得到損失Loss，然後採用一定的優化演算法對loss進行最小優化，進而得到合理的網路權值。本文介紹Caffe中含有的常見的LossLaye

SpringBoot內建的各種Starter是怎樣構建的？--SpringBoot原始碼（六）

**注：該原始碼分析對應SpringBoot版本為2.1.0.RELEASE** # 1 溫故而知新本篇接 [外部配置屬性值是如何被繫結到XxxProperties類屬性上的？--SpringBoot原始碼（五）](https://juejin.im/post/5e689b49e51d4527143e5e2

Flume NG原始碼分析（六）應用程式使用的RpcClient設計

上一篇Flume NG原始碼分析（五）使用ThriftSource通過RPC方式收集日誌介紹了ThriftSource利用Thrfit服務ThriftSourceProtocol來收集日誌。這篇說說flume-ng-sdk中提供給應用層序使用的RpcClient的設計和實現。繼續使用ThriftR

YOLOv2原始碼分析（六）

文章全部YOLOv2原始碼分析我們再次回到了parse_network_cfg函式 //parse_network_cfg else if(lt == ACTIVE){

Caffe原始碼（六）: pooling_layer 分析

目錄

簡單介紹

主要函式

1.LayerSetUp 函式：

2.Reshape 函式：

3.Forward_cpu 函式：

4.Backward_cpu 函式

相關推薦