Caffe框架原始碼剖析(6)—池化層PoolingLayer

阿新 • • 發佈：2018-10-31

卷積層ConvolutionLayer正向傳導的目標層往往是池化層PoolingLayer。池化層通過降取樣來降低卷積層輸出特徵圖的維度，同時改善結果，使模型不易出現過擬合。最常用的降取樣方法有均值取樣（取區域平均值作為降取樣值）、最大值取樣（取區域最大值作為降取樣值）和隨機取樣（取區域內隨機一個畫素作為降取樣值）等。

PoolingLayer類從Layer基類單一繼承而來，沒有派生其它子類。具體定義在pooling_layer.hpp中：

// Pooling layer: takes exactly one bottom blob and produces one top blob
// (or, for MAX pooling, optionally a second one). The method bodies declared
// here are defined out of line.
template <typename Dtype>
class PoolingLayer : public Layer<Dtype> {
 public:
  explicit PoolingLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}

  // Setup and shape hooks.
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  // Layer type name and blob-count constraints.
  virtual inline const char* type() const { return "Pooling"; }
  virtual inline int ExactNumBottomBlobs() const { return 1; }
  virtual inline int MinTopBlobs() const { return 1; }
  // MAX pooling may emit one extra top blob, so it allows up to 2 tops;
  // every other pooling method allows exactly 1.
  virtual inline int MaxTopBlobs() const {
    if (this->layer_param_.pooling_param().pool() ==
        PoolingParameter_PoolMethod_MAX) {
      return 2;
    }
    return 1;
  }

 protected:
  // Forward / backward passes for CPU and GPU.
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  // Pooling window size (height, width).
  int kernel_h_;
  int kernel_w_;
  // Step between consecutive pooling windows (height, width).
  int stride_h_;
  int stride_w_;
  // Number of padding pixels added to the input (height, width).
  int pad_h_;
  int pad_w_;
  // Number of channels of the input.
  int channels_;
  // Input image size.
  int height_;
  int width_;
  // Output size after pooling.
  int pooled_height_;
  int pooled_width_;
  // Whether to pool over the whole input (down-sample each image to 1x1).
  bool global_pooling_;
  // Indices of the sampled points for stochastic pooling.
  Blob<Dtype> rand_idx_;
  // Indices of the selected maxima for MAX pooling.
  Blob<int> max_idx_;
};

具體實現在pooling_layer.cpp中：

// Validates the layer's PoolingParameter and caches the pooling geometry
// (kernel, padding, stride, global-pooling flag) in member variables.
//
// bottom: input blobs; bottom[0] supplies the kernel size when global
//         pooling is enabled.
// top:    output blobs; not touched here.
//
// Any inconsistent configuration aborts through a failed CHECK.
template <typename Dtype>
void PoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Bind by const reference: the message is only read in this function, so
  // there is no need to copy it.
  const PoolingParameter& pool_param = this->layer_param_.pooling_param();
  if (pool_param.global_pooling()) {
    // Global pooling derives the kernel from the input, so an explicit
    // kernel size is rejected.
    CHECK(!(pool_param.has_kernel_size() ||
      pool_param.has_kernel_h() || pool_param.has_kernel_w()))
      << "With Global_pooling: true Filter size cannot specified";
  } else {
    // Exactly one of {kernel_size} or {kernel_h and kernel_w} must be given.
    CHECK(!pool_param.has_kernel_size() !=
      !(pool_param.has_kernel_h() && pool_param.has_kernel_w()))
      << "Filter size is kernel_size OR kernel_h and kernel_w; not both";
    CHECK(pool_param.has_kernel_size() ||
      (pool_param.has_kernel_h() && pool_param.has_kernel_w()))
      << "For non-square filters both kernel_h and kernel_w are required.";
  }
  // pad_h/pad_w must be given together and must not be mixed with pad.
  CHECK((!pool_param.has_pad() && pool_param.has_pad_h()
      && pool_param.has_pad_w())
      || (!pool_param.has_pad_h() && !pool_param.has_pad_w()))
      << "pad is pad OR pad_h and pad_w are required.";
  // stride_h/stride_w must be given together and must not be mixed with
  // stride.
  CHECK((!pool_param.has_stride() && pool_param.has_stride_h()
      && pool_param.has_stride_w())
      || (!pool_param.has_stride_h() && !pool_param.has_stride_w()))
      << "Stride is stride OR stride_h and stride_w are required.";
  global_pooling_ = pool_param.global_pooling();
  // Pooling window size.
  if (global_pooling_) {
    // Global pooling: the window covers the whole input image.
    kernel_h_ = bottom[0]->height();
    kernel_w_ = bottom[0]->width();
  } else {
    if (pool_param.has_kernel_size()) {
      kernel_h_ = kernel_w_ = pool_param.kernel_size();
    } else {
      kernel_h_ = pool_param.kernel_h();
      kernel_w_ = pool_param.kernel_w();
    }
  }
  CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero.";
  CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero.";
  // Padding.
  if (!pool_param.has_pad_h()) {
    pad_h_ = pad_w_ = pool_param.pad();
  } else {
    pad_h_ = pool_param.pad_h();
    pad_w_ = pool_param.pad_w();
  }
  // Stride.
  if (!pool_param.has_stride_h()) {
    stride_h_ = stride_w_ = pool_param.stride();
  } else {
    stride_h_ = pool_param.stride_h();
    stride_w_ = pool_param.stride_w();
  }
  // Reshape divides by the stride when computing the pooled output size, so
  // a zero (or negative) stride must be rejected up front.
  CHECK_GT(stride_h_, 0) << "Stride dimensions cannot be zero.";
  CHECK_GT(stride_w_, 0) << "Stride dimensions cannot be zero.";
  if (global_pooling_) {
    CHECK(pad_h_ == 0 && pad_w_ == 0 && stride_h_ == 1 && stride_w_ == 1)
      << "With Global_pooling: true; only pad = 0 and stride = 1";
  }
  if (pad_h_ != 0 || pad_w_ != 0) {
    // Padding is restricted to AVE and MAX pooling, and must be strictly
    // smaller than the kernel in each dimension.
    CHECK(this->layer_param_.pooling_param().pool()
        == PoolingParameter_PoolMethod_AVE
        || this->layer_param_.pooling_param().pool()
        == PoolingParameter_PoolMethod_MAX)
        << "Padding implemented only for average and max pooling.";
    CHECK_LT(pad_h_, kernel_h_);
    CHECK_LT(pad_w_, kernel_w_);
  }
}

// Recomputes the pooled output size from the current shape of bottom[0] and
// resizes the top blob(s) and the internal index buffers to match.
//
// bottom: bottom[0] must have 4 axes (num, channels, height, width).
// top:    top[0] is reshaped to (num, channels, pooled_height_,
//         pooled_width_); top[1], when present, is reshaped like top[0].
template <typename Dtype>
void PoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, "
      << "corresponding to (num, channels, height, width)";
  channels_ = bottom[0]->channels();
  height_ = bottom[0]->height();
  width_ = bottom[0]->width();
  if (global_pooling_) {
    // Global pooling: the window always tracks the current input size.
    kernel_h_ = height_;
    kernel_w_ = width_;
  }

  // Output size: ceil((input + 2 * pad - kernel) / stride) + 1, with the
  // division carried out in float.
  const float extent_h =
      static_cast<float>(height_ + 2 * pad_h_ - kernel_h_);
  const float extent_w =
      static_cast<float>(width_ + 2 * pad_w_ - kernel_w_);
  pooled_height_ = 1 + static_cast<int>(ceil(extent_h / stride_h_));
  pooled_width_ = 1 + static_cast<int>(ceil(extent_w / stride_w_));

  const bool has_padding = (pad_h_ != 0) || (pad_w_ != 0);
  if (has_padding) {
    // With padding, drop the last window if its start offset
    // (pooled - 1) * stride reaches input size + pad.
    const int last_start_h = (pooled_height_ - 1) * stride_h_;
    if (last_start_h >= height_ + pad_h_) {
      --pooled_height_;
    }
    const int last_start_w = (pooled_width_ - 1) * stride_w_;
    if (last_start_w >= width_ + pad_w_) {
      --pooled_width_;
    }
    CHECK_LT((pooled_height_ - 1) * stride_h_, height_ + pad_h_);
    CHECK_LT((pooled_width_ - 1) * stride_w_, width_ + pad_w_);
  }

  const int num = bottom[0]->num();
  top[0]->Reshape(num, channels_, pooled_height_, pooled_width_);
  if (top.size() > 1) {
    top[1]->ReshapeLike(*top[0]);
  }

  if (this->layer_param_.pooling_param().pool() ==
      PoolingParameter_PoolMethod_MAX) {
    // MAX pooling with a single top: the arg-max indices are kept in the
    // internal max_idx_ buffer, so size it like the output.
    if (top.size() == 1) {
      max_idx_.Reshape(num, channels_, pooled_height_, pooled_width_);
    }
  } else if (this->layer_param_.pooling_param().pool() ==
      PoolingParameter_PoolMethod_STOCHASTIC) {
    // Stochastic pooling: size the buffer of sampled indices like the output.
    rand_idx_.Reshape(num, channels_, pooled_height_, pooled_width_);
  }
}

// CPU forward pass.
// TODO(Yangqing): can the pooling operation be made faster?
//
// bottom: bottom[0] holds the input data.
// top:    top[0] receives the pooled output. For MAX pooling, top[1] (when
//         present) receives the index of each selected maximum; otherwise
//         those indices are stored in max_idx_.
template <typename Dtype>
void PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const Dtype* in_plane = bottom[0]->cpu_data();
  Dtype* out_plane = top[0]->mutable_cpu_data();
  const int out_count = top[0]->count();
  // With more than one top blob, the arg-max indices go to top[1].
  const bool use_top_mask = top.size() > 1;
  int* idx_plane = NULL;  // suppress warnings about uninitialized variables
  Dtype* top_mask_plane = NULL;
  // The pooling method is dispatched once, outside the loops, to keep the
  // inner loops fast at the price of some duplicated code.
  switch (this->layer_param_.pooling_param().pool()) {
  // MAX pooling.
  case PoolingParameter_PoolMethod_MAX:
    // Before searching for maxima, fill the index buffer with -1 and the
    // output with -FLT_MAX.
    if (use_top_mask) {
      top_mask_plane = top[1]->mutable_cpu_data();
      caffe_set(out_count, Dtype(-1), top_mask_plane);
    } else {
      idx_plane = max_idx_.mutable_cpu_data();
      caffe_set(out_count, -1, idx_plane);
    }
    caffe_set(out_count, Dtype(-FLT_MAX), out_plane);
    for (int n = 0; n < bottom[0]->num(); ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {
          // Row range of the window, clipped to the input.
          const int row_origin = ph * stride_h_ - pad_h_;
          const int row_begin = max(row_origin, 0);
          const int row_end = min(row_origin + kernel_h_, height_);
          for (int pw = 0; pw < pooled_width_; ++pw) {
            // Column range of the window, clipped to the input.
            const int col_origin = pw * stride_w_ - pad_w_;
            const int col_begin = max(col_origin, 0);
            const int col_end = min(col_origin + kernel_w_, width_);
            const int out_index = ph * pooled_width_ + pw;
            for (int h = row_begin; h < row_end; ++h) {
              for (int w = col_begin; w < col_end; ++w) {
                const int in_index = h * width_ + w;
                const Dtype candidate = in_plane[in_index];
                if (candidate > out_plane[out_index]) {
                  out_plane[out_index] = candidate;
                  // Remember where the current maximum came from.
                  if (use_top_mask) {
                    top_mask_plane[out_index] = static_cast<Dtype>(in_index);
                  } else {
                    idx_plane[out_index] = in_index;
                  }
                }
              }
            }
          }
        }
        // Advance every pointer by offset(0, 1) to reach the data of the
        // next (n, c) iteration.
        in_plane += bottom[0]->offset(0, 1);
        out_plane += top[0]->offset(0, 1);
        if (use_top_mask) {
          top_mask_plane += top[0]->offset(0, 1);
        } else {
          idx_plane += top[0]->offset(0, 1);
        }
      }
    }
    break;
  // Average pooling.
  case PoolingParameter_PoolMethod_AVE:
    for (int i = 0; i < out_count; ++i) {
      out_plane[i] = 0;
    }
    for (int n = 0; n < bottom[0]->num(); ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {
          // Row range of the window: first limited to the padded input (used
          // for the divisor), then clipped to the real input (used for the
          // summation).
          const int row_origin = ph * stride_h_ - pad_h_;
          const int row_limit = min(row_origin + kernel_h_, height_ + pad_h_);
          const int row_begin = max(row_origin, 0);
          const int row_end = min(row_limit, height_);
          for (int pw = 0; pw < pooled_width_; ++pw) {
            const int col_origin = pw * stride_w_ - pad_w_;
            const int col_limit = min(col_origin + kernel_w_, width_ + pad_w_);
            const int col_begin = max(col_origin, 0);
            const int col_end = min(col_limit, width_);
            // The divisor counts padded positions inside the window too.
            const int pool_size =
                (row_limit - row_origin) * (col_limit - col_origin);
            Dtype& cell = out_plane[ph * pooled_width_ + pw];
            for (int h = row_begin; h < row_end; ++h) {
              for (int w = col_begin; w < col_end; ++w) {
                cell += in_plane[h * width_ + w];
              }
            }
            cell /= pool_size;
          }
        }
        // Advance both pointers by offset(0, 1) to reach the data of the
        // next (n, c) iteration.
        in_plane += bottom[0]->offset(0, 1);
        out_plane += top[0]->offset(0, 1);
      }
    }
    break;
  // Stochastic pooling is not implemented on the CPU.
  case PoolingParameter_PoolMethod_STOCHASTIC:
    NOT_IMPLEMENTED;
    break;
  default:
    LOG(FATAL) << "Unknown pooling method.";
  }
}

// CPU backward pass: propagates the gradient in top[0] back to bottom[0].
//
// top:            top[0] holds the incoming gradient. For MAX pooling,
//                 top[1] (when present) holds the indices of the selected
//                 maxima; otherwise they are read from max_idx_.
// propagate_down: when propagate_down[0] is false nothing is computed.
// bottom:         the diff of bottom[0] is zeroed and then accumulated into.
template <typename Dtype>
void PoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  // Gradients are accumulated with +=, so start from an all-zero buffer.
  caffe_set(bottom[0]->count(), Dtype(0), bottom_diff);
  // With more than one top blob, the arg-max indices are read from top[1].
  const bool use_top_mask = top.size() > 1;
  const int* mask = NULL;  // suppress warnings about uninitialized variables
  const Dtype* top_mask = NULL;
  // As in the forward pass, the pooling method is dispatched once, outside
  // the loops.
  switch (this->layer_param_.pooling_param().pool()) {
  // MAX pooling.
  case PoolingParameter_PoolMethod_MAX:
    if (use_top_mask) {
      top_mask = top[1]->cpu_data();
    } else {
      mask = max_idx_.cpu_data();
    }
    for (int n = 0; n < top[0]->num(); ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {
          for (int pw = 0; pw < pooled_width_; ++pw) {
            const int index = ph * pooled_width_ + pw;
            // Look up where this output's maximum came from. The top-mask
            // value is cast explicitly so that both branches are int and the
            // integer mask is not round-tripped through Dtype.
            const int bottom_index = use_top_mask ?
                static_cast<int>(top_mask[index]) : mask[index];
            // Forward_cpu initialises every mask entry to -1 and only
            // overwrites it when some input exceeds -FLT_MAX. An entry that
            // is still negative has no source position, so skip it instead
            // of writing before the start of the plane.
            if (bottom_index < 0) {
              continue;
            }
            bottom_diff[bottom_index] += top_diff[index];
          }
        }
        // Advance every pointer by offset(0, 1) to reach the data of the
        // next (n, c) iteration.
        bottom_diff += bottom[0]->offset(0, 1);
        top_diff += top[0]->offset(0, 1);
        if (use_top_mask) {
          top_mask += top[0]->offset(0, 1);
        } else {
          mask += top[0]->offset(0, 1);
        }
      }
    }
    break;
  // Average pooling.
  case PoolingParameter_PoolMethod_AVE:
    for (int n = 0; n < top[0]->num(); ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {
          for (int pw = 0; pw < pooled_width_; ++pw) {
            // Same window and divisor computation as the AVE forward pass:
            // pool_size is taken before clipping to the real input.
            int hstart = ph * stride_h_ - pad_h_;
            int wstart = pw * stride_w_ - pad_w_;
            int hend = min(hstart + kernel_h_, height_ + pad_h_);
            int wend = min(wstart + kernel_w_, width_ + pad_w_);
            int pool_size = (hend - hstart) * (wend - wstart);
            hstart = max(hstart, 0);
            wstart = max(wstart, 0);
            hend = min(hend, height_);
            wend = min(wend, width_);
            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
                // Spread the top gradient evenly over the window's inputs.
                bottom_diff[h * width_ + w] +=
                  top_diff[ph * pooled_width_ + pw] / pool_size;
              }
            }
          }
        }
        // Advance both pointers by offset(0, 1) to reach the data of the
        // next (n, c) iteration.
        bottom_diff += bottom[0]->offset(0, 1);
        top_diff += top[0]->offset(0, 1);
      }
    }
    break;
  // Stochastic pooling is not implemented on the CPU.
  case PoolingParameter_PoolMethod_STOCHASTIC:
    NOT_IMPLEMENTED;
    break;
  default:
    LOG(FATAL) << "Unknown pooling method.";
  }
}

// When built in CPU_ONLY mode, disable the Forward_gpu and Backward_gpu
// functions of PoolingLayer.
#ifdef CPU_ONLY
STUB_GPU(PoolingLayer);
#endif

Caffe框架原始碼剖析(6)—池化層PoolingLayer

卷積層ConvolutionLayer正向傳導的目標層往往是池化層PoolingLayer。池化層通過降取樣來降低卷積層輸出的特徵向量，同時改善結果，不易出現過擬合。最常用的降取樣方法有均值取樣（取區域平均值作為降取樣值）、最大值取樣（取區域最大值作為降取樣值）和隨機

Caffe框架原始碼剖析(5)—卷積層ConvolutionLayer

ConvolutionLayer是BaseConvolutionLayer的子類，功能較為簡單。類中不包含成員變數，僅包含幾個虛擬函式的實現。 conv_layer.hpp標頭檔案的定義如下： template <typename Dtype> class Convoluti

Caffe框架原始碼剖析(4)—卷積層基類BaseConvolutionLayer

資料層DataLayer正向傳導的目標層是卷積層ConvolutionLayer。卷積層的是用一系列的權重濾波核與輸入影象進行卷積，具體實現是通過將影象展開成向量，作用矩陣乘法實現卷積。同樣，首先看一下卷積層的類圖。

Caffe框架原始碼剖析(3)—資料層DataLayer

Caffe網路正向傳導時，首先進行的是DataLayer資料層的傳導。該層從檔案讀取資料，載入至它的上一層卷積層。反向傳播時，因為資料層不需要反傳，所以它的Backward_cpu()和Backward_gpu()都是空函式。下面看一下DataLayer類圖關係。首先從父類Ba

Caffe框架原始碼剖析(2)—訓練網路

中間因為工程開發等雜七雜八原因暫停了Caffe原始碼分析，現在繼續補上。上篇分析在函式 train() 中建立了網路，接下來就是進入訓練網路步驟了。在函式train()中，使用前一步建立好的solver智慧指標物件呼叫函式Solve()， int train() {

Caffe框架原始碼剖析(1)—構建網路

今天花了一整天時間進行閱讀和除錯Caffe框架程式碼，單單是以Lenet網路進行測試就可見框架的大致工作原理。賈揚清在Caffe中大量使用了STL、模板、智慧指標，有些地方為了效率也犧牲了一些程式碼可讀性，處處彰顯了大牛風範。為了他人閱讀方便，現將程式碼流程簡單梳理一下。 1.LeNe

caffe原始碼池化層

1、標題圖示池化層（前向傳播）池化層其實和卷積層有點相似，有個類似卷積核的視窗按照固定的步長在移動，每個視窗做一定的操作，按照這個操作的型別可以分為兩種池化層：輸入引數如下：輸入： 1 * 3 * 4 * 4 池化核: 2 * 2 pad: 0 步長：2 輸出引數如下

《TensorFlow：實戰Google深度學習框架》——6.3 卷積神經網路常用結構（池化層）

池化層在兩個卷積層之間，可以有效的縮小矩陣的尺寸（也可以減小矩陣深度，但實踐中一般不會這樣使用），co。池從而減少最後全連線層中的引數。池化層既可以加快計算速度也可以防止過度擬合問題的作用。池化層也是通過一個類似過濾器結構完成的，計算方式有兩種：最大池化層：採用最

自己動手實現深度學習框架-6 卷積層和池化層

程式碼倉庫: https://github.com/brandonlyg/cute-dl (轉載請註明出處!) # 目標上個階段使用MLP模型在在MNIST資料集上實現了92%左右的準確率，達到了tensorflow同等模型的水平。這個階段要讓cut

caffe源碼池化層反向傳播

C4D alt convert ec2 ted 操作 src 技術 space 圖示池化層（前向傳播）池化層其實和卷積層有點相似，有個類似卷積核的窗口按照固定的步長在移動，每個窗口做一定的操作，按照這個操作的類型可以分為兩種池化層：輸入參數如下：輸入： 1 *

10、Caffe學習與應用 -訓練（卷積層引數、池化層引數、學習率、正則化）

10.2訓練一、卷積層引數tricks 圖片輸入是2的冪次方，例如32、64、96、224等。卷積核大小是3*3或者5*5。輸入圖片上下左右需要用0來補充，即padding，且假如卷積核大小是5那麼padding就是2（圖片左右上下都補充2），卷積核大小是3pa

卷積層，池化層等，前向/反向傳播原理講解

簡單代碼構建 range expand 使用場景神經網絡右下角 body 今天閑來無事，考慮到以前都沒有好好研究過卷積層、池化層等等的前向/反向傳播的原理，所以今天就研究了一下，參考了一篇微信好文，講解如下：參考鏈接：https://www.zybuluo.co

tensorflow中的卷積和池化層(一)

oat avg 滑動 shape 要求網絡 vol 加速 ali 在官方tutorial的幫助下，我們已經使用了最簡單的CNN用於Mnist的問題，而其實在這個過程中，主要的問題在於如何設置CNN網絡，這和Caffe等框架的原理是一樣的，但是tf的設置似乎更加簡潔、方便，

理解CNN卷積層與池化層計算

CNN網絡卷積層池化層深度學習 OpenCV 概述深度學習中CNN網絡是核心，對CNN網絡來說卷積層與池化層的計算至關重要，不同的步長、填充方式、卷積核大小、池化層策略等都會對最終輸出模型與參數、計算復雜度產生重要影響，本文將從卷積層與池化層計算這些相關參數出發，演示一下不同步長、

全連接層（FC）與全局平均池化層（GAP）

出了類別節點過擬合技術分類思想 ID 連接在卷積神經網絡的最後，往往會出現一兩層全連接層，全連接一般會把卷積輸出的二維特征圖轉化成一維的一個向量，全連接層的每一個節點都與上一層每個節點連接，是把前一層的輸出特征都綜合起來，所以該層的權值參數是最多的。例如在VG

TensorFlow 池化層

www tar float 深度 value pytho version str pan 在 TensorFlow 中使用池化層在下面的練習中，你需要設定池化層的大小，strides，以及相應的 padding。你可以參考 tf.nn.max_pool()。Padding

【TensorFlow】池化層max_pool中兩種paddding操作

max_pool()中padding引數有兩種模式valid和same模式。 Tensorflow的padding和卷積層一樣也有padding操作，兩種不同的操作輸出的結果有區別：函式原型max_pool(value, ksize, strides, padding

【深度學習】基於im2col的展開Python實現卷積層和池化層

一、回顧上一篇我們介紹了，卷積神經網的卷積計算和池化計算，計算過程中視窗一直在移動，那麼我們如何準確的取到視窗內的元素，並進行正確的計算呢？另外，以上我們只考慮的單個輸入資料，如果是批量資料呢？首先，我們先來看看批量資料，是如何計算的二、批處理在神經網路的

【深度學習】卷積神經網路的卷積層和池化層計算

一、簡介 \quad\quad 卷積神經網路（Convolutional neural network, CNN），

【讀書1】【2017】MATLAB與深度學習——池化層(1)

由於它是一個二維的運算操作，文字解釋可能會導致更多的混淆，因此讓我們來舉一個例子。 As it is a two-dimensional operation, andan explanation in text may lead to more confusion, let’s go t

Caffe框架原始碼剖析(6)—池化層PoolingLayer

相關推薦