Caffe框架原始碼剖析(6)—池化層PoolingLayer
阿新 • • 發佈:2018-10-31
卷積層ConvolutionLayer正向傳導的目標層往往是池化層PoolingLayer。池化層通過降取樣來降低卷積層輸出的特徵向量,同時改善結果,不易出現過擬合。最常用的降取樣方法有均值取樣(取區域平均值作為降取樣值)、最大值取樣(取區域最大值作為降取樣值)和隨機取樣(取區域內隨機一個畫素)等。
PoolingLayer類從Layer基類單一繼承而來,沒有派生其它子類。具體定義在pooling_layer.hpp中,
template <typename Dtype> class PoolingLayer : public Layer<Dtype> { public: explicit PoolingLayer(const LayerParameter& param) : Layer<Dtype>(param) {} virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual inline const char* type() const { return "Pooling"; } virtual inline int ExactNumBottomBlobs() const { return 1; } virtual inline int MinTopBlobs() const { return 1; } // 最大值取樣可以額外輸出一個Blob,所以MaxTopBlobs返回2 virtual inline int MaxTopBlobs() const { return (this->layer_param_.pooling_param().pool() == PoolingParameter_PoolMethod_MAX) ? 2 : 1; } protected: virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); // 卷積區域尺寸 int kernel_h_, kernel_w_; // 卷積平移步幅 int stride_h_, stride_w_; // 影象補齊畫素數 int pad_h_, pad_w_; // 通道 int channels_; // 輸入影象尺寸 int height_, width_; // 池化後尺寸 int pooled_height_, pooled_width_; // 是否全區域池化(將整幅影象降取樣為1x1) bool global_pooling_; // 隨機取樣點索引 Blob<Dtype> rand_idx_; // 最大值取樣點索引 Blob<int> max_idx_; };
具體實現在pooling_layer.cpp中,
template <typename Dtype> void PoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { PoolingParameter pool_param = this->layer_param_.pooling_param(); if (pool_param.global_pooling()) { CHECK(!(pool_param.has_kernel_size() || pool_param.has_kernel_h() || pool_param.has_kernel_w())) << "With Global_pooling: true Filter size cannot specified"; } else { CHECK(!pool_param.has_kernel_size() != !(pool_param.has_kernel_h() && pool_param.has_kernel_w())) << "Filter size is kernel_size OR kernel_h and kernel_w; not both"; CHECK(pool_param.has_kernel_size() || (pool_param.has_kernel_h() && pool_param.has_kernel_w())) << "For non-square filters both kernel_h and kernel_w are required."; } CHECK((!pool_param.has_pad() && pool_param.has_pad_h() && pool_param.has_pad_w()) || (!pool_param.has_pad_h() && !pool_param.has_pad_w())) << "pad is pad OR pad_h and pad_w are required."; CHECK((!pool_param.has_stride() && pool_param.has_stride_h() && pool_param.has_stride_w()) || (!pool_param.has_stride_h() && !pool_param.has_stride_w())) << "Stride is stride OR stride_h and stride_w are required."; global_pooling_ = pool_param.global_pooling(); // 設定卷積區域尺寸 if (global_pooling_) { // 如果全區域池化,則區域尺寸等於輸入影象尺寸 kernel_h_ = bottom[0]->height(); kernel_w_ = bottom[0]->width(); } else { if (pool_param.has_kernel_size()) { kernel_h_ = kernel_w_ = pool_param.kernel_size(); } else { kernel_h_ = pool_param.kernel_h(); kernel_w_ = pool_param.kernel_w(); } } CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero."; CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero."; // 設定影象補齊畫素 if (!pool_param.has_pad_h()) { pad_h_ = pad_w_ = pool_param.pad(); } else { pad_h_ = pool_param.pad_h(); pad_w_ = pool_param.pad_w(); } // 設定卷積平移步幅 if (!pool_param.has_stride_h()) { stride_h_ = stride_w_ = pool_param.stride(); } else { stride_h_ = pool_param.stride_h(); stride_w_ = pool_param.stride_w(); } if (global_pooling_) { CHECK(pad_h_ == 0 && pad_w_ == 0 && stride_h_ == 1 && stride_w_ == 1) << "With Global_pooling: true; only pad = 0 and stride = 1"; } if (pad_h_ != 0 || pad_w_ != 0) { CHECK(this->layer_param_.pooling_param().pool() == PoolingParameter_PoolMethod_AVE || this->layer_param_.pooling_param().pool() == PoolingParameter_PoolMethod_MAX) << "Padding implemented only for average and max pooling."; CHECK_LT(pad_h_, kernel_h_); CHECK_LT(pad_w_, kernel_w_); } } template <typename Dtype> void PoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " << "corresponding to (num, channels, height, width)"; channels_ = bottom[0]->channels(); height_ = bottom[0]->height(); width_ = bottom[0]->width(); if (global_pooling_) { kernel_h_ = bottom[0]->height(); kernel_w_ = bottom[0]->width(); } // 計算降取樣後圖像尺寸 pooled_height_ = static_cast<int>(ceil(static_cast<float>( height_ + 2 * pad_h_ - kernel_h_) / stride_h_)) + 1; pooled_width_ = static_cast<int>(ceil(static_cast<float>( width_ + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1; if (pad_h_ || pad_w_) { // 如果有影象補齊,則需要確保不發生越界,否則不做最後一個取樣點 if ((pooled_height_ - 1) * stride_h_ >= height_ + pad_h_) { --pooled_height_; } if ((pooled_width_ - 1) * stride_w_ >= width_ + pad_w_) { --pooled_width_; } CHECK_LT((pooled_height_ - 1) * stride_h_, height_ + pad_h_); CHECK_LT((pooled_width_ - 1) * stride_w_, width_ + pad_w_); } top[0]->Reshape(bottom[0]->num(), channels_, pooled_height_, pooled_width_); if (top.size() > 1) { top[1]->ReshapeLike(*top[0]); } // 如果是最大值取樣,則初始化最大值取樣點索引 if (this->layer_param_.pooling_param().pool() == PoolingParameter_PoolMethod_MAX && top.size() == 1) { max_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_, pooled_width_); } // 如果是隨機取樣,則初始化隨機取樣點索引 if (this->layer_param_.pooling_param().pool() == PoolingParameter_PoolMethod_STOCHASTIC) { rand_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_, pooled_width_); } } // CPU正向傳導 // TODO(Yangqing): 池化操作還可以更快嗎? template <typename Dtype> void PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = top[0]->mutable_cpu_data(); const int top_count = top[0]->count(); // 如果top.size() > 1,則額外輸出一個Blob到top[1] const bool use_top_mask = top.size() > 1; int* mask = NULL; // suppress warnings about uninitalized variables Dtype* top_mask = NULL; // switch不同的降取樣方法 // 將swtich放在for迴圈外用來提高執行速度,雖然這樣會增加程式碼量 switch (this->layer_param_.pooling_param().pool()) { // 最大值取樣 case PoolingParameter_PoolMethod_MAX: // 查詢區域最大值前,將陣列值初始化為-1 if (use_top_mask) { top_mask = top[1]->mutable_cpu_data(); caffe_set(top_count, Dtype(-1), top_mask); } else { mask = max_idx_.mutable_cpu_data(); caffe_set(top_count, -1, mask); } caffe_set(top_count, Dtype(-FLT_MAX), top_data); // 迴圈遍歷區域最大值 for (int n = 0; n < bottom[0]->num(); ++n) { for (int c = 0; c < channels_; ++c) { for (int ph = 0; ph < pooled_height_; ++ph) { for (int pw = 0; pw < pooled_width_; ++pw) { int hstart = ph * stride_h_ - pad_h_; int wstart = pw * stride_w_ - pad_w_; int hend = min(hstart + kernel_h_, height_); int wend = min(wstart + kernel_w_, width_); hstart = max(hstart, 0); wstart = max(wstart, 0); const int pool_index = ph * pooled_width_ + pw; for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { const int index = h * width_ + w; if (bottom_data[index] > top_data[pool_index]) { top_data[pool_index] = bottom_data[index]; if (use_top_mask) { top_mask[pool_index] = static_cast<Dtype>(index); } else { // 位置記錄在max_idx_索引中 mask[pool_index] = index; } } } } } } // 加上偏移,跳轉到下一幅影象 bottom_data += bottom[0]->offset(0, 1); top_data += top[0]->offset(0, 1); if (use_top_mask) { top_mask += top[0]->offset(0, 1); } else { mask += top[0]->offset(0, 1); } } } break; // 平均值取樣 case PoolingParameter_PoolMethod_AVE: for (int i = 0; i < top_count; ++i) { top_data[i] = 0; } // 迴圈遍歷計算區域平均值 for (int n = 0; n < bottom[0]->num(); ++n) { for (int c = 0; c < channels_; ++c) { for (int ph = 0; ph < pooled_height_; ++ph) { for (int pw = 0; pw < pooled_width_; ++pw) { int hstart = ph * stride_h_ - pad_h_; int wstart = pw * stride_w_ - pad_w_; int hend = min(hstart + kernel_h_, height_ + pad_h_); int wend = min(wstart + kernel_w_, width_ + pad_w_); int pool_size = (hend - hstart) * (wend - wstart); hstart = max(hstart, 0); wstart = max(wstart, 0); hend = min(hend, height_); wend = min(wend, width_); for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { top_data[ph * pooled_width_ + pw] += bottom_data[h * width_ + w]; } } top_data[ph * pooled_width_ + pw] /= pool_size; } } // 加上偏移,跳轉到下一幅影象 bottom_data += bottom[0]->offset(0, 1); top_data += top[0]->offset(0, 1); } } break; // 隨機取樣尚未在CPU端實現 case PoolingParameter_PoolMethod_STOCHASTIC: NOT_IMPLEMENTED; break; default: LOG(FATAL) << "Unknown pooling method."; } } // CPU反向傳導 template <typename Dtype> void PoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { if (!propagate_down[0]) { return; } const Dtype* top_diff = top[0]->cpu_diff(); Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); // 和正向傳導程式碼類似,將switch放在for迴圈外部 caffe_set(bottom[0]->count(), Dtype(0), bottom_diff); // 如果top.size() > 1,則額外輸出一個Blob到top[1] const bool use_top_mask = top.size() > 1; const int* mask = NULL; // suppress warnings about uninitialized variables const Dtype* top_mask = NULL; switch (this->layer_param_.pooling_param().pool()) { // 最大值取樣 case PoolingParameter_PoolMethod_MAX: // 開始迴圈 if (use_top_mask) { top_mask = top[1]->cpu_data(); } else { mask = max_idx_.cpu_data(); } for (int n = 0; n < top[0]->num(); ++n) { for (int c = 0; c < channels_; ++c) { for (int ph = 0; ph < pooled_height_; ++ph) { for (int pw = 0; pw < pooled_width_; ++pw) { const int index = ph * pooled_width_ + pw; // 從取樣點索引陣列中取出反向傳導的目的索引 const int bottom_index = use_top_mask ? top_mask[index] : mask[index]; bottom_diff[bottom_index] += top_diff[index]; } } bottom_diff += bottom[0]->offset(0, 1); top_diff += top[0]->offset(0, 1); if (use_top_mask) { top_mask += top[0]->offset(0, 1); } else { mask += top[0]->offset(0, 1); } } } break; // 平均值取樣 case PoolingParameter_PoolMethod_AVE: // 開始迴圈 for (int n = 0; n < top[0]->num(); ++n) { for (int c = 0; c < channels_; ++c) { for (int ph = 0; ph < pooled_height_; ++ph) { for (int pw = 0; pw < pooled_width_; ++pw) { int hstart = ph * stride_h_ - pad_h_; int wstart = pw * stride_w_ - pad_w_; int hend = min(hstart + kernel_h_, height_ + pad_h_); int wend = min(wstart + kernel_w_, width_ + pad_w_); int pool_size = (hend - hstart) * (wend - wstart); hstart = max(hstart, 0); wstart = max(wstart, 0); hend = min(hend, height_); wend = min(wend, width_); for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { // 將top偏導平均分配到bottom各點上 bottom_diff[h * width_ + w] += top_diff[ph * pooled_width_ + pw] / pool_size; } } } } // 加上偏移,跳轉到下一幅影象 bottom_diff += bottom[0]->offset(0, 1); top_diff += top[0]->offset(0, 1); } } break; // 隨機取樣尚未在CPU端實現 case PoolingParameter_PoolMethod_STOCHASTIC: NOT_IMPLEMENTED; break; default: LOG(FATAL) << "Unknown pooling method."; } } // 如果CPU_ONLY模式則禁止Forward_gpu和Backward_gpu函式 #ifdef CPU_ONLY STUB_GPU(PoolingLayer); #endif