
SSD Network Walkthrough: the SmoothL1LossLayer

The SmoothL1LossLayer in the SSD network is borrowed from Fast R-CNN and computes the smooth L1 loss. The smooth L1 function is:

\mathrm{smooth}_{L1}(x) = \begin{cases} 0.5x^{2}, & |x| < 1 \\ |x| - 0.5, & |x| \geq 1 \end{cases}

Its derivative is:

\mathrm{smooth}_{L1}'(x) = \begin{cases} x, & -1 < x < 1 \\ 1, & x \geq 1 \\ -1, & x \leq -1 \end{cases}

It is called the smooth L1 function because it is differentiable everywhere, whereas the original L1 function is not differentiable at x = 0.
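
To see why the piecewise definition joins smoothly, compare the value and the slope of the two branches at |x| = 1: both give the same value and the same derivative, so neither the function nor its derivative has a jump there.

0.5 \cdot 1^{2} = 0.5 = 1 - 0.5, \qquad \frac{\mathrm{d}}{\mathrm{d}x}\left(0.5x^{2}\right)\Big|_{x=1} = 1 = \frac{\mathrm{d}}{\mathrm{d}x}\left(x - 0.5\right)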

The smooth L1 loss is:

E = \frac{1}{N}\sum_{i=1}^{N}\sum_{j=1}^{k} \mathrm{smooth}_{L1}(\hat{y}_{ij} - y_{ij})

where y_{i} = [y_{i1}, y_{i2}, ..., y_{ik}]^{T} is the label vector and \hat{y}_{i} is the prediction vector.
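
As a quick numeric illustration (not from the original post), take N = 1 and k = 2 with differences \hat{y}_{11} - y_{11} = 0.5 (quadratic branch) and \hat{y}_{12} - y_{12} = 2 (linear branch):

E = \frac{1}{1}\left(0.5 \cdot 0.5^{2} + (2 - 0.5)\right) = 0.125 + 1.5 = 1.625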

With the theory above in place, we can start walking through the source code.

This layer does not define its own parameter message in caffe.proto; the parameters it takes, such as loss_weight, come from LayerParameter, which readers can look up on their own.
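
For reference, a layer of this type is typically declared in a training prototxt roughly as follows; this is only an illustrative sketch, and the blob names are made up rather than taken from the SSD model definition:

layer {
  name: "loc_loss"        # illustrative name
  type: "SmoothL1Loss"    # matches REGISTER_LAYER_CLASS(SmoothL1Loss) below
  bottom: "loc_pred"      # predictions (illustrative blob name)
  bottom: "loc_gt"        # labels (illustrative blob name)
  top: "loc_loss"
  loss_weight: 1.0        # LayerParameter field that scales this loss
}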

(1) Header file: smooth_L1_loss_layer.hpp

// ------------------------------------------------------------------
// Fast R-CNN
// copyright (c) 2015 Microsoft
// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
// Written by Ross Girshick
// Modified by Wei Liu
// ------------------------------------------------------------------

#ifndef CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_
#define CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/loss_layer.hpp"

namespace caffe {

/**
 * @brief Computes the SmoothL1 loss as introduced in:
 *  Fast R-CNN, Ross Girshick, ICCV 2015.
 */
template <typename Dtype>
class SmoothL1LossLayer : public LossLayer<Dtype> { // inherits from LossLayer
 public:
  explicit SmoothL1LossLayer(const LayerParameter& param)
      : LossLayer<Dtype>(param), diff_() {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top); // layer setup function
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top); 

  virtual inline const char* type() const { return "SmoothL1Loss"; }

  virtual inline int MinBottomBlobs() const { return 2; } // at least two bottom blobs
  virtual inline int MaxBottomBlobs() const { return 3; } // at most three bottom blobs

  /**
   * Unlike most loss layers, in the SmoothL1LossLayer we can backpropagate
   * to both inputs -- override to return true and always allow force_backward.
   */
  // Same as the EuclideanLoss layer in caffe: gradients can flow to both input blobs.
  // See https://blog.csdn.net/qq_21368481/article/details/81950538
  virtual inline bool AllowForceBackward(const int bottom_index) const {
    return true;
  }

 protected:
  /// @copydoc SmoothL1LossLayer
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  Blob<Dtype> diff_;    // element-wise (optionally weighted) difference bottom[0] - bottom[1]
  Blob<Dtype> errors_;  // element-wise smooth L1 value of diff_
  bool has_weights_;    // true when a third bottom blob of per-element weights is supplied
};

}  // namespace caffe

#endif  // CAFFE_SMOOTH_L1_LOSS_LAYER_HPP_

(2) CPU implementation: smooth_L1_loss_layer.cpp

// ------------------------------------------------------------------
// Fast R-CNN
// copyright (c) 2015 Microsoft
// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
// Written by Ross Girshick
// Modified by Wei Liu
// ------------------------------------------------------------------

#include <vector>

#include "caffe/layers/smooth_L1_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void SmoothL1LossLayer<Dtype>::LayerSetUp(
  const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::LayerSetUp(bottom, top);
  has_weights_ = (bottom.size() == 3);  // whether per-element weights are supplied; bottom[2] stores the weight of each element of bottom[0] in the smooth L1 loss
}

template <typename Dtype>
void SmoothL1LossLayer<Dtype>::Reshape(
  const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::Reshape(bottom, top);
  // check that bottom[0] and bottom[1] have the same dimensions
  CHECK_EQ(bottom[0]->channels(), bottom[1]->channels());
  CHECK_EQ(bottom[0]->height(), bottom[1]->height());
  CHECK_EQ(bottom[0]->width(), bottom[1]->width());
  if (has_weights_) { // check that bottom[0] and bottom[2] have the same dimensions
    CHECK_EQ(bottom[0]->channels(), bottom[2]->channels());
    CHECK_EQ(bottom[0]->height(), bottom[2]->height());
    CHECK_EQ(bottom[0]->width(), bottom[2]->width());
  }
  // diff_ and errors_ have the same shape as bottom[0]
  diff_.Reshape(bottom[0]->num(), bottom[0]->channels(),
      bottom[0]->height(), bottom[0]->width());
  errors_.Reshape(bottom[0]->num(), bottom[0]->channels(),
      bottom[0]->height(), bottom[0]->width());
}

template <typename Dtype>
void SmoothL1LossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  int count = bottom[0]->count();
  caffe_sub(
      count,
      bottom[0]->cpu_data(),
      bottom[1]->cpu_data(),
      diff_.mutable_cpu_data()); // caffe_sub() performs element-wise subtraction: d := b0 - b1
  if (has_weights_) {
    caffe_mul(
        count,
        bottom[2]->cpu_data(),
        diff_.cpu_data(),
        diff_.mutable_cpu_data());  // d := w * (b0 - b1); caffe_mul() performs element-wise multiplication
  }
  const Dtype* diff_data = diff_.cpu_data();
  Dtype* error_data = errors_.mutable_cpu_data();
  for (int i = 0; i < count; ++i) {
    Dtype val = diff_data[i];
    Dtype abs_val = fabs(val);
    // piecewise computation of the smooth L1 value
    if (abs_val < 1.) {
      error_data[i] = 0.5 * val * val;
    } else {
      error_data[i] = abs_val - 0.5;
    }
  }
  top[0]->mutable_cpu_data()[0] =
      caffe_cpu_asum(count, errors_.cpu_data()) / bottom[0]->num(); // caffe_cpu_asum() sums the absolute values of all elements
}

template <typename Dtype>
void SmoothL1LossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  int count = diff_.count();
  Dtype* diff_data = diff_.mutable_cpu_data();
  for (int i = 0; i < count; ++i) {
    Dtype val = diff_data[i];
    // f'(x) = x         if |x| < 1
    //       = sign(x)   otherwise
    if (fabs(val) < 1.) {
      diff_data[i] = val;
    } else {
      diff_data[i] = (Dtype(0) < val) - (val < Dtype(0)); // sign(x): 1 when x > 0, -1 when x < 0 (0 when x == 0)
    }
  }
  for (int i = 0; i < 2; ++i) {
    if (propagate_down[i]) {
      const Dtype sign = (i == 0) ? 1 : -1; // opposite signs for the two input blobs
      const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
      // caffe_cpu_axpby computes b = alpha * a + beta * b (a and b are vectors)
      caffe_cpu_axpby(
          bottom[i]->count(),               // count
          alpha,                            // alpha
          diff_.cpu_data(),                 // a
          Dtype(0),                         // beta
          bottom[i]->mutable_cpu_diff());   // b
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU(SmoothL1LossLayer);
#endif

INSTANTIATE_CLASS(SmoothL1LossLayer);
REGISTER_LAYER_CLASS(SmoothL1Loss);

}  // namespace caffe

Here bottom[2] holds the weights. As an example:

y = \sum_{i=1}^{n} a_{i}\, \mathrm{smooth}_{L1}(x_{i})

where the vector formed by the a_{i} is bottom[2], and x_{i} = bottom[0](i) - bottom[1](i).
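
To make the weighted forward pass concrete, here is a minimal standalone C++ sketch (not part of Caffe) that mirrors the logic of Forward_cpu on plain vectors; the function and variable names are made up for illustration:

#include <cmath>
#include <cstdio>
#include <vector>

// Standalone re-implementation of the weighted smooth L1 loss described above.
// pred, label and weight play the roles of bottom[0], bottom[1] and bottom[2];
// num corresponds to bottom[0]->num() (the batch size).
double WeightedSmoothL1Loss(const std::vector<double>& pred,
                            const std::vector<double>& label,
                            const std::vector<double>& weight,
                            int num) {
  double loss = 0.0;
  for (size_t i = 0; i < pred.size(); ++i) {
    double diff = weight[i] * (pred[i] - label[i]);  // d := w * (b0 - b1)
    double abs_diff = std::fabs(diff);
    loss += (abs_diff < 1.0) ? 0.5 * diff * diff : abs_diff - 0.5;
  }
  return loss / num;  // normalized by the batch size, as in Forward_cpu
}

int main() {
  std::vector<double> pred   = {0.8, 2.5, -1.0};
  std::vector<double> label  = {0.5, 0.0, -1.0};
  std::vector<double> weight = {1.0, 1.0, 0.0};  // the last element is masked out
  // weighted diffs: 0.3, 2.5, 0  ->  0.5*0.09 + (2.5-0.5) + 0 = 2.045
  std::printf("loss = %f\n", WeightedSmoothL1Loss(pred, label, weight, 1));
  return 0;
}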

From the code we can also read off the derivatives of the loss (ignoring the optional bottom[2] weights and the loss_weight factor carried in top[0]->cpu_diff()[0]):

When propagate_down[0] = true:

\frac{\partial E}{\partial \hat{y}_{ij}} = \begin{cases} \frac{1}{N}(\hat{y}_{ij} - y_{ij}), & |\hat{y}_{ij} - y_{ij}| < 1 \\ \frac{1}{N}\,\mathrm{sign}(\hat{y}_{ij} - y_{ij}), & \text{otherwise} \end{cases}

When propagate_down[1] = true:

\frac{\partial E}{\partial y_{ij}} = \begin{cases} -\frac{1}{N}(\hat{y}_{ij} - y_{ij}), & |\hat{y}_{ij} - y_{ij}| < 1 \\ -\frac{1}{N}\,\mathrm{sign}(\hat{y}_{ij} - y_{ij}), & \text{otherwise} \end{cases}
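
A quick way to sanity-check these derivative formulas is to compare the analytic expression against a central finite difference of the smooth L1 function itself; a small illustrative C++ sketch (not from the post):

#include <cmath>
#include <cstdio>

// smooth L1 value and its analytic derivative, as defined above
double smooth_l1(double x) {
  return std::fabs(x) < 1.0 ? 0.5 * x * x : std::fabs(x) - 0.5;
}
double smooth_l1_grad(double x) {
  if (std::fabs(x) < 1.0) return x;
  return (x > 0.0) - (x < 0.0);  // sign(x)
}

int main() {
  const double eps = 1e-6;
  for (double x : {-2.0, -0.7, 0.3, 1.5}) {
    double numeric = (smooth_l1(x + eps) - smooth_l1(x - eps)) / (2.0 * eps);
    std::printf("x=%5.2f  analytic=%8.5f  numeric=%8.5f\n",
                x, smooth_l1_grad(x), numeric);
  }
  return 0;
}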

Another point to note: saying that both input blobs can be backpropagated really means the layer allows the prediction and the label to be swapped, i.e. y_{ij} may be the prediction vector and \hat{y}_{ij} the label vector, but in that case propagate_down[1] = true must be set (or force_backward enabled).
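
In a prototxt this can be requested per bottom blob with the repeated propagate_down field of LayerParameter (or globally with force_backward: true in the net definition); a sketch with illustrative blob names, assuming bottom[1] is the blob we actually want gradients for:

layer {
  name: "loc_loss"
  type: "SmoothL1Loss"
  bottom: "loc_label"     # labels fed as bottom[0] (illustrative)
  bottom: "loc_pred"      # predictions fed as bottom[1] (illustrative)
  top: "loc_loss"
  propagate_down: false   # no gradient into bottom[0]
  propagate_down: true    # gradient flows into bottom[1]
}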

(3) GPU implementation: smooth_L1_loss_layer.cu

// ------------------------------------------------------------------
// Fast R-CNN
// copyright (c) 2015 Microsoft
// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
// Written by Ross Girshick
// Modified by Wei Liu
// ------------------------------------------------------------------

#include <vector>

#include "caffe/layers/smooth_L1_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {
// compute the smooth L1 value of each element
template <typename Dtype>
__global__ void SmoothL1Forward(const int n, const Dtype* in, Dtype* out) {
  // f(x) = 0.5 * x^2    if |x| < 1
  //        |x| - 0.5    otherwise
  CUDA_KERNEL_LOOP(index, n) {  // behaves like a for loop, but executed by many CUDA threads in parallel
    Dtype val = in[index];
    Dtype abs_val = abs(val);
    if (abs_val < 1) {
      out[index] = 0.5 * val * val;
    } else {
      out[index] = abs_val - 0.5;
    }
  }
}

template <typename Dtype>
void SmoothL1LossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  int count = bottom[0]->count();
  caffe_gpu_sub(
      count,
      bottom[0]->gpu_data(),
      bottom[1]->gpu_data(),
      diff_.mutable_gpu_data());    // d := b0 - b1
  if (has_weights_) {
    caffe_gpu_mul(
        count,
        bottom[2]->gpu_data(),
        diff_.gpu_data(),
        diff_.mutable_gpu_data());  // d := w * (b0 - b1)
  }
  // NOLINT_NEXT_LINE(whitespace/operators)
  SmoothL1Forward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, diff_.gpu_data(), errors_.mutable_gpu_data());  // compute the smooth L1 value of every element
  CUDA_POST_KERNEL_CHECK;

  Dtype loss;
  caffe_gpu_asum(count, errors_.gpu_data(), &loss); // same functionality as caffe_cpu_asum
  top[0]->mutable_cpu_data()[0] = loss / bottom[0]->num(); // note the output blob is written via cpu_data (the CPU and GPU copies are kept in sync)
}

// compute the derivative of smooth L1
template <typename Dtype>
__global__ void SmoothL1Backward(const int n, const Dtype* in, Dtype* out) {
  // f'(x) = x         if |x| < 1
  //       = sign(x)   otherwise
  CUDA_KERNEL_LOOP(index, n) {
    Dtype val = in[index];
    Dtype abs_val = abs(val);
    if (abs_val < 1) {
      out[index] = val;
    } else {
      out[index] = (Dtype(0) < val) - (val < Dtype(0));
    }
  }
}

template <typename Dtype>
void SmoothL1LossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  int count = diff_.count();
  // NOLINT_NEXT_LINE(whitespace/operators)
  SmoothL1Backward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, diff_.gpu_data(), diff_.mutable_gpu_data());
  CUDA_POST_KERNEL_CHECK;
  // compute the derivative (gradient) of the loss function
  for (int i = 0; i < 2; ++i) {
    if (propagate_down[i]) {
      const Dtype sign = (i == 0) ? 1 : -1;
      const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
      // same functionality as caffe_cpu_axpby
      caffe_gpu_axpby(
          bottom[i]->count(),              // count
          alpha,                           // alpha
          diff_.gpu_data(),                // x
          Dtype(0),                        // beta
          bottom[i]->mutable_gpu_diff());  // y
    }
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(SmoothL1LossLayer);

}  // namespace caffe
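
For reference, the CUDA_KERNEL_LOOP macro used in the two kernels above is Caffe's grid-stride loop (defined in caffe/util/device_alternate.hpp); in current Caffe versions it expands to roughly the following, which is why each kernel body reads like an ordinary for loop over all n elements:

// Grid-stride loop: each thread starts at its global index and advances by the
// total number of launched threads until all n elements have been processed.
#define CUDA_KERNEL_LOOP(i, n) \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
       i < (n); \
       i += blockDim.x * gridDim.x)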