TensorRT IPlugin implementation of the caffe Softmax layer

TensorRT's built-in softmax only works across the channel dimension and supports no other axis. The softmax in SSD is not taken over the channel dimension, so to accelerate SSD with TensorRT, the softmax layer's IPlugin code has to be written by hand.
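Before the class itself, here is a minimal sketch of how such a plugin is typically handed to the caffe parser at build time. It assumes the legacy IPlugin / nvcaffeparser1::IPluginFactory API that this code targets; the layer name "mbox_conf_softmax" and the hard-coded axis value 2 are illustrative, not taken from any particular prototxt.

// Build-time plugin factory sketch (assumed legacy API; SoftmaxPlugin is the
// class defined below). Requires <cassert>, <cstring>, <memory>, NvInfer.h
// and NvCaffeParser.h.
class ParserPluginFactory : public nvcaffeparser1::IPluginFactory
{
public:
    bool isPlugin(const char* name) override {
        // match SSD's confidence softmax layer by name (illustrative)
        return 0 == strcmp(name, "mbox_conf_softmax");
    }

    nvinfer1::IPlugin* createPlugin(const char* layerName, const nvinfer1::Weights* weights, int nbWeights) override {
        assert(isPlugin(layerName));
        // the parser does not expose the layer's prototxt parameters here, so
        // the softmax axis (2 for SSD's mbox_conf_softmax) is hard-coded
        softmax_plugin_.reset(new SoftmaxPlugin(2));
        return softmax_plugin_.get();
    }

private:
    std::unique_ptr<SoftmaxPlugin> softmax_plugin_;
};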

// Softmax layer. TensorRT's built-in softmax only supports the cross-channel case.
class SoftmaxPlugin : public IPlugin
{
public:
    SoftmaxPlugin(int softmax_axis) {  // the SoftMax layer's parameter from the prototxt (the index of the axis to classify over) is passed in through the constructor
      softmax_axis_ = softmax_axis;
    }
    SoftmaxPlugin(const void* buffer, size_t size) {  // parse out the fields that serialize() stored in the engine file (softmax_axis_ is only needed at build time, so it is not serialized)
        assert(size == 3 * sizeof(int));
        const int* d = reinterpret_cast<const int*>(buffer);
        outer_num_ = d[0];
        shape_softmax_axis_ = d[1];
        inner_num_ = d[2];
    }

    inline int getNbOutputs() const override { return 1; }  // step 1
    Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override {  // step 2
        assert(1 == nbInputDims);
        assert(0 == index);
        assert(3 == inputs[index].nbDims);
        // the softmax output dimensions are identical to the input dimensions
        return DimsCHW(inputs[0].d[0], inputs[0].d[1], inputs[0].d[2]);
    }

    void configure(const Dims* inputs, int nbInputs, const Dims* outputs, int nbOutputs, int maxBatchSize) override {  // step 3
      assert(softmax_axis_ < 4);
      // split the data dimensions around axis softmax_axis_ (see the caffe
      // softmax layer source); index 0 of the full 4D shape is the implicit
      // batch dimension, so inputs[0].d[i-1] holds dimension i
      int count = 1;
      for (int i = 0; i < softmax_axis_; ++i) {
        if(0 == i)
          count *= maxBatchSize;
        else
          count *= inputs[0].d[i-1];
      }
      outer_num_ = count;
      if(0 == softmax_axis_)
        shape_softmax_axis_ = maxBatchSize;
      else
        shape_softmax_axis_ = inputs[0].d[softmax_axis_-1];
      count = 1;
      // i starts at softmax_axis_ + 1 >= 1 here, so the batch dimension can
      // never contribute to the inner count
      for (int i = softmax_axis_+1; i < 4; ++i) {
        count *= inputs[0].d[i-1];
      }
      inner_num_ = count;
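      // Worked example (illustrative, SSD-style confidence branch): for an
      // input of shape (numPriors, numClasses, 1) and softmax_axis_ = 2, the
      // full 4D shape is (maxBatchSize, numPriors, numClasses, 1), which gives
      // outer_num_ = maxBatchSize * numPriors, shape_softmax_axis_ = numClasses
      // and inner_num_ = 1: one softmax over the classes of every prior box.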
    }

    int initialize() override {  // step 4
        // Initialize CUDNN.
        CUDNN_CHECK(cudnnCreate(&handle_));
        cudnn::createTensor4dDesc<float>(&bottom_desc_);
        cudnn::createTensor4dDesc<float>(&top_desc_);

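        // Fold the split dimensions into a 4D tensor: the outer dimensions
        // become the cuDNN batch, the softmax axis becomes the channel and the
        // inner dimensions become H, so CUDNN_SOFTMAX_MODE_CHANNEL in
        // enqueue() normalizes exactly over the requested axis.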
        int N = outer_num_;
        int K = shape_softmax_axis_;
        int H = inner_num_;
        int W = 1;
        cudnn::setTensor4dDesc<float>(&bottom_desc_, N, K, H, W);
        cudnn::setTensor4dDesc<float>(&top_desc_, N, K, H, W);

        handles_setup_ = true;
        return 0;
    }

    size_t getSerializationSize() override {
        return 3 * sizeof(int);
    }

    void serialize(void* buffer) override {  // step 5
        int* d = reinterpret_cast<int*>(buffer);
        d[0] = outer_num_; d[1] = shape_softmax_axis_; d[2] = inner_num_;
    }

    inline void terminate() override {  // step 6
        // Check that handles have been setup before destroying.
        if (!handles_setup_) { return; }

        cudnnDestroyTensorDescriptor(bottom_desc_);
        cudnnDestroyTensorDescriptor(top_desc_);
        cudnnDestroy(handle_);
    }

    inline size_t getWorkspaceSize(int) const override { return 0; }
    int enqueue(int batchSize, const void* const* inputs, void** outputs, void*, cudaStream_t stream) override {
        // follows the caffe softmax layer's cudnn forward pass
        const float* bottom_data = reinterpret_cast<const float*>(inputs[0]);
        float* top_data = reinterpret_cast<float*>(outputs[0]);
        // bind the cudnn handle to the stream TensorRT schedules us on,
        // otherwise the softmax would run on cudnn's default stream
        CUDNN_CHECK(cudnnSetStream(handle_, stream));
        CUDNN_CHECK(cudnnSoftmaxForward(handle_, CUDNN_SOFTMAX_ACCURATE,
                                        CUDNN_SOFTMAX_MODE_CHANNEL,
                                        cudnn::dataType<float>::one,
                                        bottom_desc_, bottom_data,
                                        cudnn::dataType<float>::zero,
                                        top_desc_, top_data));
        return 0;
    }


protected:
    int outer_num_;
    int inner_num_;
    int softmax_axis_;
    int shape_softmax_axis_;

    bool handles_setup_{false};  // set by initialize(), checked by terminate()
    cudnnHandle_t handle_;
    cudnnTensorDescriptor_t bottom_desc_;
    cudnnTensorDescriptor_t top_desc_;
};
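For completeness, a sketch of the runtime side under the same assumed legacy API: when the serialized engine is loaded again, the runtime calls back into an nvinfer1::IPluginFactory, which rebuilds the plugin from the bytes written by serialize() through the second constructor above. The factory and variable names are illustrative.

// Runtime plugin factory sketch (assumed legacy API).
class RuntimePluginFactory : public nvinfer1::IPluginFactory
{
public:
    nvinfer1::IPlugin* createPlugin(const char* layerName, const void* serialData, size_t serialLength) override {
        // invokes SoftmaxPlugin(const void*, size_t), which restores
        // outer_num_, shape_softmax_axis_ and inner_num_
        softmax_plugin_.reset(new SoftmaxPlugin(serialData, serialLength));
        return softmax_plugin_.get();
    }

private:
    std::unique_ptr<SoftmaxPlugin> softmax_plugin_;
};

// Hypothetical usage when deserializing a saved engine:
// RuntimePluginFactory factory;
// nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
// nvinfer1::ICudaEngine* engine =
//     runtime->deserializeCudaEngine(engineData, engineSize, &factory);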