Implementing the Caffe Softmax Layer as a TensorRT IPlugin
阿新 · Published 2018-12-10
TensorRT's built-in softmax only supports normalizing across the channel dimension; other axes are not supported. The softmax in SSD, however, does not classify along the channel axis, so to accelerate SSD with TensorRT the softmax layer's IPlugin code has to be written by hand.
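For context, here is roughly what the offending layer looks like in SSD's deploy prototxt (a sketch, not from the original post): the confidence blob is reshaped to N × num_priors × num_classes, and the softmax runs over axis 2, the class axis, rather than the channel axis. This axis value is exactly the parameter the plugin constructor below receives.

layer {
  name: "mbox_conf_softmax"
  type: "Softmax"
  bottom: "mbox_conf_reshape"
  top: "mbox_conf_softmax"
  softmax_param {
    axis: 2
  }
}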
#include <cassert>
#include <cuda_runtime.h>
#include <cudnn.h>
#include "NvInfer.h"
#include "caffe/util/cudnn.hpp"  // CUDNN_CHECK and the cudnn::* descriptor helpers are Caffe's cuDNN wrappers

using namespace nvinfer1;

// Softmax layer. TensorRT's built-in softmax only supports the cross-channel case.
class SoftmaxPlugin : public IPlugin
{
public:
    // Parse-time constructor: receives the SoftMax layer's parameter from the
    // prototxt (the index of the axis the classification runs over).
    SoftmaxPlugin(int softmax_axis)
    {
        softmax_axis_ = softmax_axis;
        handles_setup_ = false;  // fix: leave handles unset until initialize()
    }

    // Deserialization constructor: restores the members that serialize()
    // wrote into the engine file.
    SoftmaxPlugin(const void* buffer, size_t size)
    {
        assert(size == 3 * sizeof(int));
        const int* d = reinterpret_cast<const int*>(buffer);
        outer_num_ = d[0];
        shape_softmax_axis_ = d[1];
        inner_num_ = d[2];
        handles_setup_ = false;  // fix: was left uninitialized in the original
    }

    // Step 1: the layer has exactly one output.
    inline int getNbOutputs() const override { return 1; }

    // Step 2: softmax does not change the shape, so the output dimensions
    // match the input dimensions exactly.
    Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override
    {
        assert(1 == nbInputDims);
        assert(0 == index);
        assert(3 == inputs[index].nbDims);
        return DimsCHW(inputs[0].d[0], inputs[0].d[1], inputs[0].d[2]);
    }

    // Step 3: split the data extents around softmax_axis_ into
    // outer_num_ x shape_softmax_axis_ x inner_num_, mirroring the
    // decomposition in Caffe's Softmax layer source. Axis 0 is the batch,
    // which TensorRT handles implicitly, hence the maxBatchSize special
    // case and the d[i - 1] indexing.
    void configure(const Dims* inputs, int nbInputs, const Dims* outputs, int nbOutputs, int maxBatchSize) override
    {
        assert(softmax_axis_ < 4);
        // Product of the extents before the softmax axis.
        int count = 1;
        for (int i = 0; i < softmax_axis_; ++i)
        {
            if (0 == i)
                count *= maxBatchSize;
            else
                count *= inputs[0].d[i - 1];
        }
        outer_num_ = count;
        // Extent of the softmax axis itself.
        if (0 == softmax_axis_)
            shape_softmax_axis_ = maxBatchSize;
        else
            shape_softmax_axis_ = inputs[0].d[softmax_axis_ - 1];
        // Product of the extents after the softmax axis.
        count = 1;
        for (int i = softmax_axis_ + 1; i < 4; ++i)
        {
            if (0 == i)
                count *= maxBatchSize;
            else
                count *= inputs[0].d[i - 1];
        }
        inner_num_ = count;
    }

    // Step 4: create the cuDNN handle and describe the data as an
    // outer_num_ x shape_softmax_axis_ x inner_num_ x 1 tensor, so that
    // CUDNN_SOFTMAX_MODE_CHANNEL normalizes over the desired axis.
    int initialize() override
    {
        // Initialize cuDNN.
        CUDNN_CHECK(cudnnCreate(&handle_));
        cudnn::createTensor4dDesc<float>(&bottom_desc_);
        cudnn::createTensor4dDesc<float>(&top_desc_);
        int N = outer_num_;
        int K = shape_softmax_axis_;
        int H = inner_num_;
        int W = 1;
        cudnn::setTensor4dDesc<float>(&bottom_desc_, N, K, H, W);
        cudnn::setTensor4dDesc<float>(&top_desc_, N, K, H, W);
        handles_setup_ = true;
        return 0;
    }

    size_t getSerializationSize() override
    {
        return 3 * sizeof(int);
    }

    // Step 5: store the three extents in the engine file; the
    // deserialization constructor reads them back in the same order.
    void serialize(void* buffer) override
    {
        int* d = reinterpret_cast<int*>(buffer);
        d[0] = outer_num_;
        d[1] = shape_softmax_axis_;
        d[2] = inner_num_;
    }

    // Step 6: release the cuDNN resources.
    inline void terminate() override
    {
        // Check that the handles were set up before destroying them.
        if (!handles_setup_) { return; }
        cudnnDestroyTensorDescriptor(bottom_desc_);
        cudnnDestroyTensorDescriptor(top_desc_);
        cudnnDestroy(handle_);
    }

    inline size_t getWorkspaceSize(int) const override { return 0; }

    // Forward pass, following Caffe's cuDNN softmax forward code. Note that
    // batchSize is ignored: the descriptors were built in initialize() from
    // extents that already bake in maxBatchSize, so this assumes the engine
    // always runs at the batch size it was configured with.
    int enqueue(int batchSize, const void* const* inputs, void** outputs, void*, cudaStream_t stream) override
    {
        const float* bottom_data = reinterpret_cast<const float*>(inputs[0]);
        float* top_data = reinterpret_cast<float*>(outputs[0]);
        // Fix: run cuDNN on the stream TensorRT hands us instead of the
        // default stream (the original omitted this call).
        CUDNN_CHECK(cudnnSetStream(handle_, stream));
        CUDNN_CHECK(cudnnSoftmaxForward(handle_, CUDNN_SOFTMAX_ACCURATE,
                                        CUDNN_SOFTMAX_MODE_CHANNEL,
                                        cudnn::dataType<float>::one,
                                        bottom_desc_, bottom_data,
                                        cudnn::dataType<float>::zero,
                                        top_desc_, top_data));
        return 0;
    }

protected:
    int outer_num_;
    int inner_num_;
    int softmax_axis_;
    int shape_softmax_axis_;
    bool handles_setup_;
    cudnnHandle_t handle_;
    cudnnTensorDescriptor_t bottom_desc_;
    cudnnTensorDescriptor_t top_desc_;
};
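To actually use the plugin, it has to be handed to TensorRT through a plugin factory. The sketch below is not part of the original post: it assumes the TensorRT 3/4-era nvcaffeparser1::IPluginFactory and nvinfer1::IPluginFactory interfaces, and the layer name mbox_conf_softmax plus the hard-coded axis 2 are illustrative assumptions (a full parser would read softmax_param.axis from the model). It shows the two creation paths: one invoked while parsing the prototxt, one while deserializing a saved engine.

#include <cassert>
#include <cstring>
#include <memory>
#include "NvCaffeParser.h"
#include "NvInfer.h"

using namespace nvinfer1;

// Hypothetical factory sketch: creates a SoftmaxPlugin both when the Caffe
// parser encounters the layer and when a serialized engine is reloaded.
class PluginFactory : public nvcaffeparser1::IPluginFactory, public nvinfer1::IPluginFactory
{
public:
    bool isPlugin(const char* layerName) override
    {
        // SSD's non-channel softmax layer; the name is an assumption.
        return 0 == strcmp(layerName, "mbox_conf_softmax");
    }

    // Called while parsing the prototxt.
    IPlugin* createPlugin(const char* layerName, const Weights* weights, int nbWeights) override
    {
        assert(isPlugin(layerName));
        // Axis 2 matches SSD's softmax_param { axis: 2 }; hard-coded here
        // only for illustration.
        softmax_plugin_.reset(new SoftmaxPlugin(2));
        return softmax_plugin_.get();
    }

    // Called while deserializing a built engine.
    IPlugin* createPlugin(const char* layerName, const void* serialData, size_t serialLength) override
    {
        assert(isPlugin(layerName));
        softmax_plugin_.reset(new SoftmaxPlugin(serialData, serialLength));
        return softmax_plugin_.get();
    }

private:
    std::unique_ptr<SoftmaxPlugin> softmax_plugin_;
};

In that API version the factory is registered with parser->setPluginFactory(&factory) before calling parse(), and passed as the third argument to runtime->deserializeCudaEngine(blob, size, &factory) when reloading the engine.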