
Mobile unet person segmentation model -- 2

  The bug suspected in the previous post turned out to be exactly that: a typo in the MXNet network definition, where pool5 had been typed as pool4. After fixing it, ncnn no longer crashes. That said, ncnn's mxnet2ncnn converter really ought to ship with more diagnostics to verify that the converted model's parameters are consistent with the source model.
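  Even something as simple as checking, per layer, that the attributes read from the MXNet json agree with the number of weight values actually loaded from the params file would catch this class of mistake early. Here is a minimal sketch of the idea; none of these names are mxnet2ncnn's real internals, just an illustration:

#include <cstdio>

// Illustrative sketch only -- not mxnet2ncnn code. After reading a layer's
// attributes, verify the weight blob holds the element count they imply:
// 'expected' would come from the attributes (e.g. num_filter*c*kh*kw for a conv),
// 'actual' from the blob loaded out of the .params file.
static bool check_weight_count(const char* layer_name, size_t expected, size_t actual)
{
    if (expected != actual) {
        fprintf(stderr, "layer %s: expected %zu weight values, got %zu\n",
                layer_name, expected, actual);
        return false; // abort conversion instead of writing a broken model
    }
    return true;
}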

  Still, things were not all smooth sailing: after conversion, the ncnn predictions were stubbornly wrong. The only option was to check layer by layer, so I wrote a few small tools to print the outputs of intermediate hidden layers.

  check.py

import os
os.environ["MXNET_BACKWARD_DO_MIRROR"] = "1"
os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
import sys
import cv2
import mxnet as mx
from mxnet import ndarray as F
from skimage.transform import resize
from skimage.io import imsave
import numpy as np
from unetdataiter import UnetDataIter
import matplotlib.pyplot as plt
from unet import build_unet

np.set_printoptions(threshold=np.inf)

def post_process_mask(label, img_cols, img_rows, n_classes, p=0.5):
    pr = label.reshape(n_classes, img_cols, img_rows).transpose([1,2,0]).argmax(axis=2)
    return (pr*255).asnumpy()

def ncnn_output(label):
    #pr = label.reshape(channels, img_cols, img_rows).transpose([1,2,0])
    pr = label.transpose([1,2,0])
    return pr.asnumpy()


def load_image(img, width, height):
    im = np.zeros((height, width, 3), dtype='uint8')
    #im[:, :, :] = 128

    if img.shape[0] >= img.shape[1]:
        scale = img.shape[0] / height
        new_width = int(img.shape[1] / scale)
        diff = (width - new_width) // 2
        img = cv2.resize(img, (new_width, height))

        im[:, diff:diff + new_width, :] = img
    else:
        scale = img.shape[1] / width
        new_height = int(img.shape[0] / scale)
        diff = (height - new_height) // 2

        img = cv2.resize(img, (width, new_height))
        im[diff:diff + new_height, :, :] = img

    im = np.float32(im) / 255.0

    return [im.transpose((2,0,1))]

def main():
    batch_size = 16
    n_classes = 2
    img_width = 256
    img_height = 256
    #img_width = 96
    #img_height = 96

    ctx = [mx.gpu(0)]

    # sym, arg_params, aux_params = mx.model.load_checkpoint('unet_person_segmentation', 20)
    #unet_sym = build_unet(batch_size, img_width, img_height, False)
    # unet = mx.mod.Module(symbol=unet_sym, context=ctx, label_names=None)

    sym, arg_params, aux_params = mx.model.load_checkpoint('unet_person_segmentation', 0)
    all_layers = sym.get_internals()
    print(all_layers.list_outputs())
    unet = mx.mod.Module(symbol=all_layers['conv11_1_output'], context=ctx, label_names=None)
    #unet = mx.mod.Module(symbol=all_layers['pool5_output'], context=ctx, label_names=None)

    unet.bind(for_training=False, data_shapes=[['data', (batch_size, 3, img_width, img_height)]], label_shapes=unet._label_shapes)
    #unet.set_params(arg_params, aux_params, allow_missing=True)
    unet.set_params(arg_params, aux_params)

    testimg = cv2.imread(sys.argv[1], 1)
    img = load_image(testimg, img_width, img_height)
    unet.predict(mx.io.NDArrayIter(data=[img]))

    outputs = unet.get_outputs()[0]
    print(outputs[0].shape)
    output = ncnn_output(outputs[0])
    print(output) 

    #keys = unet.get_params()[0].keys() # list all weight names
    #print(keys)
    #conv_w = unet.get_params()[0]['trans_conv6_weight'] # fetch the weights of the layer to inspect
    #print(conv_w.shape)
    #print(conv_w.asnumpy()) # print the actual values

    #cv2.imshow('test', testimg)
    #cv2.imshow('mask', post_process_mask(outputs[0], img_width, img_height, n_classes))
    #cv2.waitKey()

if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("illegal parameters")
        sys.exit(0)

    main()
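  check.py takes the test image as its only command-line argument, for example: python check.py test.jpg. It prints the names of all internal layers plus the shape and raw values of the selected output (conv11_1 above), which is what gets diffed against the ncnn side.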

  Building on this, I found that the very first deconvolution was already wrong (the output of trans_conv6 in the MXNet network); its results were completely different. My hunch was that the deconvolution algorithm itself was unlikely to be the culprit, so I printed this layer's weights on the MXNet side (the commented-out code above) and then printed the corresponding parameters inside the mxnet2ncnn code. It turned out that num_group was the problem. The quick-and-dirty fix was to hardcode the deconvolution's num_group to 1 in mxnet2ncnn.cpp, which finally solved the problem and produced the correct output.
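  For the record, the workaround in mxnet2ncnn.cpp boiled down to the following; this is a paraphrased sketch from memory rather than the tool's verbatim code, and get_attr_int is a stand-in for however the converter really reads node attributes:

// Inside the Deconvolution handling of mxnet2ncnn.cpp (paraphrased sketch):
//int num_group = get_attr_int(node, "num_group"); // original: read from the graph,
//                                                 // which came out wrong here
int num_group = 1; // workaround: this UNet has no grouped deconvolutions

  Hardcoding is only safe because this particular network never uses grouped deconvolutions; the proper fix would be to figure out why the attribute gets parsed incorrectly.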

  Along the way there were also a few image-format conversion issues between ncnn and MXNet, floating-point handling in particular. I won't bore you with the details; the code below speaks for itself.

#include "net.h"
#include <opencv2/opencv.hpp>
#include <string>
#include <vector>
#include <time.h>
#include <algorithm>
#include <map>
#include <iostream>
#include <opencv2/opencv.hpp>

using namespace std;
using namespace cv;

#define INPUT_WIDTH     256
#define INPUT_HEIGHT    256

int main(int argc, char** argv) {
    if (argc < 2) {
        printf("illegal parameters!");
        exit(0);
    }

    ncnn::Net Unet;

    Unet.load_param("../models/ncnn.param");
    Unet.load_model("../models/ncnn.bin");

    cv::Scalar value = Scalar(0,0,0);
    cv::Mat src;
    cv::Mat tmp;
    src = cv::imread(argv[1]);

    if (src.size().width > src.size().height) {
        int top = (src.size().width - src.size().height) / 2;
        int bottom = (src.size().width - src.size().height) - top;
        cv::copyMakeBorder(src, tmp, top, bottom, 0, 0, BORDER_CONSTANT, value);
    } else {
        int left = (src.size().height - src.size().width) / 2;
        int right = (src.size().height - src.size().width) - left;
        cv::copyMakeBorder(src, tmp, 0, 0, left, right, BORDER_CONSTANT, value);
    }

    cv::Mat tmp1;
    cv::resize(tmp, tmp1, cv::Size(INPUT_WIDTH, INPUT_HEIGHT), 0, 0, cv::INTER_CUBIC);

    cv::Mat image;
    tmp1.convertTo(image, CV_32FC3, 1/255.0);

    std::cout << "image element type "<< image.type() << " " << image.cols << " " << image.rows << std::endl;

    // std::cout << src.cols << " " << src.rows << " " << image.cols << " " << image.rows << std::endl;
    //cv::imshow("test", image);
    //cv::waitKey();

    //ncnn::Mat ncnn_img = ncnn::Mat::from_pixels(image.data, ncnn::Mat::PIXEL_BGR2RGB, image.cols, image.rows);

    // CV_32FC3 stores pixels as HWC, while ncnn::Mat expects CHW, so reorder manually
    float *srcdata = (float*)image.data;
    float *data = new float[INPUT_WIDTH*INPUT_HEIGHT*3];
    for (int i = 0; i < INPUT_HEIGHT; i++)
       for (int j = 0; j < INPUT_WIDTH; j++)
           for (int k = 0; k < 3; k++) {
              data[k*INPUT_HEIGHT*INPUT_WIDTH + i*INPUT_WIDTH + j] = srcdata[i*INPUT_WIDTH*3 + j*3 + k];
           }
    ncnn::Mat in(INPUT_WIDTH*INPUT_HEIGHT*3, data);
    in = in.reshape(INPUT_WIDTH, INPUT_HEIGHT, 3);

    //ncnn::Mat in;

    //resize_bilinear(ncnn_img, in, INPUT_WIDTH, INPUT_HEIGHT);

    ncnn::Extractor ex = Unet.create_extractor();

    ex.set_light_mode(true);
    //ex.set_num_threads(4);

    ex.input("data", in);

    ncnn::Mat mask;
    //ex.extract("relu5_2_splitncnn_0", mask);
    //ex.extract("trans_conv6", mask);
    ex.extract("conv11_1", mask);
    //ex.extract("pool5", mask);

    std::cout << "whc " << mask.w << " " << mask.h << " " << mask.c << std::endl;
#if 1
    cv::Mat cv_img = cv::Mat::zeros(INPUT_HEIGHT, INPUT_WIDTH, CV_8UC1);
//    mask.to_pixels(cv_img.data, ncnn::Mat::PIXEL_GRAY);

    {
    // per-pixel argmax across the class channels: the winning class index
    // (0 = background, 1 = person) becomes the mask value
    float *srcdata = (float*)mask.data;
    unsigned char *data = cv_img.data;

    for (int i = 0; i < mask.h; i++)
       for (int j = 0; j < mask.w; j++) {
         float tmp = srcdata[0*mask.w*mask.h+i*mask.w+j];
         int maxk = 0;
         for (int k = 0; k < mask.c; k++) {
           if (tmp < srcdata[k*mask.w*mask.h+i*mask.w+j]) {
             tmp = srcdata[k*mask.w*mask.h+i*mask.w+j];
             maxk = k;
           }
           //std::cout << srcdata[k*mask.w*mask.h+i*mask.w+j] << std::endl;
         }
         data[i*INPUT_WIDTH + j] = maxk;
       }
    }
    
    cv_img *= 255;  // stretch class indices {0,1} to {0,255} so the mask is visible
    cv::imshow("test", cv_img);
    cv::waitKey();
#endif
    return 0;
}
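  To try it, link against ncnn and OpenCV and pass a test image as the only argument (the binary name depends on your build setup); the predicted mask is shown in an OpenCV window.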

  And that completes the feature. If you're interested, head over to: https://github.com/xuduo35/unet_mxnet2ncnn

  One more thing noticed while debugging: ncnn's intermediate layer outputs are not exactly identical to MXNet's, presumably down to some parameter or arithmetic detail. It doesn't affect the final mask, so I'm leaving it alone for now.
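  If you ever need to quantify that drift, one quick approach is to dump the same layer's output from both frameworks into flat float32 files and diff them. A minimal sketch, assuming both sides were written as raw float arrays (the file names here are made up):

#include <cstdio>
#include <cmath>
#include <algorithm>
#include <vector>

// Compare two raw float32 dumps of the same layer and report the largest
// absolute difference. Shapes are assumed identical; only element count is checked.
int main()
{
    FILE* fa = fopen("mxnet_conv11_1.raw", "rb");
    FILE* fb = fopen("ncnn_conv11_1.raw", "rb");
    if (!fa || !fb) { fprintf(stderr, "missing dump file\n"); return 1; }

    std::vector<float> a, b;
    float v;
    while (fread(&v, sizeof(float), 1, fa) == 1) a.push_back(v);
    while (fread(&v, sizeof(float), 1, fb) == 1) b.push_back(v);
    fclose(fa); fclose(fb);

    if (a.size() != b.size()) { fprintf(stderr, "element count mismatch\n"); return 1; }

    float maxdiff = 0.f;
    for (size_t i = 0; i < a.size(); i++)
        maxdiff = std::max(maxdiff, std::fabs(a[i] - b[i]));
    printf("max abs diff: %g over %zu values\n", maxdiff, a.size());
    return 0;
}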