
Using Caffe and MXNet to open the camera and run face detection

# -*- coding:utf-8 -*-
import random
import mxnet as mx
import numpy as np
from sklearn import preprocessing
import base64
import cv2
import time
from easydict import EasyDict as edict
import os
import sys
from scipy import misc
from mtcnn_detector import MtcnnDetector
caffe_root = '../../caffe-ssd/python'  # point the Python path at the local caffe-ssd build
sys.path.insert(0, caffe_root)
import caffe
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src', 'common'))  # add the directory that contains face_preprocess
import face_preprocess

caffe.set_device(0)
caffe.set_mode_gpu()
model_def = '../models/sfd/deploy.prototxt'
model_weights = '../models/sfd/SFD.caffemodel'
net = caffe.Net(model_def, model_weights, caffe.TEST)    # build a Net in test mode from the definition and weights declared above


#********************************SFD***********************************************************************
def sfd_detection(frame, threshold, mode):
    # start = time.time()
    image = frame
    height = image.shape[0]
    width = image.shape[1]

    # For inputs whose width and height differ, shrink the longer side to a
    # fixed size so the aspect ratio is preserved
    if mode == 'reg':
        im_shrink = 80.0 / max(image.shape[0], image.shape[1])
    elif mode == 'global':
        im_shrink = 320.0 / max(image.shape[0], image.shape[1])
    elif mode == 'cached':
        im_shrink = 160.0 / max(image.shape[0], image.shape[1])
    else:
        raise ValueError('unknown mode: %s' % mode)
    image = cv2.resize(image, None, None, fx=im_shrink, fy=im_shrink, interpolation=cv2.INTER_LINEAR)    # resize the sample to fit the network input, e.g. at most 320x320x3 in 'global' mode
    net.blobs['data'].reshape(1, 3, image.shape[0], image.shape[1])

# matplotlib-style loading yields RGB pixels in [0, 1], while this Caffe model
# expects BGR pixels in [0, 255], so a Transformer performs the conversion
    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})

# move the channel axis to the front: HxWxC -> CxHxW
    transformer.set_transpose('data', (2, 0, 1))
    transformer.set_mean('data', np.array([104, 117, 123]))
  
# scale pixels up to [0, 255] and swap channels RGB -> BGR
    transformer.set_raw_scale('data', 255)
    transformer.set_channel_swap('data', (2, 1, 0))
# preprocess the loaded image
    transformed_image = transformer.preprocess('data', image)
    net.blobs['data'].data[...] = transformed_image 
# forward pass: take the output of the SSD network's last layer, detection_out
    detections = net.forward()['detection_out']                                                      # face-coordinate output of the network, a 4-D array of shape 1*1*N*7
    # The last layer of the SSD network is named 'detection_out'. Its output blob
    # contains one 7-field row per candidate detection: field 1 is the image index,
    # field 2 the class label, field 3 the confidence score, and fields 4-7 the
    # normalised coordinates of the top-left and bottom-right corners of the
    # detected region. The number of rows is the number of candidate detections.
    det_conf = detections[0, 0, :, 2]
    det_xmin = detections[0, 0, :, 3]
    det_ymin = detections[0, 0, :, 4]
    det_xmax = detections[0, 0, :, 5]
    det_ymax = detections[0, 0, :, 6]
    bbox = []
    for i in range(det_conf.shape[0]):
        xmin = max(0, int(round(det_xmin[i] * width)))
        ymin = max(0, int(round(det_ymin[i] * height)))
        xmax = min(width - 1, int(round(det_xmax[i] * width)))
        ymax = min(height - 1, int(round(det_ymax[i] * height)))
        score = det_conf[i]
        if score < threshold or xmin >= xmax or ymin >= ymax:
            continue
        # print('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
        # format('person', score, xmin, ymin, xmax, ymax))
        bbox.append([xmin, ymin, xmax, ymax])
        # cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255,0,0), 2)
    # end = time.time()
    # print(end-start)
    # cv2.imshow('result', frame)
    # cv2.waitKey(0)
    return bbox                                                                  # e.g. [[0, 0, 239, 239], [0, 0, 239, 239]]: face pixel coordinates recovered from the network output
#********************************SFD**********************************************************************************************************
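
# ------------------------------------------------------------------------------
# The title promises opening the camera, yet the __main__ block below only reads
# a static image. What follows is a minimal sketch (not from the original post)
# of the missing capture loop, assuming the default camera at index 0 and
# reusing sfd_detection() from above.
def run_camera_demo(threshold=0.5, mode='global'):
    cap = cv2.VideoCapture(0)  # open the default camera
    if not cap.isOpened():
        raise RuntimeError('cannot open camera 0')
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        # draw a blue rectangle around every detected face
        for (xmin, ymin, xmax, ymax) in sfd_detection(frame, threshold, mode):
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
        cv2.imshow('SFD', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):  # press q to quit
            break
    cap.release()
    cv2.destroyAllWindows()
# ------------------------------------------------------------------------------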



def do_flip(data):
  # flip every channel of a CxHxW array left-right, in place
  for idx in range(data.shape[0]):
    data[idx, :, :] = np.fliplr(data[idx, :, :])

#***********************************************************MTCNN****************************************************************


#*******************************************************************************************************output image size*********
image_size = '112,112'
_vec = image_size.split(',')  # split the "H,W" string into a list of two entries
assert len(_vec) == 2
image_size = (int(_vec[0]), int(_vec[1]))  # cast both entries to int
# *******************************************************************************************************output image size*********


# *****************************************************************************************************************************bind the pretrained embedding model
ctx = mx.gpu(0)
model = '../models/model-r50-am-lfw/model,0'
_vec = model.split(',')  # split the "prefix,epoch" string into model path and checkpoint tag
assert len(_vec) == 2
prefix = _vec[0]
epoch = int(_vec[1])
sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)  # load the checkpoint: args are model prefix and epoch tag
all_layers = sym.get_internals()  # inspect the intermediate layers
sym = all_layers['fc1_output']    # keep the network only up to the fc1 embedding layer
model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)  # build the module
model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])  # bind the input shape
model.set_params(arg_params, aux_params)  # load the weights
#**************************************************************************************************************************************bind the pretrained embedding model


mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model')  # MTCNN model directory, passed into the detector class
detector = MtcnnDetector(model_folder=mtcnn_path, ctx=ctx, num_worker=1, accurate_landmark=True,
                         threshold=[0.0, 0.0, 0.2])  # hand the parameters to the bound model
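
# Sanity check (a sketch, not in the original post): push a zero image through
# the bound module to confirm the embedding size of fc1_output. For an r50
# ArcFace-style checkpoint such as model-r50-am-lfw this is typically (1, 512).
_dummy = mx.nd.zeros((1, 3, image_size[0], image_size[1]))
model.forward(mx.io.DataBatch(data=(_dummy,)), is_train=False)
print('embedding shape:', model.get_outputs()[0].shape)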


def get_feature(ssd_bbox, detector, model):  # merges the original get_feature and get_input of the MTCNN code into one step



  # ssd_bbox is a BGR image (the face crop produced by the SSD stage)
  ret = detector.detect_face_limited(ssd_bbox, det_type=2)  # run face detection; returns all bboxes and landmarks. det_type=2 means using R+O nets, else O net only
  if ret is None:
      return None
  bbox, points = ret
  if bbox.shape[0] == 0:
      return None
  bbox = bbox[0, 0:4]  # keep the first box: [xmin, ymin, xmax, ymax]
  points = points[0, :].reshape((2, 5)).T  # reshape the 5 facial landmarks into a 5x2 array



# optionally save / reload the MTCNN labels (bbox, points)
  # np.savetxt("../label/msra_lmk", (bbox, points))
  # a = np.loadtxt("msra_lmk")


  flip = 0  # set to 1 to also average in the embedding of the flipped face
  # use the MTCNN bbox and landmarks (instead of the raw cv2 bbox) to warp the
  # crop into an aligned 112x112 face image
  nimg = face_preprocess.preprocess(ssd_bbox, bbox, points, image_size='112,112')
  # cv2.imshow('mtcnn_face', nimg)
  cv2.imwrite('./feature/' + str(random.random()) + '.jpg', nimg)  # dump the aligned face for inspection


  nimg = cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB)
  aligned = np.transpose(nimg, (2, 0, 1))  # HxWxC -> CxHxW

  # print(nimg.shape)
  embedding = None
  for flipid in [0, 1]:
      if flipid == 1:
          if flip == 0:
              break
          do_flip(aligned)
      input_blob = np.expand_dims(aligned, axis=0)
      data = mx.nd.array(input_blob)
      db = mx.io.DataBatch(data=(data,))
      model.forward(db, is_train=False)
      _embedding = model.get_outputs()[0].asnumpy()
      # print(_embedding.shape)
      if embedding is None:
          embedding = _embedding
      else:
          embedding += _embedding
  embedding=preprocessing.normalize(embedding).flatten()


  # return the L2-normalised face embedding
  return embedding


#***********************************************************MTCNN****************************************************************************************************
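
# Because get_feature() returns an L2-normalised embedding, the cosine
# similarity of two faces reduces to a plain dot product. A minimal sketch of a
# verification step (the 0.5 threshold is an assumption, not a value from the
# original post; tune it on your own data):
def is_same_person(emb1, emb2, threshold=0.5):
    similarity = np.dot(emb1, emb2)  # cosine similarity, in [-1, 1]
    return similarity > threshold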

if __name__ == '__main__':
    # read the test image
    image = cv2.imread('/home/chenyu/3.jpg')
    #cv2.imshow('yuantu', image)

    # bind the SSD (SFD) detection model (repeats the module-level setup above)
    caffe.set_device(0)
    caffe.set_mode_gpu()
    model_def = '../models/sfd/deploy.prototxt'
    model_weights = '../models/sfd/SFD.caffemodel'
    net = caffe.Net(model_def, model_weights, caffe.TEST)



    bbox = sfd_detection(image, 0.5, 'global')      # arguments: image, confidence threshold, mode
    print('detected face boxes:', bbox)
    # *******************************************SFD extension************************************************* enlarge each face box by a fixed offset to approximate a body crop
    offset_height = 160
    offset_width = 160
    frame_height = image.shape[0]
    frame_width = image.shape[1]
    for tbbox in bbox:
        y1 = max(0, tbbox[1] - offset_height)  # expand the face pixel coordinates to approximate the body region
        y2 = min(frame_height, tbbox[3] + offset_height)
        x1 = max(0, tbbox[0] - offset_width)
        x2 = min(frame_width, tbbox[2] + offset_width)
        ssd_facetobody = image[y1:y2, x1:x2, :]  # body crop for this face
    # **********************************************SFD extension************************************************* enlarge each face box by a fixed offset to approximate a body crop

    ssd_bbox = image[tbbox[1]:tbbox[3], tbbox[0]:tbbox[2], :]  # face crop; note tbbox still holds the last box from the loop above
    # cv2.imshow('SSD_body', ssd_facetobody)
    # cv2.imshow('SSD_bbox', ssd_bbox)

    # bind the MTCNN detector and the embedding model (same setup as at module level)
    ctx = mx.gpu(0)
    model = '../models/model-r50-am-lfw/model,0'
    _vec = model.split(',')  # split the "prefix,epoch" string into model path and checkpoint tag
    assert len(_vec) == 2
    prefix = _vec[0]
    epoch = int(_vec[1])
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)  # load the checkpoint; returns sym (the network symbol), arg_params (weight dict) and aux_params (auxiliary-state dict)
    all_layers = sym.get_internals()  # inspect the intermediate layers
    sym = all_layers['fc1_output']    # keep the network only up to the fc1 embedding layer
    model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)  # build the module
    model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])  # bind the input shape
    model.set_params(arg_params, aux_params)  # load the weights

    mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model')  # MTCNN model directory, passed into the detector class
    detector = MtcnnDetector(model_folder=mtcnn_path, ctx=ctx, num_worker=1, accurate_landmark=True,
                             threshold=[0.0, 0.0, 0.2])

    feature_label = get_feature(ssd_bbox, detector, model)  # feed the SSD face crop through MTCNN alignment and the embedding network

    tFace = misc.imresize(ssd_facetobody, (160, 160), interp='bilinear')  # resize the body crop to 160x160 (note: scipy.misc.imresize was removed in SciPy 1.3)
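
    # The block above only embeds the face from the last detected box. A sketch
    # (an assumption, not in the original post) that embeds every detection:
    features = []
    for (xmin, ymin, xmax, ymax) in bbox:
        face = image[ymin:ymax, xmin:xmax, :]  # BGR face crop for this detection
        emb = get_feature(face, detector, model)
        if emb is not None:  # MTCNN may fail on small or blurry crops
            features.append(emb)
    print('extracted %d face embeddings' % len(features))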