Opening the camera and running face detection with caffe and mxnet
阿新 • Published 2018-12-14
# -*- coding:utf-8 -*-
import os
import random
import sys

import cv2
import mxnet as mx
import numpy as np
from scipy import misc
from sklearn import preprocessing

from mtcnn_detector import MtcnnDetector

caffe_root = '../../caffe-ssd/python'  # point the working environment at the caffe-ssd build
sys.path.insert(0, caffe_root)
import caffe

sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src', 'common'))  # add the face_preprocess path
import face_preprocess

caffe.set_device(0)
caffe.set_mode_gpu()
model_def = '../models/sfd/deploy.prototxt'
model_weights = '../models/sfd/SFD.caffemodel'
net = caffe.Net(model_def, model_weights, caffe.TEST)  # build a Net from the prototxt and weights above


# ******************************** SFD ********************************
def sfd_detection(frame, threshold, mode):
    image = frame
    height = image.shape[0]
    width = image.shape[1]
    # Shrink the long side to a fixed size while keeping the aspect ratio,
    # so frames whose width and height differ are handled correctly.
    if mode == 'reg':
        im_shrink = 80.0 / max(image.shape[0], image.shape[1])
    elif mode == 'global':
        im_shrink = 320.0 / max(image.shape[0], image.shape[1])
    elif mode == 'cached':
        im_shrink = 160.0 / max(image.shape[0], image.shape[1])
    # Resize the input so it fits the network, e.g. 320x320x3 in 'global' mode.
    image = cv2.resize(image, None, None, fx=im_shrink, fy=im_shrink,
                       interpolation=cv2.INTER_LINEAR)
    net.blobs['data'].reshape(1, 3, image.shape[0], image.shape[1])
    # matplotlib-style images are RGB with pixels in [0, 1], while caffe wants
    # BGR with pixels in [0, 255], so convert with a Transformer.
    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    transformer.set_transpose('data', (2, 0, 1))              # move channels first
    transformer.set_mean('data', np.array([104, 117, 123]))  # subtract channel means
    transformer.set_raw_scale('data', 255)                   # rescale pixels to [0, 255]
    transformer.set_channel_swap('data', (2, 1, 0))          # RGB -> BGR
    transformed_image = transformer.preprocess('data', image)
    net.blobs['data'].data[...] = transformed_image
    # Forward pass; take the output of the network's last layer, 'detection_out'.
    # It is a 4-D blob (e.g. 1 x 1 x N x 7) holding one 7-tuple per candidate:
    # [image_id, class_id, confidence, xmin, ymin, xmax, ymax]; the number of
    # tuples is the number of possible targets in the image.
    detections = net.forward()['detection_out']
    det_conf = detections[0, 0, :, 2]
    det_xmin = detections[0, 0, :, 3]
    det_ymin = detections[0, 0, :, 4]
    det_xmax = detections[0, 0, :, 5]
    det_ymax = detections[0, 0, :, 6]
    bbox = []
    for i in range(det_conf.shape[0]):
        xmin = max(0, int(round(det_xmin[i] * width)))
        ymin = max(0, int(round(det_ymin[i] * height)))
        xmax = min(width - 1, int(round(det_xmax[i] * width)))
        ymax = min(height - 1, int(round(det_ymax[i] * height)))
        score = det_conf[i]
        if score < threshold or xmin >= xmax or ymin >= ymax:
            continue
        bbox.append([xmin, ymin, xmax, ymax])
    # e.g. [[0, 0, 239, 239], ...] — face pixel coordinates in the original frame
    return bbox
# ******************************** SFD ********************************
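# --- Aside (not in the original post): a minimal smoke test for sfd_detection.
# The image path and output name are placeholders, and the drawing step mirrors
# the commented-out cv2.rectangle debugging in the original code.
def _demo_sfd(path='test.jpg'):
    img = cv2.imread(path)  # placeholder path — point this at any test image
    for xmin, ymin, xmax, ymax in sfd_detection(img, 0.5, 'global'):
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
    cv2.imwrite('sfd_demo.jpg', img)  # placeholder output name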
def do_flip(data):
    # Horizontally flip every channel of a CHW image in place.
    for idx in range(data.shape[0]):
        data[idx, :, :] = np.fliplr(data[idx, :, :])


# *********************************** MTCNN ***********************************
# ---- output face-chip size ----
image_size = '112,112'
_vec = image_size.split(',')               # split the string into width/height parts
assert len(_vec) == 2
image_size = (int(_vec[0]), int(_vec[1]))  # cast both parts to int

# ---- bind the pretrained recognition (embedding) model ----
ctx = mx.gpu(0)
model = '../models/model-r50-am-lfw/model,0'
_vec = model.split(',')                    # separate the checkpoint prefix from the epoch label
assert len(_vec) == 2
prefix = _vec[0]
epoch = int(_vec[1])
# Load the checkpoint: returns sym (the network symbol), arg_params (the weight
# dictionary) and aux_params (the auxiliary-state dictionary).
sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
all_layers = sym.get_internals()           # inspect the intermediate layers
sym = all_layers['fc1_output']             # keep the embedding layer as the output
model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
model.set_params(arg_params, aux_params)   # set the weights

# ---- bind the pretrained MTCNN detector ----
mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model')  # MTCNN model directory
detector = MtcnnDetector(model_folder=mtcnn_path, ctx=ctx, num_worker=1,
                         accurate_landmark=True, threshold=[0.0, 0.0, 0.2])


def get_feature(ssd_bbox, detector, model):
    # Merges the original get_feature and get_input; ssd_bbox is a BGR face crop.
    # det_type=2 means using the R+O nets; otherwise only the O net is used.
    ret = detector.detect_face_limited(ssd_bbox, det_type=2)
    if ret is None:
        return None
    bbox, points = ret                     # bounding boxes and the 5 facial landmarks
    if bbox.shape[0] == 0:
        return None
    bbox = bbox[0, 0:4]
    points = points[0, :].reshape((2, 5)).T   # the 5 landmark points
    # Optionally save the MTCNN labels: np.savetxt("../label/msra_lmk", (bbox, points))
    flip = 0  # no flip augmentation; set to 1 to average flipped embeddings
    # Align and crop a 112x112 face chip from the MTCNN bbox/landmarks,
    # replacing the raw cv2 crop with a landmark-aligned one.
    nimg = face_preprocess.preprocess(ssd_bbox, bbox, points, image_size='112,112')
    cv2.imwrite('./feature/' + str(random.random()) + '.jpg', nimg)
    nimg = cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB)
    aligned = np.transpose(nimg, (2, 0, 1))   # HWC -> CHW
    embedding = None
    for flipid in [0, 1]:
        if flipid == 1:
            if flip == 0:
                break
            do_flip(aligned)
        input_blob = np.expand_dims(aligned, axis=0)
        data = mx.nd.array(input_blob)
        db = mx.io.DataBatch(data=(data,))
        model.forward(db, is_train=False)
        _embedding = model.get_outputs()[0].asnumpy()
        if embedding is None:
            embedding = _embedding
        else:
            embedding += _embedding
    embedding = preprocessing.normalize(embedding).flatten()  # L2-normalized feature
    return embedding
# *********************************** MTCNN ***********************************
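# --- Aside (not in the original post): because get_feature returns an
# L2-normalized vector, two face crops can be compared with a plain dot
# product (cosine similarity for unit vectors). The 0.5 threshold is an
# illustrative guess, not a tuned value.
def _demo_compare(crop_a, crop_b, threshold=0.5):
    f1 = get_feature(crop_a, detector, model)
    f2 = get_feature(crop_b, detector, model)
    if f1 is None or f2 is None:
        return None  # MTCNN failed on one of the crops
    return float(np.dot(f1, f2)) > threshold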
if __name__ == '__main__':
    # The Caffe SFD net, the MXNet embedding model and the MTCNN detector are
    # already set up at module level, so they are not rebuilt here.
    image = cv2.imread('/home/chenyu/3.jpg')
    bbox = sfd_detection(image, 0.5, 'global')  # args: frame, threshold, mode
    print('detected face boxes:', bbox)

    # ---- SFD extension: grow each face box to approximate the body region ----
    offset_height = 160
    offset_width = 160
    frame_height = image.shape[0]
    frame_width = image.shape[1]
    for tbbox in bbox:
        # Expand the face pixel coordinates, clamped to the frame, to locate the body.
        y1 = max(0, tbbox[1] - offset_height)
        y2 = min(frame_height, tbbox[3] + offset_height)
        x1 = max(0, tbbox[0] - offset_width)
        x2 = min(frame_width, tbbox[2] + offset_width)
        ssd_facetobody = image[y1:y2, x1:x2, :]                    # body crop
        ssd_bbox = image[tbbox[1]:tbbox[3], tbbox[0]:tbbox[2], :]  # face crop

        # Feed the SFD face crop into MTCNN alignment + the embedding network.
        feature_label = get_feature(ssd_bbox, detector, model)
        # Note: scipy.misc.imresize is deprecated in newer SciPy releases.
        tFace = misc.imresize(ssd_facetobody, (160, 160), interp='bilinear')
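The title promises reading from the camera, but the __main__ block above runs on a single still image. The loop below is a minimal sketch of the missing capture step: it assumes the default webcam at index 0 and uses the faster 'cached' (160 px) detection mode; none of these choices appear in the original code.

cap = cv2.VideoCapture(0)  # default webcam; the index is an assumption
while True:
    ret, frame = cap.read()
    if not ret:
        break
    for xmin, ymin, xmax, ymax in sfd_detection(frame, 0.5, 'cached'):
        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
    cv2.imshow('faces', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):  # press q to quit
        break
cap.release()
cv2.destroyAllWindows()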