1. 程式人生 > >FaceNet source code explanation (part I)

FaceNet source code explanation (part I)

FaceNet source code explanation (part I)

  1. Download source code from https://github.com/davidsandberg/facenet
  2. Download pretrained models below

Pre-trained models

Model name LFW accuracy Training dataset Architecture
20180408-102900 0.9905 CASIA-WebFace Inception ResNet v1
20180402-114759 0.9965 VGGFace2 Inception ResNet v1
  1. compare.py
    – Performs face alignment and calculates L2 distance between the embeddings of images.
    – load_and_align_data: Uses MTCNN to detect the face bounding boxes in each image, keeps the first returned box (bounding_boxes[0]), crops it with a margin, and resizes the crop to the designated image size.

  2. ~/src/align/detect_face.py
    – Tensorflow implementation of the face detection / alignment algorithm found at
    https://github.com/kpzhang93/MTCNN_face_detection_alignment
    – PNet: conv -> prelu -> maxpool -> conv -> prelu -> conv -> prelu -> conv -> softmax. PNet aims to obtain candidate facial windows and their bounding box regression vectors
    – RNet: conv -> prelu -> maxpool -> conv -> prelu -> maxpool -> conv -> prelu -> fc -> prelu -> fc -> softmax
    RNet feeds the candidate facial windows to a convnet which rejects a large number of false detections
    – ONet: conv -> prelu -> maxpool -> conv -> prelu -> maxpool -> conv -> prelu -> maxpool -> conv -> prelu -> fc -> prelu -> fc -> softmax. Output face region as well as 5 facial keypoints
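    – Training: face / non-face detection is formulated as a two-class classification task with a cross-entropy loss function:
    $L_i^{det} = -\left( y_i^{det} \log(p_i) + (1 - y_i^{det}) \log(1 - p_i) \right)$
    where $p_i$ is the probability predicted by the network that sample $x_i$ is a face, and $y_i^{det} \in \{0, 1\}$ is the ground-truth label.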

    – Bounding Box Regression & NMS:
    More details can be found in the paper: Joint Face Detection and Alignment using
    Multi-task Cascaded Convolutional Networks

A detailed and complete review of code

compare.py

"""Performs face alignment and calculates L2 distance between the embeddings of images."""

# MIT License
# 
# Copyright (c) 2016 David Sandberg
# 
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# 
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from scipy import misc
import tensorflow as tf
import numpy as np
import sys
import os
import copy
import argparse
import facenet
import align.detect_face

def main(args):
    """Print the pairwise L2 distances between the embeddings of the input images.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``image_files``, ``image_size``, ``margin``,
            ``gpu_memory_fraction`` and ``model``.
    """
    # Load the images and run face detection on them; the detected faces are
    # cropped and resized to the requested size (160x160 by default).
    face_batch = load_and_align_data(
        args.image_files, args.image_size, args.margin, args.gpu_memory_fraction)

    # Create a fresh TensorFlow graph and a session bound to it.
    with tf.Graph().as_default(), tf.Session() as sess:
        # Load the model into the graph.
        facenet.load_model(args.model)

        # Fetch the input and output tensors from the loaded model graph.
        graph = tf.get_default_graph()
        images_placeholder = graph.get_tensor_by_name("input:0")
        embeddings = graph.get_tensor_by_name("embeddings:0")
        phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")

        # Run a forward pass to calculate the embeddings.
        emb = sess.run(
            embeddings,
            feed_dict={images_placeholder: face_batch, phase_train_placeholder: False})

        # Number of input images, taken from the path list.
        count = len(args.image_files)

        print('Images:')
        for idx, path in enumerate(args.image_files):
            print('%1d: %s' % (idx, path))
        print('')

        # Print the distance matrix: one header line with the column
        # indices, then one line per image.
        print('Distance matrix')
        header_cells = ['    %1d     ' % col for col in range(count)]
        print('    ' + ''.join(header_cells))
        for row in range(count):
            cells = []
            for col in range(count):
                delta = np.subtract(emb[row, :], emb[col, :])
                # Euclidean distance between the two embedding rows.
                cells.append('  %1.4f  ' % np.sqrt(np.sum(np.square(delta))))
            print('%1d  ' % row + ''.join(cells))
            
            
def load_and_align_data(image_paths, image_size, margin, gpu_memory_fraction):
    """Detect, crop, resize and prewhiten one face per input image.

    Args:
        image_paths: Mutable list of image file paths. Paths for which no
            face is detected are removed from this list IN PLACE, so after
            the call the list lines up with the entries of the returned array.
        image_size: Side length, in pixels, of the square output crops.
        margin: Total number of pixels added around the detected box (half
            on each side), clipped to the image borders.
        gpu_memory_fraction: Forwarded to ``tf.GPUOptions`` as
            ``per_process_gpu_memory_fraction`` for the detection session.

    Returns:
        The result of ``np.stack`` over the prewhitened crops, one entry per
        path remaining in ``image_paths``.

    Raises:
        ValueError: If no face was detected in any of the images.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    print('Creating networks and loading parameters')
    # Build a dedicated graph for the MTCNN networks and cap the GPU memory
    # the detection session may use.
    # To pin detection to one particular GPU, the session creation can be
    # wrapped in `with tf.device('/device:GPU:1'):` together with
    # `allow_soft_placement=True` in the ConfigProto.
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)
        session_config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)
        # The session is used as a context manager so that it is closed once
        # detection is finished instead of leaking for the rest of the
        # process. The three networks run through this session, so every
        # detect_face call has to happen inside this block.
        with tf.Session(config=session_config) as sess:
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

            # Iterate over a copy because image_paths is pruned inside the loop.
            tmp_image_paths = copy.copy(image_paths)
            img_list = []
            for image in tmp_image_paths:
                img = misc.imread(os.path.expanduser(image), mode='RGB')
                img_size = np.asarray(img.shape)[0:2]
                # Get the bounding boxes of the faces in this image.
                bounding_boxes, _ = align.detect_face.detect_face(
                    img, minsize, pnet, rnet, onet, threshold, factor)
                if len(bounding_boxes) < 1:
                    image_paths.remove(image)
                    print("can't detect face, remove ", image)
                    continue
                # Only the first returned box is used.
                det = np.squeeze(bounding_boxes[0, 0:4])
                # Grow the box by margin/2 on every side, clipped to the image;
                # storing into an int32 array truncates the float coordinates.
                bb = np.zeros(4, dtype=np.int32)
                bb[0] = np.maximum(det[0] - margin / 2, 0)
                bb[1] = np.maximum(det[1] - margin / 2, 0)
                bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
                bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                aligned = misc.imresize(cropped, (image_size, image_size), interp='bilinear')
                prewhitened = facenet.prewhiten(aligned)
                img_list.append(prewhitened)

    if not img_list:
        # np.stack would raise a ValueError with an opaque message here;
        # keep the exception type but say what actually went wrong.
        raise ValueError('No face could be detected in any of the input images')
    images = np.stack(img_list)
    return images

def parse_arguments(argv):
    """Parse the command-line arguments of the comparison script.

    Args:
        argv: List of argument strings, without the program name.

    Returns:
        The ``argparse.Namespace`` with ``model``, ``image_files``,
        ``image_size``, ``margin`` and ``gpu_memory_fraction``.
    """
    cli = argparse.ArgumentParser()

    # Positional arguments: the model first, then one or more image paths.
    cli.add_argument(
        'model',
        type=str,
        help='Could be either a directory containing the meta_file and ckpt_file or a model protobuf (.pb) file')
    cli.add_argument('image_files', nargs='+', type=str, help='Images to compare')

    # Optional flags, declared as (flag, value type, default, help text).
    optional_flags = (
        ('--image_size', int, 160,
         'Image size (height, width) in pixels.'),
        ('--margin', int, 44,
         'Margin for the crop around the bounding box (height, width) in pixels.'),
        ('--gpu_memory_fraction', float, 1.0,
         'Upper bound on the amount of GPU memory that will be used by the process.'),
    )
    for flag, value_type, default_value, help_text in optional_flags:
        cli.add_argument(flag, type=value_type, default=default_value, help=help_text)

    return cli.parse_args(argv)

if __name__ == '__main__':
    # Script entry point: parse everything after the program name, then run
    # the comparison.
    cli_args = parse_arguments(sys.argv[1:])
    main(cli_args)

To be continued …