人臉識別CNN網路微調流程

人臉識別CNN網路微調流程

目標
微調論文“A Lightened CNN for Deep Face Representation”中的29層caffe網路。

資料
自己收集的包含500人的亞洲人臉資料集;
這裡寫圖片描述

訓練資料準備
1.獲得500人微調資料集之後,使用python原始碼工具將資料集劃分為訓練集和驗證集:

#!---* coding: utf-8 --*--
#!/usr/bin/python

"""
將資料集隨機分成訓練集、驗證集、測試集

"""

from PIL import Image, ImageFile
import random
import os, time
import logging



logger = logging.getLogger(__name__)
ImageFile.LOAD_TRUNCATED_IMAGES = True
class CreateDataSet:
    """Static helpers for splitting an image dataset into train/val/test."""

    def __init__(self):
        pass

    @staticmethod
    def randNumber(li, num):
        """Pick `num` random items out of `li`.

        Returns (picked, remaining).  Bug fix: the original bound
        temp_li = li, so removing the picked items also mutated the
        caller's list; work on a copy instead.
        """
        temp_li = list(li)
        res = random.sample(li, num)
        for i in res:
            temp_li.remove(i)
        return res, temp_li

    @staticmethod
    def openImage(image):
        """Open an image file for reading (PIL)."""
        return Image.open(image, mode="r")

    @staticmethod
    def saveImage(image, path):
        """Persist a PIL image to `path`."""
        image.save(path)


def makeDir(path):
    """Create `path` (including parents).

    Returns 0 when created, 1 when it already exists, -1 on error.
    Bug fix: the original returned -2 on error while every caller tested
    for -1, so failures were silently ignored; return -1 instead.
    """
    try:
        if not os.path.exists(path):
            if not os.path.isfile(path):
                os.makedirs(path)
            return 0
        else:
            return 1
    except Exception as e:
        print(str(e))
        return -1


def save(image, des_path, class_name, file_name):
    """Write `image` to <des_path>/<class_name>/<file_name>."""
    class_dir = os.path.join(des_path, class_name)
    CreateDataSet.saveImage(image, os.path.join(class_dir, file_name))


# Set to 1 to carve out a test split in addition to train/val.
is_create_test = 0
# Fraction of each class moved to the val (and, if enabled, test) split.
ratio = 0.1


def create(path, new_path):
    """Split the class-per-folder dataset under `path` into train/val
    (and optionally test) trees under `new_path`.

    :param path: source dataset root, one sub-folder per identity
    :param new_path: existing destination root; train/, val/ (, test/)
        sub-folders are created inside it
    :return: -1 on error, None on success
    """
    train_sample_path = os.path.join(new_path, "train")
    val_sample_path = os.path.join(new_path, "val")
    test_sample_path = os.path.join(new_path, "test")

    if os.path.isdir(new_path):
        if makeDir(train_sample_path) == -1:
            print('create train dir failure')
            return -1
        if makeDir(val_sample_path) == -1:
            print('create val dir failure')
            return -1
        if is_create_test == 1 and makeDir(test_sample_path) == -1:
            print('create test dir failure')
            return -1
    else:
        print('the input param new_path is not the dir')
        return -1

    if os.path.isdir(path):
        class_names = os.listdir(path)
    else:
        print('the input param path is not the dir')
        return -1

    for name in class_names:
        print("process class name=%s" % name)
        tmp_class_name = os.path.join(path, name)
        if not os.path.isdir(tmp_class_name):
            continue

        image_names = os.listdir(tmp_class_name)
        total = len(image_names)
        li = list(range(total))

        # Draw the val (and optionally test) indices; whatever is left
        # in remain_list becomes the train split.
        val_total_sample_num = int(total * ratio)
        val_name_list, remain_list = CreateDataSet.randNumber(li, val_total_sample_num)

        test_name_list = []
        test_total_sample_num = 0
        if is_create_test == 1:
            test_total_sample_num = int(total * ratio)
            test_name_list, remain_list = CreateDataSet.randNumber(
                remain_list, test_total_sample_num)

        # val split
        if makeDir(os.path.join(val_sample_path, name)) == -1:
            print('create val class dir failure')
            return -1
        print("val sample number=%d" % val_total_sample_num)
        for index in val_name_list:
            temp_img_name = os.path.join(tmp_class_name, image_names[index])
            if os.path.isfile(temp_img_name):
                save(CreateDataSet.openImage(temp_img_name),
                     val_sample_path, name, image_names[index])

        # test split (optional)
        if is_create_test == 1:
            if makeDir(os.path.join(test_sample_path, name)) == -1:
                print('create test class dir failure')
                return -1
            print("test sample number=%d" % test_total_sample_num)
            for index in test_name_list:
                temp_img_name = os.path.join(tmp_class_name, image_names[index])
                if os.path.isfile(temp_img_name):
                    save(CreateDataSet.openImage(temp_img_name),
                         test_sample_path, name, image_names[index])

        # everything left over goes to train
        if makeDir(os.path.join(train_sample_path, name)) == -1:
            print('create train class dir failure')
            return -1
        print("train sample number=%d" % len(remain_list))
        for index in remain_list:
            temp_img_name = os.path.join(tmp_class_name, image_names[index])
            if os.path.isfile(temp_img_name):
                save(CreateDataSet.openImage(temp_img_name),
                     train_sample_path, name, image_names[index])

    print("finish")


if __name__ == '__main__':
    create("H:\\Finetuning-500", "H:\\Finetuning-500_new")

所得的訓練集和驗證集放置Finetuning-500_new中。
這裡寫圖片描述

2.利用gen_train_val_txt.py生成訓練集和驗證集的圖片列表及其對應標籤的txt檔案(label標籤需從0開始);將Finetuning-500_new放置/home/Data資料夾下,並在caffe-master/examples/下新建資料夾Finetuning-500,將gen_train_val_txt.py放置該資料夾下;

#!---* coding: utf-8 --*--
#!/usr/bin/python
import os
class CaffeData:
    """Build caffe image-list files (train.txt / val.txt).

    Each output line is "/<class_dir>/<file> <label>".  Labels start at 0
    and are assigned per class directory; directories are sorted so the
    train and val lists assign the same label to the same class.
    """

    def __init__(self):
        pass

    @staticmethod
    def _write_list_file(data_path, txt_path):
        """Write one "<relative path> <label>" line per image found under
        data_path into txt_path."""
        # `with` guarantees the file is flushed and closed (the original
        # leaked the handle).
        with open(txt_path, "w") as f:
            # sorted() keeps the class -> label mapping deterministic and
            # identical between the train and val lists (os.listdir order
            # is arbitrary).
            for label, dir_name in enumerate(sorted(os.listdir(data_path))):
                class_dir = os.path.join(data_path, dir_name)
                for file_name in sorted(os.listdir(class_dir)):
                    f.write('/' + dir_name + '/' + file_name + " " + str(label) + '\n')

    @staticmethod
    def create_train_txt(train_data_path, train_txt_path):
        """Create train.txt under train_txt_path from train_data_path.

        Bug fix: the original iterated the undefined name `val_data_path`
        (NameError on first call).
        """
        CaffeData._write_list_file(train_data_path,
                                   os.path.join(train_txt_path, 'train.txt'))

    @staticmethod
    def create_val_txt(val_data_path, val_txt_path):
        """Create val.txt under val_txt_path from val_data_path.

        Bug fix: the original incremented the label counter outside the
        class loop, so every val image was labelled 0.
        """
        CaffeData._write_list_file(val_data_path,
                                   os.path.join(val_txt_path, 'val.txt'))

if __name__ == '__main__':
    # Hard-coded deployment layout: the dataset splits live under
    # /home/Data, the generated list files go next to the caffe example.
    data_root = r'/home/Data/Finetuning-500_new'
    txt_dir = r'/home/yi_miao/caffe-master/examples/Finetuning-500'
    CaffeData.create_train_txt(data_root + '/train', txt_dir)
    CaffeData.create_val_txt(data_root + '/val', txt_dir)

#finetuning 資料集

生成的訓練集和驗證集的圖片列表及其對應標籤的txt檔案內容如下:
這裡寫圖片描述

利用CreateLmdb.sh指令碼檔案生成訓練集和驗證集的lmdb資料格式,CreateLmdb.sh位於caffe-master/examples/Finetuning-500下;

#!/bin/sh
# Build the train/val lmdb databases for the 500-identity fine-tuning set
# with caffe's convert_imageset tool.
# Fixes vs. original: removed the stray trailing backslash after
# $TRAIN_DATA_LMDB (it continued the command onto the following blank
# line), and quoted all path expansions.

CAFFE_ROOT='/***/caffe-master/'
TOOLS="$CAFFE_ROOT/build/tools"

EXAMPLE="$CAFFE_ROOT/examples/Finetuning-500"
TRAIN_TXT="$EXAMPLE/train.txt"
VAL_TXT="$EXAMPLE/val.txt"

DATA="$CAFFE_ROOT/data/Finetuning-500"
TRAIN_DATA_ROOT=/home/Data/Finetuning-500_new/train
VAL_DATA_ROOT=/home/Data/Finetuning-500_new/val
TRAIN_DATA_LMDB="$DATA/train_lmdb"
VAL_DATA_LMDB="$DATA/val_lmdb"

IMG_WIDTH=144
IMG_HEIGHT=144

# Set RESIZE=true to resize the images. Leave as false if images have
# already been resized using another tool.
RESIZE=true

if $RESIZE; then
    RESIZE_HEIGHT=$IMG_HEIGHT
    RESIZE_WIDTH=$IMG_WIDTH
else
    RESIZE_HEIGHT=0
    RESIZE_WIDTH=0
fi

# Remove any stale lmdb output from a previous run.
if [ -d "$TRAIN_DATA_LMDB" ]; then
    rm -rf "$TRAIN_DATA_LMDB"
fi

if [ -d "$VAL_DATA_LMDB" ]; then
    rm -rf "$VAL_DATA_LMDB"
fi

if [ ! -d "$TRAIN_DATA_ROOT" ]; then
    echo "Error: TRAIN_DATA_ROOT is not a path to a directory: $TRAIN_DATA_ROOT"
    exit 1
fi

if [ ! -d "$VAL_DATA_ROOT" ]; then
    echo "Error: VAL_DATA_ROOT is not a path to a directory: $VAL_DATA_ROOT"
    exit 1
fi

echo "$RESIZE_HEIGHT"
echo "$RESIZE_WIDTH"

echo "Creating train lmdb..."

GLOG_logtostderr=1 "$TOOLS/convert_imageset" \
     --resize_height=$RESIZE_HEIGHT \
     --resize_width=$RESIZE_WIDTH \
     --shuffle \
     --gray=true \
     "$TRAIN_DATA_ROOT" \
     "$TRAIN_TXT" \
     "$TRAIN_DATA_LMDB"

echo "Creating val lmdb..."

GLOG_logtostderr=1 "$TOOLS/convert_imageset" \
     --resize_height=$RESIZE_HEIGHT \
     --resize_width=$RESIZE_WIDTH \
     --shuffle \
     --gray=true \
     "$VAL_DATA_ROOT" \
     "$VAL_TXT" \
     "$VAL_DATA_LMDB"

echo "Done"

利用CreateMeanFile.sh指令碼檔案生成訓練集和驗證集的均值檔案(mean file,binaryproto格式),CreateMeanFile.sh位於caffe-master/examples/Finetuning-500下;

#!/bin/sh
# Compute the per-pixel mean image (binaryproto) of the train and val
# lmdbs with caffe's compute_image_mean tool.
# Fix vs. original: quoted all path expansions so the script survives
# paths containing spaces.

CAFFE_ROOT='/***/caffe-master'

TOOLS="$CAFFE_ROOT/build/tools"
DATA="$CAFFE_ROOT/data/Finetuning-500"
EXAMPLE="$CAFFE_ROOT/examples/Finetuning-500"
TRAIN_DATA_LMDB="$DATA/train_lmdb"
VAL_DATA_LMDB="$DATA/val_lmdb"
TRAIN_MEAN="$DATA/train_data_mean.binaryproto"
VAL_MEAN="$DATA/val_data_mean.binaryproto"

"$TOOLS/compute_image_mean" "$TRAIN_DATA_LMDB" \
  "$TRAIN_MEAN"

"$TOOLS/compute_image_mean" "$VAL_DATA_LMDB" \
  "$VAL_MEAN"

使用caffe,微調模型
方法一:
1.修改train_test.prototxt和solver.prototxt檔案
以Light CNN29網路模型為例,考慮到資料量較少,LCNN29微調過程中,保留前面層的引數,修改最後一層fc2為fc2_500,將最後預測種類設定為500,新增propagate_down:0引數,微調過程中只訓練最後一個全連線fc2_500層,保持前面層的引數不變;

# Replacement classifier layer for fine-tuning: renamed from fc2 so caffe
# re-initializes its weights instead of loading the pretrained ones.
# NOTE(fix): prototxt only supports '#' comments; the original used C-style
# '//' comments, which break the text-format parser.
layer {
  name: "fc2_qh_500"  # modified: renamed from fc2
  type: "InnerProduct"
  bottom: "eltwise_fc1"
  top: "fc2_qh_500"   # modified
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  propagate_down: 0   # modified: block gradient flow into earlier layers
  inner_product_param {
    num_output: 500   # modified: 500 identities in the fine-tuning set
    weight_filler {
      type: "xavier"
      # type: "gaussian"
      # std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}

問題:
測試通過設定propagate_down引數訓練的網路模型,由設定fc2層propagate_down為0微調得到的網路模型,雖然成功識別的數量與原始模型差不多,但失敗的數量大大增加,根據資料所說:設定fc2層propagate_down為0,只會微調fc2層的引數,前面層的引數理論上保持不變,因此在利用fc1層特徵進行人臉識別時效果應該和原始模型一致,但測試結果差異較大,對該引數的具體作用存在疑問,希望有人能答疑解惑。

方法二
希望C層的引數不會改變,但是C前面的AB層的引數會改變,這種情況,只是固定了C層的引數,C層得到的梯度依然會反向傳播給前面的B層。只需要將對應的引數blob的學習率調整為0:在layer裡面加上param { lr_mult: 0 decay_mult:0}就可以了,比如全連線層裡面,這裡設定fc2層的lr_mult和decay_mult引數為0:

# Example of freezing a layer's parameters: lr_mult 0 stops updates while
# gradients still back-propagate to earlier layers.
# NOTE(fix): the original snippet was missing the closing '}' of the layer.
layer {
  type: "InnerProduct"
  param {
    # Config for the layer's first parameter blob, i.e. the weight matrix
    # of the fully-connected layer.
    lr_mult: 0    # learning rate 0; see the ParamSpec message in caffe.proto
    decay_mult: 0
  }
  param {
    # Config for the second parameter blob, i.e. the bias term.
    lr_mult: 0    # learning rate 0
    decay_mult: 0
  }
}

對學習率的設定
初始學習率設定為0.001,並以inv方式進行衰減。發現loss逐漸衰減了,但迭代2萬多次後,損失函式值又開始上升,需要調小學習率,將初始學習率設為0.0001

# Solver configuration for fine-tuning the 500-identity face model.
net: "/***/caffe-master/examples/Finetuning-500/train_test.prototxt"
test_iter: 580       # 580*16(batchsize)=9280>4028
test_interval:1340   # 1340*64(batchsize)=85760>85743
test_compute_loss: true

# Lowered from 0.001: with the higher rate the loss started rising again
# after ~20k iterations (see the discussion above this snippet).
base_lr: 0.0001
#lr_policy:"step"
#gamma:0.1
#stepsize:20000
momentum: 0.9
weight_decay: 0.0005
# "inv" policy: lr decays as base_lr * (1 + gamma*iter)^(-power)
# per the caffe solver documentation.
lr_policy: "inv"
gamma: 0.00005
power: 0.75

display: 50
max_iter: 500000
snapshot: 5000       # write a snapshot every 5000 iterations
snapshot_prefix: "/***/caffe-master/examples/Finetuning-500/snapshot/"
solver_mode: GPU
#solver_mode: CPU
debug_info: false

後續逐步增加微調層數進行微調,但由於資料量較少,微調較多的層數效果反而不如微調最後兩個全連線層。