YOLO配置檔案理解

阿新 • • 發佈：2018-12-25

http://www.infocool.net/kb/WWW/201703/317548.html


  
   
    
     
    
    
     
      [
      net]
     
    
   
    
     
    
    
     
      batch=
      64                           每batch個樣本更新一次引數。
     
    
   
    
     
    
    
     
      subdivisions=
      8                     如果GPU記憶體不夠大，將batch分割為subdivisions個子batch，每個子batch的大小為batch/subdivisions。
     
    
 

   
    
     
    
    
     
                                         在darknet程式碼中，會將batch/subdivisions命名為batch。
     
    
   
    
     
    
    
     
      height=
      416                         Input影象的高
     
    
   
    
     
    
    
     
      width=
      416                          Input影象的寬
     
    
 

   
    
     
    
    
     
      channels=
      3                         Input影象的通道數
     
    
   
    
     
    
    
     
      momentum=
      0.9                       動量
     
    
   
    
     
    
    
     
      decay=
      0.0005                       權重衰減正則項，防止過擬合
     
    
   
    
     
    
    
     
      angle=
      0 
                            通過旋轉角度來生成更多訓練樣本
     
    
   
    
     
    
    
     
      saturation = 
      1.5                   通過調整飽和度來生成更多訓練樣本
     
    
   
    
     
    
    
     
      exposure = 
      1.5                     通過調整曝光量來生成更多訓練樣本
     
    
   
    
     
    
    
     
      hue=
      .1                             通過調整色調來生成更多訓練樣本
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      learning_rate=
      0.0001               初始學習率
     
    
   
    
     
    
    
     
      max_batches = 
      45000                訓練達到max_batches後停止學習
     
    
   
    
     
    
    
     
      policy=steps                       調整學習率的policy，有如下policy：CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM
     
    
   
    
     
    
    
     
      steps=
      100,
      25000,
      35000              當batch_num依次達到steps中的各個值時調整學習率（與scales一一對應）
     
    
   
    
     
    
    
     
      scales=
      10,
      .1,
      .1                    學習率變化的比例，累計相乘
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      [
      convolutional]
     
    
   
    
     
    
    
     
      batch_normalize=
      1                  是否做BN
     
    
   
    
     
    
    
     
      filters=
      32                         輸出多少個特徵圖
     
    
   
    
     
    
    
     
      size=
      3                             卷積核的尺寸
     
    
   
    
     
    
    
     
      stride=
      1                           做卷積運算的步長
     
    
   
    
     
    
    
     
      pad=
      1                              如果pad為
      0,padding由 padding引數指定。如果pad為
      1，padding大小為size/
      2
     
    
   
    
     
    
    
     
      activation=leaky                   啟用函式：
     
    
   
    
     
    
    
     
                                         logistic，loggy，relu，elu，relie，plse，hardtan，lhtan，linear，ramp，leaky，tanh，stair
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      [
      maxpool]
     
    
   
    
     
    
    
     
      size=
      2                             池化層尺寸
     
    
   
    
     
    
    
     
      stride=
      2                           池化步進
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      [
      convolutional]
     
    
   
    
     
    
    
     
      batch_normalize=
      1
     
    
   
    
     
    
    
     
      filters=
      64
     
    
   
    
     
    
    
     
      size=
      3
     
    
   
    
     
    
    
     
      stride=
      1
     
    
   
    
     
    
    
     
      pad=
      1
     
    
   
    
     
    
    
     
      activation=leaky
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      [
      maxpool]
     
    
   
    
     
    
    
     
      size=
      2
     
    
   
    
     
    
    
     
      stride=
      2
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      ......
     
    
   
    
     
    
    
     
      ......
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      #######
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      [
      convolutional]
     
    
   
    
     
    
    
     
      batch_normalize=
      1
     
    
   
    
     
    
    
     
      size=
      3
     
    
   
    
     
    
    
     
      stride=
      1
     
    
   
    
     
    
    
     
      pad=
      1
     
    
   
    
     
    
    
     
      filters=
      1024
     
    
   
    
     
    
    
     
      activation=leaky
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      [
      convolutional]
     
    
   
    
     
    
    
     
      batch_normalize=
      1
     
    
   
    
     
    
    
     
      size=
      3
     
    
   
    
     
    
    
     
      stride=
      1
     
    
   
    
     
    
    
     
      pad=
      1
     
    
   
    
     
    
    
     
      filters=
      1024
     
    
   
    
     
    
    
     
      activation=leaky
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      [
      route]                            the route layer 
      is to bring finer grained features 
      in 
      from earlier 
      in the network
     
    
   
    
     
    
    
     
      layers=
      -9
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      [
      reorg]                            the reorg layer 
      is to make these features match the feature map size at the later layer. 
     
    
   
    
     
    
    
     
                                         The end feature map 
      is 
      13x13, the feature map 
      from earlier 
      is 
      26x26x512. 
     
    
   
    
     
    
    
     
                                         The reorg layer maps the 
      26x26x512 feature map onto a 
      13x13x2048 feature map 
     
    
   
    
     
    
    
     
                                         so that it can be concatenated with the feature maps at 
      13x13 resolution.
     
    
   
    
     
    
    
     
      stride=
      2
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      [
      route]
     
    
   
    
     
    
    
     
      layers=
      -1,
      -3
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      [
      convolutional]
     
    
   
    
     
    
    
     
      batch_normalize=
      1
     
    
   
    
     
    
    
     
      size=
      3
     
    
   
    
     
    
    
     
      stride=
      1
     
    
   
    
     
    
    
     
      pad=
      1
     
    
   
    
     
    
    
     
      filters=
      1024
     
    
   
    
     
    
    
     
      activation=leaky
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      [
      convolutional]
     
    
   
    
     
    
    
     
      size=
      1
     
    
   
    
     
    
    
     
      stride=
      1
     
    
   
    
     
    
    
     
      pad=
      1
     
    
   
    
     
    
    
     
      filters=
      125                        region前最後一個卷積層的filters數是特定的，計算公式為filters=num*(classes+
      5)，此處為5*(20+5)=125
     
    
   
    
     
    
    
                                        
      5的意義是每個box的
      4個座標tx,ty,tw,th加上1個置信度to（即論文中的tx,ty,tw,th,to）
     
    
   
    
     
    
    
     
      activation=linear
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      [
      region]
     
    
   
    
     
    
    
     
      anchors = 
      1.08,
      1.19,  
      3.42,
      4.41,  
      6.63,
      11.38,  
      9.42,
      5.11,  
      16.62,
      10.52          預選框，可以手工挑選，
     
    
   
    
     
    
    
     
                                                                                      也可以通過k means 從訓練樣本中學出
     
    
   
    
     
    
    
     
      bias_match=
      1
     
    
   
    
     
    
    
     
      classes=
      20                         網路需要識別的物體種類數
     
    
   
    
     
    
    
     
      coords=
      4                           每個box的
      4個座標tx,ty,tw,th
     
    
   
    
     
    
    
     
      num=
      5                              每個grid cell預測幾個box
     
    
   
    
     
    
    
     
      softmax=
      1                          使用softmax做啟用函式
     
    
   
    
     
    
    
     
      jitter=
      .2                          通過抖動增加噪聲來抑制過擬合
     
    
   
    
     
    
    
     
      rescore=
      1                          暫理解為一個開關，非
      0時通過重打分來調整l.delta（預測值與真實值的差）
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      object_scale=
      5                     暫理解為計算損失時預測框中有物體時的權重
     
    
   
    
     
    
    
     
      noobject_scale=
      1                   暫理解為計算損失時預測框中無物體時的權重
     
    
   
    
     
    
    
     
      class_scale=
      1                      暫理解為計算類別損失時的權重                      
     
    
   
    
     
    
    
     
      coord_scale=
      1                      暫理解為計算損失時座標偏差的權重
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      absolute=
      1
     
    
   
    
     
    
    
     
      thresh = 
      .6
     
    
   
    
     
    
    
     
      random=
      0                           是否啟用多尺度訓練：非0時，訓練過程中每隔若干個batch隨機改變網路輸入影象的尺寸

darknet對應程式碼

找到cfg檔案解析的程式碼，選擇detector demo 作為入口

darknet.c檔案 main 函式開始


  
   
    
     
    
    
     
          } 
      else 
      if (
      0 == 
      strcmp(argv[
      1], 
      "detector")){
     
    
   
    
     
    
    
     
          run_detector(argc, argv);

Detector.c檔案 run_detector函式


  
   
    
     
    
    
     
      char *prefix = find_char_arg(argc, argv, 
      "-prefix", 
      0);
     
    
   
    
     
    
    
     
      float thresh = find_float_arg(argc, argv, 
      "-thresh", 
      .24);
     
    
   
    
     
    
    
     
      float hier_thresh = find_float_arg(argc, argv, 
      "-hier", 
      .5);
     
    
   
    
     
    
    
     
      int cam_index = find_int_arg(argc, argv, 
      "-c", 
      0);
     
    
   
    
     
    
    
     
      int frame_skip = find_int_arg(argc, argv, 
      "-s", 
      0);
     
    
   
    
     
    
    
     
      if(argc < 
      4){
     
    
   
    
     
    
    
         
      fprintf(
      stderr, 
      "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[
      0], argv[
      1]);
     
    
   
    
     
    
    
         
      return;
     
    
   
    
     
    
    
     
      }
     
    
   
    
     
    
    
     
      char *gpu_list = find_char_arg(argc, argv, 
      "-gpus", 
      0);
     
    
   
    
     
    
    
     
      char *outfile = find_char_arg(argc, argv, 
      "-out", 
      0);
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      ......
     
    
   
    
     
    
    
     
      ......
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      else 
      if(
      0==
      strcmp(argv[
      2], 
      "demo")) {
     
    
   
    
     
    
    
         
      list *options = read_data_cfg(datacfg);
     
    
   
    
     
    
    
         
      int classes = option_find_int(options, 
      "classes", 
      20);
     
    
   
    
     
    
    
         
      char *name_list = option_find_str(options, 
      "names", 
      "data/names.list");
     
    
   
    
     
    
    
         
      char **names = get_labels(name_list);
     
    
   
    
     
    
    
     
          demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, hier_thresh);
     
    
   
    
     
    
    
     
      }

read_data_cfg函式解析資料配置檔案（datacfg），解析結果儲存在options指標所指向的list中。

class

int classes = option_find_int(options, "classes", 20);

classes為YOLO可識別的種類數

batch、learning_rate、momentum、decay和 subdivisions

demo.c檔案demo函式

net = parse_network_cfg(cfgfile);

Parser.c檔案 parse_network_cfg函式


  
   
    
     
    
    
     
      list *sections = read_cfg(filename);
     
    
   
    
     
    
    
     
      node *n = sections->front;
     
    
   
    
     
    
    
     
      if(!n) error(
      "Config file has no sections");
     
    
   
    
     
    
    
     
      network net = make_network(sections->size - 
      1);
     
    
   
    
     
    
    
     
      net.gpu_index = gpu_index;
     
    
   
    
     
    
    
     
      size_params params;
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      section *s = (section *)n->val;
     
    
   
    
     
    
    
     
      list *options = s->options;
     
    
   
    
     
    
    
     
      if(!is_network(s)) error(
      "First section must be [net] or [network]");
     
    
   
    
     
    
    
     
      parse_net_options(options, &net);

parse_net_options函式


  
   
    
     
    
    
     
      net->batch = option_find_int(options, 
      "batch",
      1);
     
    
   
    
     
    
    
     
      net->learning_rate = option_find_float(options, 
      "learning_rate", 
      .001);
     
    
   
    
     
    
    
     
      net->momentum = option_find_float(options, 
      "momentum", 
      .9);
     
    
   
    
     
    
    
     
      net->decay = option_find_float(options, 
      "decay", 
      .0001);
     
    
   
    
     
    
    
     
      int subdivs = option_find_int(options, 
      "subdivisions",
      1);
     
    
   
    
     
    
    
     
      net->time_steps = option_find_int_quiet(options, 
      "time_steps",
      1);
     
    
   
    
     
    
    
     
      net->batch /= subdivs;
     
    
   
    
     
    
    
     
      net->batch *= net->time_steps;
     
    
   
    
     
    
    
     
      net->subdivisions = subdivs;

learning_rate為初始學習率，訓練時的真正學習率和學習率的策略及初始學習率有關。

momentum為動量，在訓練時加入動量可以幫助走出local minima 以及saddle point。

decay是權重衰減正則項，用來防止過擬合。

batch的值等於cfg檔案中的batch/subdivisions 再乘以time_steps。
time_steps在yolo預設的cfg中是沒有配置的，所以是預設值1。
因此batch可以認為就是cfg檔案中的batch/subdivisions。

前面有提到batch的意義是每batch個樣本更新一次引數。

而subdivisions的意義在於降低對GPU memory的要求。
darknet將batch分割為subdivisions個子batch，每個子batch的大小為batch/subdivisions，並將子batch命名為batch。

我們看下訓練時和batch有關的程式碼

Detector.c檔案的train_detector函式


  
   
    
     
    
    
     
      #ifdef GPU
     
    
   
    
     
    
    
         
      if(ngpus == 
      1){
     
    
   
    
     
    
    
     
              loss = train_network(net, train);
     
    
   
    
     
    
    
     
          } 
      else {
     
    
   
    
     
    
    
     
              loss = train_networks(nets, ngpus, train, 
      4);
     
    
   
    
     
    
    
     
          }
     
    
   
    
     
    
    
     
      #else
     
    
   
    
     
    
    
     
          loss = train_network(net, train);
     
    
   
    
     
    
    
     
      #endif

Network.c檔案的train_network函式


  
   
    
     
    
    
     
      int batch = net.batch;
     
    
   
    
     
    
    
     
      int n = d.X.rows / batch;
     
    
   
    
     
    
    
     
      float *X = 
      calloc(batch*d.X.cols, 
      sizeof(
      float));
     
    
   
    
     
    
    
     
      float *y = 
      calloc(batch*d.y.cols, 
      sizeof(
      float));
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      int i;
     
    
   
    
     
    
    
     
      float sum = 
      0;
     
    
   
    
     
    
    
     
      for(i = 
      0; i < n; ++i){
     
    
   
    
     
    
    
     
          get_next_batch(d, batch, i*batch, X, y);
     
    
   
    
     
    
    
         
      float err = train_network_datum(net, X, y);
     
    
   
    
     
    
    
     
          sum += err;
     
    
   
    
     
    
    
     
      }

train_network_datum函式


  
   
    
     
    
    
     
      *net.seen += net.batch;
     
    
   
    
     
    
    
     
      ......
     
    
   
    
     
    
    
     
      ......
     
    
   
    
     
    
    
     
      forward_network(net, 
      state);
     
    
   
    
     
    
    
     
      backward_network(net, 
      state);
     
    
   
    
     
    
    
     
      float error = get_network_cost(net);
     
    
   
    
     
    
    
     
      if(((*net.seen)/net.batch)%net.subdivisions == 
      0) update_network(net);

我們看到，只有((*net.seen)/net.batch)%net.subdivisions == 0時才會更新網路引數。
*net.seen是已經訓練過的樣本數（每訓練一個子batch就累加net.batch），(*net.seen)/net.batch則是已經訓練過的子batch數；((*net.seen)/net.batch)%net.subdivisions == 0 表示剛好訓練完整數個真正的batch，此時才更新一次引數。

policy、steps和scales

Parser.c檔案 parse_net_options函式（由parse_network_cfg呼叫）


  
   
    
     
    
    
     
      char *policy_s = option_find_str(options, 
      "policy", 
      "constant");
     
    
   
    
     
    
    
     
      net->policy = get_policy(policy_s);
     
    
   
    
     
    
    
     
      net->burn_in = option_find_int_quiet(options, 
      "burn_in", 
      0);
     
    
   
    
     
    
    
     
      if(net->policy == STEP){
     
    
   
    
     
    
    
     
          net->step = option_find_int(options, 
      "step", 
      1);
     
    
   
    
     
    
    
     
          net->scale = option_find_float(options, 
      "scale", 
      1);
     
    
   
    
     
    
    
     
      } 
      else 
      if (net->policy == STEPS){
     
    
   
    
     
    
    
         
      char *l = option_find(options, 
      "steps");   
     
    
   
    
     
    
    
         
      char *p = option_find(options, 
      "scales");   
     
    
   
    
     
    
    
         
      if(!l || !p) error(
      "STEPS policy must have steps and scales in cfg file");
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
         
      int len = 
      strlen(l);
     
    
   
    
     
    
    
         
      int n = 
      1;
     
    
   
    
     
    
    
         
      int i;
     
    
   
    
     
    
    
         
      for(i = 
      0; i < len; ++i){
     
    
   
    
     
    
    
             
      if (l[i] == 
      ',') ++n;
     
    
   
    
     
    
    
     
          }
     
    
   
    
     
    
    
         
      int *steps = 
      calloc(n, 
      sizeof(
      int));
     
    
   
    
     
    
    
         
      float *scales = 
      calloc(n, 
      sizeof(
      float));
     
    
   
    
     
    
    
         
      for(i = 
      0; i < n; ++i){
     
    
   
    
     
    
    
             
      int step    = atoi(l);
     
    
   
    
     
    
    
             
      float scale = atof(p);
     
    
   
    
     
    
    
     
              l = 
      strchr(l, 
      ',')+
      1;
     
    
   
    
     
    
    
     
              p = 
      strchr(p, 
      ',')+
      1;
     
    
   
    
     
    
    
     
              steps[i] = step;
     
    
   
    
     
    
    
     
              scales[i] = scale;
     
    
   
    
     
    
    
     
          }
     
    
   
    
     
    
    
     
          net->scales = scales;
     
    
   
    
     
    
    
     
          net->steps = steps;
     
    
   
    
     
    
    
     
          net->num_steps = n;
     
    
   
    
     
    
    
     
      } 
      else 
      if (net->policy == EXP){
     
    
   
    
     
    
    
     
          net->gamma = option_find_float(options, 
      "gamma", 
      1);
     
    
   
    
     
    
    
     
      } 
      else 
      if (net->policy == SIG){
     
    
   
    
     
    
    
     
          net->gamma = option_find_floa

YOLO配置檔案理解

darknet對應程式碼

class

batch、learning_rate、momentum、decay和 subdivisions

policy、steps和scales

YOLO配置檔案理解

YOLO配置檔案理解以及解析過程

【YOLO學習筆記】之YOLO配置檔案詳解

udev配置檔案理解

YOLO配置文件理解

springboot中配置檔案application.properties的理解

flume配置檔案和實際場景理解

《深入理解Spring Cloud與微服務構建》學習筆記(二)~使用Spring Boot配置檔案

(很容易理解)Java 建立/讀取配置檔案

ChainDesk|Fabric核心配置檔案的理解

配置檔案的理解（pom.xml、web.xml）

對抽象工廠+反射+配置檔案的例項理解

YOLOV2配置檔案的理解

使用groovy理解gradle配置檔案

Java專案讀取配置檔案時，FileNotFoundException 系統找不到指定的檔案，System.getProperty("user.dir")的理解

Mybatis配置檔案: enviroments執行環境自行理解

XML配置檔案頭部理解

YOLO訓練自己資料集配置檔案修改.md

YOLO修改配置檔案中類的數量

caffe（2）配置檔案.prototxt的理解

YOLO配置檔案理解

darknet對應程式碼

class

batch、learning_rate、momentum、decay和 subdivisions

policy、steps和scales

相關推薦