YOLOv2 Source Code Analysis (5)

Full series: YOLOv2 Source Code Analysis

0x01 make_convolutional_layer

We finally come back to the make_convolutional_layer function.
//make_convolutional_layer
    if(binary){
        // buffers for binarized (1-bit) weights
        l.binary_weights = calloc(l.nweights, sizeof(float));
        l.cweights = calloc(l.nweights, sizeof(char));
        l.scales = calloc(n, sizeof(float));
    }
    if(xnor){
        // XNOR-net style: both the weights and the inputs get binarized copies
        l.binary_weights = calloc(l.nweights, sizeof(float));
        l.binary_input = calloc(l.inputs*l.batch, sizeof(float));
    }
    if(batch_normalize){
        // batch-norm parameters; the per-filter scales start out at 1
        l.scales = calloc(n, sizeof(float));
        l.scale_updates = calloc(n, sizeof(float));
        for (i = 0; i < n; ++i){
            l.scales[i] = 1;
        }

        l.mean = calloc(n, sizeof(float));
        l.variance = calloc(n, sizeof(float));

        l.mean_delta = calloc(n, sizeof(float));
        l.variance_delta = calloc(n, sizeof(float));

        l.rolling_mean = calloc(n, sizeof(float));
        l.rolling_variance = calloc(n, sizeof(float));
        l.x = calloc(l.batch*l.outputs, sizeof(float));
        l.x_norm = calloc(l.batch*l.outputs, sizeof(float));
    }
    ...
    fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);

    return l;
}
If you have read the previous articles in this series, the meaning of these fields should already be clear to you. The only things I want to single out here are related to CUDA programming, but I will leave that part for the end of this series; if you are interested, you can read it then.
With that, we are finally done with the make_convolutional_layer function.
0x02 parse_convolutional
You can go back to section 0x0103 in part (2).
//parse_convolutional
    convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,groups,size,stride,padding,activation, batch_normalize, binary, xnor, params.net->adam);
    layer.flipped = option_find_int_quiet(options, "flipped", 0);
    layer.dot = option_find_float_quiet(options, "dot", 0);

    return layer;
}
There is nothing else worth discussing here, so let's go back to parse_network_cfg.
0x03 parse_network_cfg
After quite a while we are back here again, at section 0x0102 in part (2).
//parse_network_cfg
        if(lt == CONVOLUTIONAL){
            l = parse_convolutional(options, params);
        }else if(lt == DECONVOLUTIONAL){
            l = parse_deconvolutional(options, params);
        }
Let's look at the parse_deconvolutional function.
0x0301 parse_deconvolutional
layer parse_deconvolutional(list *options, size_params params)
{
    int n = option_find_int(options, "filters",1);
    int size = option_find_int(options, "size",1);
    int stride = option_find_int(options, "stride",1);

    char *activation_s = option_find_str(options, "activation", "logistic");
    ACTIVATION activation = get_activation(activation_s);

    int batch,h,w,c;
    h = params.h;
    w = params.w;
    c = params.c;
    batch=params.batch;
    if(!(h && w && c)) error("Layer before deconvolutional layer must output image.");
    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
    int pad = option_find_int_quiet(options, "pad",0);
    int padding = option_find_int_quiet(options, "padding",0);
    if(pad) padding = size/2;

    layer l = make_deconvolutional_layer(batch,h,w,c,n,size,stride,padding, activation, batch_normalize, params.net->adam);

    return l;
}
I have already explained these options in earlier articles, so I won't repeat them here. Let's go straight to the key function, make_deconvolutional_layer.
layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam)
{
    int i;
    layer l = {0};
    l.type = DECONVOLUTIONAL;
    ...
    l.forward = forward_deconvolutional_layer;
    l.backward = backward_deconvolutional_layer;
    l.update = update_deconvolutional_layer;
I won't go over the field initialization at the top again either; let's look directly at the three key function pointers, starting with the first one, forward_deconvolutional_layer.
0x030101 forward_deconvolutional_layer
void forward_deconvolutional_layer(const layer l, network net)
{
    int i;

    int m = l.size*l.size*l.n;
    int n = l.h*l.w;
    int k = l.c;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    for(i = 0; i < l.batch; ++i){
        float *a = l.weights;
        float *b = net.input + i*l.c*l.h*l.w;
        float *c = net.workspace;

        // C = A^T * B: weights^T (size*size*n x c) times input (c x h*w)
        gemm_cpu(1,0,m,n,k,1,a,m,b,n,0,c,n);

        // scatter the column buffer back into the (out_c, out_h, out_w) output image
        col2im_cpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output+i*l.outputs);
    }
    if (l.batch_normalize) {
        forward_batchnorm_layer(l, net);
    } else {
        add_bias(l.output, l.biases, l.batch, l.n, l.out_w*l.out_h);
    }
    activate_array(l.output, l.batch*l.n*l.out_w*l.out_h, l.activation);
}
I have already analyzed every function that appears here in earlier articles, so what I mainly want to look at is the logic of this function.
If we compare it with the convolutional layer from before, there are two differences:

A. the weight matrix is used transposed (the first argument of the gemm_cpu call is 1);
B. col2im_cpu is called after the matrix multiplication, instead of im2col_cpu being called before it.

What do these two points tell us? A deconvolutional layer really is a kind of convolutional layer; it is just a transposed convolution.
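To make the comparison concrete, here is how the two forward passes read when written as matrix products; the shapes are read off the code above, with k standing for the kernel size l.size:

$$\text{conv:}\quad Y = W\,\mathrm{im2col}(X), \qquad W:\ n \times k^2c,\quad \mathrm{im2col}(X):\ k^2c \times \mathrm{out\_h}\cdot\mathrm{out\_w}$$

$$\text{deconv:}\quad Y = \mathrm{col2im}\big(W^{\top}X\big), \qquad W^{\top}:\ k^2n \times c,\quad X:\ c \times h\cdot w$$

The transpose on the weights and the swap between im2col and col2im are exactly the two differences A and B above.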
0x030102 backward_deconvolutional_layer
void backward_deconvolutional_layer(layer l, network net)
{
    int i;

    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    if(l.batch_normalize){
        backward_batchnorm_layer(l, net);
    } else {
        backward_bias(l.bias_updates, l.delta, l.batch, l.n, l.out_w*l.out_h);
    }

    //if(net.delta) memset(net.delta, 0, l.batch*l.h*l.w*l.c*sizeof(float));

    for(i = 0; i < l.batch; ++i){
        int m = l.c;
        int n = l.size*l.size*l.n;
        int k = l.h*l.w;

        float *a = net.input + i*m*k;
        float *b = net.workspace;
        float *c = l.weight_updates;

        // gather the output deltas into column form, then accumulate the weight gradient
        im2col_cpu(l.delta + i*l.outputs, l.out_c, l.out_h, l.out_w,
                l.size, l.stride, l.pad, b);
        gemm_cpu(0,1,m,n,k,1,a,k,b,k,1,c,n);

        if(net.delta){
            int m = l.c;
            int n = l.h*l.w;
            int k = l.size*l.size*l.n;

            float *a = l.weights;
            float *b = net.workspace;
            float *c = net.delta + i*n*m;

            // propagate the delta to the previous layer: weights * im2col(delta)
            gemm_cpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
        }
    }
}
Understanding this function is much the same as understanding the earlier backward_convolutional_layer; very little changes.
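Reading the shapes off the two gemm_cpu calls (again with k = l.size), each image of the batch contributes

$$\nabla W \mathrel{+}= X\,\mathrm{im2col}(\delta_{\mathrm{out}})^{\top}, \qquad X:\ c \times h\cdot w,\quad \mathrm{im2col}(\delta_{\mathrm{out}}):\ k^2n \times h\cdot w$$

$$\delta_{\mathrm{in}} \mathrel{+}= W\,\mathrm{im2col}(\delta_{\mathrm{out}}), \qquad W:\ c \times k^2n$$

which are essentially the convolutional backward formulas with the input image and the output deltas exchanging roles, as you would expect for a transposed convolution.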
0x030103 update_deconvolutional_layer
void update_deconvolutional_layer(layer l, update_args a)
{
    float learning_rate = a.learning_rate*l.learning_rate_scale;
    float momentum = a.momentum;
    float decay = a.decay;
    int batch = a.batch;

    int size = l.size*l.size*l.c*l.n;

    axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.n, momentum, l.bias_updates, 1);

    if(l.scales){
        axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1);
        scal_cpu(l.n, momentum, l.scale_updates, 1);
    }

    axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1);
    axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
    scal_cpu(size, momentum, l.weight_updates, 1);
}
Likewise, this function does nothing but update the parameters, just like update_convolutional_layer before it.
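Written out, the three axpy_cpu/scal_cpu calls on the weights are plain SGD with momentum and L2 weight decay. If G is whatever has been accumulated into l.weight_updates during the backward pass, B = a.batch, eta the learning rate, lambda the decay and mu the momentum, the calls perform

$$G \leftarrow G - \lambda B\,W, \qquad W \leftarrow W + \frac{\eta}{B}\,G, \qquad G \leftarrow \mu\,G$$

so the scaled-down G that survives the final scal_cpu acts as the momentum term for the next update. The bias (and, with batch norm, scale) updates follow the same pattern without the decay term.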
Now let's go back to the make_deconvolutional_layer function.
//make_deconvolutional_layer
    l.batch_normalize = batch_normalize;

    if(batch_normalize){
        l.scales = calloc(n, sizeof(float));
        l.scale_updates = calloc(n, sizeof(float));
        for(i = 0; i < n; ++i){
            l.scales[i] = 1;
        }
        ...
These lines are all just field configuration that I have covered in earlier articles, so I won't repeat it here. Good, that wraps up the parse_deconvolutional function.
0x0302 parse_local
We go back to the parse_network_cfg function.
        else if(lt == LOCAL){
            l = parse_local(options, params);
Let's look at the parse_local function.
local_layer parse_local(list *options, size_params params)
{
    int n = option_find_int(options, "filters",1);
    int size = option_find_int(options, "size",1);
    int stride = option_find_int(options, "stride",1);
    int pad = option_find_int(options, "pad",0);

    char *activation_s = option_find_str(options, "activation", "logistic");
    ACTIVATION activation = get_activation(activation_s);

    int batch,h,w,c;
    h = params.h;
    w = params.w;
    c = params.c;
    batch=params.batch;
    if(!(h && w && c)) error("Layer before local layer must output image.");

    local_layer layer = make_local_layer(batch,h,w,c,n,size,stride,pad,activation);

    return layer;
}
I have already explained these options in earlier articles, so I won't repeat them here. Let's go straight to the key function, make_local_layer.
local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation)
{
    ...
    l.forward = forward_local_layer;
    l.backward = backward_local_layer;
    l.update = update_local_layer;
    ...
    fprintf(stderr, "Local Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
    return l;
}
As before, we skip the field setup and look directly at the three key functions, starting with the first one, forward_local_layer.
0x030201 forward_local_layer
void forward_local_layer(const local_layer l, network net)
{
    int out_h = local_out_height(l);
    int out_w = local_out_width(l);
    int i, j;
    int locations = out_h * out_w;

    // start each image's output from the per-location biases
    for(i = 0; i < l.batch; ++i){
        copy_cpu(l.outputs, l.biases, 1, l.output + i*l.outputs, 1);
    }

    for(i = 0; i < l.batch; ++i){
        float *input = net.input + i*l.w*l.h*l.c;
        im2col_cpu(input, l.c, l.h, l.w,
                l.size, l.stride, l.pad, net.workspace);
        float *output = l.output + i*l.outputs;
        for(j = 0; j < locations; ++j){
            // every output location j gets its own filter bank
            float *a = l.weights + j*l.size*l.size*l.c*l.n;
            float *b = net.workspace + j;
            float *c = output + j;

            int m = l.n;
            int n = 1;
            int k = l.size*l.size*l.c;

            gemm(0,0,m,n,k,1,a,k,b,locations,1,c,locations);
        }
    }
    activate_array(l.output, l.outputs*l.batch, l.activation);
}
Let's first look at the two helper functions called at the top.
int local_out_height(local_layer l)
{
    int h = l.h;
    if (!l.pad) h -= l.size;
    else h -= 1;
    return h/l.stride + 1;
}

int local_out_width(local_layer l)
{
    int w = l.w;
    if (!l.pad) w -= l.size;
    else w -= 1;
    return w/l.stride + 1;
}
These two functions again compute the height and width of the output image. Compare them with the formula we used for the convolutional layer:

(l.h + 2*l.pad - l.size) / l.stride + 1

Unlike the convolutional layer, the local layer does not use pad as an explicit padding amount; it only checks whether pad is set, subtracting 1 instead of l.size when it is.
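As a quick sanity check with made-up numbers, take h = 13, size = 3, stride = 1:

with pad set:   (13 - 1)/1 + 1 = 13, which matches the convolutional formula with pad = size/2 = 1: (13 + 2*1 - 3)/1 + 1 = 13
with pad unset: (13 - 3)/1 + 1 = 11, which matches the convolutional formula with pad = 0: (13 + 0 - 3)/1 + 1 = 11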
We can again compare this with the convolutional layer from before. The real difference is that the pointers handed to gemm now depend on the output location j; in particular the weights pointer (l.weights + j*l.size*l.size*l.c*l.n) selects a different filter bank for every location. That is exactly the principle of the local layer: it is a convolution whose weights are not shared across spatial positions (locally connected layers appeared in some early networks).
This may sound a bit abstract ¬_¬; I'll draw a diagram when I have time, but for now the small numeric sketch below should give you a feel for it.
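Here is a minimal sketch (not darknet code, and the sizes are made up) showing what not sharing weights means for the parameter count: the local layer stores one filter bank per output location, so its weight count is the convolutional count multiplied by out_h*out_w, which is exactly how l.weights is indexed in the loops above.

#include <stdio.h>

int main()
{
    int c = 3, n = 16, size = 3;     // made-up input channels, filters, kernel size
    int out_h = 13, out_w = 13;      // made-up output resolution

    int conv_weights  = size*size*c*n;                 // one filter bank, shared everywhere
    int local_weights = size*size*c*n*out_h*out_w;     // one filter bank per output location

    printf("conv : %d weights\n", conv_weights);       // 432
    printf("local: %d weights\n", local_weights);      // 73008
    return 0;
}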
0x030202 backward_local_layer
void backward_local_layer(local_layer l, network net)
{
    int i, j;
    int locations = l.out_w*l.out_h;

    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1);
    }

    for(i = 0; i < l.batch; ++i){
        float *input = net.input + i*l.w*l.h*l.c;
        im2col_cpu(input, l.c, l.h, l.w,
                l.size, l.stride, l.pad, net.workspace);

        for(j = 0; j < locations; ++j){
            // per-location weight gradient: outer product of the delta and the input column
            float *a = l.delta + i*l.outputs + j;
            float *b = net.workspace + j;
            float *c = l.weight_updates + j*l.size*l.size*l.c*l.n;
            int m = l.n;
            int n = l.size*l.size*l.c;
            int k = 1;

            gemm(0,1,m,n,k,1,a,locations,b,locations,1,c,n);
        }

        if(net.delta){
            for(j = 0; j < locations; ++j){
                // propagate the delta back through the per-location weights
                float *a = l.weights + j*l.size*l.size*l.c*l.n;
                float *b = l.delta + i*l.outputs + j;
                float *c = net.workspace + j;

                int m = l.size*l.size*l.c;
                int n = 1;
                int k = l.n;

                gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations);
            }

            col2im_cpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta+i*l.c*l.h*l.w);
        }
    }
}
The difference from the earlier backward_convolutional_layer is again the per-location indexing: every output location j accumulates a gradient into its own slice of l.weight_updates.
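Concretely, the first gemm call in the location loop has k = 1, so it is just an outer product. If delta_j is the output delta at location j (one value per filter) and x_j is the im2col column at that location, then what it accumulates is

$$\nabla W_j \mathrel{+}= \delta_j\,x_j^{\top}, \qquad \delta_j:\ n \times 1,\quad x_j:\ k^2c \times 1$$

and the second gemm (its first argument is 1) computes the product of the transposed W_j with delta_j, which gives the column that col2im_cpu then scatters back into net.delta.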
0x030203 update_local_layer
void update_local_layer(local_layer l, update_args a)
{
    float learning_rate = a.learning_rate*l.learning_rate_scale;
    float momentum = a.momentum;
    float decay = a.decay;
    int batch = a.batch;

    int locations = l.out_w*l.out_h;
    int size = l.size*l.size*l.c*l.n*locations;

    axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.outputs, momentum, l.bias_updates, 1);

    axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1);
    axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
    scal_cpu(size, momentum, l.weight_updates, 1);
}
There is not much to say about this function; it just updates the parameters. The only thing to notice is that size now includes the locations factor, so every output location's private filters get updated.
That completes the analysis of the parse_local function. In the next article we will go back to the parse_network_cfg function.
My abilities are limited, so if there is anything wrong in this article, I hope you will point it out. Thank you ^_^!