1. 程式人生 > 實用技巧 >[Paddle學習筆記][09][基於YOLOv3的昆蟲檢測-模型設計]

[Paddle學習筆記][09][基於YOLOv3的昆蟲檢測-模型設計]

說明:

本例程使用YOLOv3進行昆蟲檢測。例程分為資料處理、模型設計、損失函式、訓練模型、模型預測和測試模型六個部分。本篇為第二部分,使用Paddle動態圖實現了YOLOv3,使用Darknet53骨幹網路和YOLOv3的檢測頭部。

實驗程式碼:

Darknet53骨幹網路和YOLOv3頭部

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable

from source.model import DarkNet53, YOLOHeader

with fluid.dygraph.guard():
    # Input data: one random 3-channel 608x608 image, wrapped as a dygraph variable.
    x = np.random.randn(1, 3, 608, 608).astype(np.float32)
    x = to_variable(x)

    # Build the backbone and one detection head per backbone output.
    backbone = DarkNet53()                                     # backbone network
    detect_0 = YOLOHeader(num_channels=1024, num_filters=512)  # detection head for c0
    detect_1 = YOLOHeader(num_channels=512, num_filters=256)   # detection head for c1
    detect_2 = YOLOHeader(num_channels=256, num_filters=128)   # detection head for c2

    # Run the backbone, then feed each feature map to its own head.
    c0, c1, c2 = backbone(x)
    c0_r, c0_t = detect_0(c0)
    c1_r, c1_t = detect_1(c1)
    c2_r, c2_t = detect_2(c2)

    # Print the shape of every feature map together with its route/tip outputs.
    print('c0:', c0.shape, 'route:', c0_r.shape, 'tip:', c0_t.shape)
    print('c1:', c1.shape, ' route:', c1_r.shape, 'tip:', c1_t.shape)
    print('c2:', c2.shape, ' route:', c2_r.shape, 'tip:', c2_t.shape)

結果:

c0: [1, 1024, 19, 19] route: [1, 512, 19, 19] tip: [1, 1024, 19, 19]

c1: [1, 512, 38, 38] route: [1, 256, 38, 38] tip: [1, 512, 38, 38]

c2: [1, 256, 76, 76] route: [1, 128, 76, 76] tip: [1, 256, 76, 76]

完整的YOLOv3模型

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable

from source.model import YOLOv3

with fluid.dygraph.guard():
    # Input: a single random 3x608x608 image converted to a dygraph variable.
    x = to_variable(np.random.randn(1, 3, 608, 608).astype(np.float32))

    # Model configuration.
    num_classes = 7                                  # number of object classes
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]  # anchor indices per output level

    model = YOLOv3(num_classes=num_classes, anchor_mask=anchor_mask)

    # The model returns one prediction map per output level.
    p0, p1, p2 = model(x)

    # Report the shape of each prediction map.
    for label, prediction in (('p0:', p0), ('p1:', p1), ('p2:', p2)):
        print(label, prediction.shape)

結果:

p0: [1, 36, 19, 19]

p1: [1, 36, 38, 38]

p2: [1, 36, 76, 76]

每個YOLOv3頭部的輸出特徵

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable

from source.model import YOLOv3

with fluid.dygraph.guard():
    # Input: a single random 3x608x608 image converted to a dygraph variable.
    x = to_variable(np.random.randn(1, 3, 608, 608).astype(np.float32))

    # Model configuration.
    num_classes = 7                                  # number of object classes
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]  # anchor indices per output level

    model = YOLOv3(num_classes=num_classes, anchor_mask=anchor_mask)

    p0, p1, p2 = model(x)

    # Split the channel axis of p0 into (anchors, 5 + num_classes) while
    # keeping the spatial dimensions of the prediction map.
    num_anchors = len(anchor_mask[0])
    grid_h, grid_w = p0.shape[2], p0.shape[3]
    p0 = fluid.layers.reshape(
        p0, [-1, num_anchors, 5 + num_classes, grid_h, grid_w])

    # Entries 0..3 of each anchor: box location terms, left as raw outputs.
    pdloc = p0[:, :, 0:4, :, :]

    # Entry 4: objectness, squashed with a sigmoid.
    objectness_raw = p0[:, :, 4, :, :]
    pdobj = fluid.layers.sigmoid(objectness_raw)

    # Entries 5..5+num_classes: per-class scores, squashed with a sigmoid.
    class_raw = p0[:, :, 5:5+num_classes, :, :]
    pdcls = fluid.layers.sigmoid(class_raw)

    print('predict_loc', pdloc.shape)
    print('predict_obj', pdobj.shape)
    print('predict_cls', pdcls.shape)

結果:

predict_loc [1, 3, 4, 19, 19]

predict_obj [1, 3, 19, 19]

predict_cls [1, 3, 7, 19, 19]

model.py檔案

import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, BatchNorm
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay

# 卷積正則化層
class ConvBN(fluid.dygraph.Layer):
    """Convolution followed by batch normalization and a leaky ReLU.

    Args:
        num_channels: number of input channels of the convolution.
        num_filters: number of output channels of the convolution; also the
            channel count handed to the batch-norm layer.
        filter_size: convolution kernel size.
        stride: convolution stride.
        padding: convolution padding.
    """

    def __init__(self, num_channels, num_filters, filter_size, stride, padding):
        super(ConvBN, self).__init__()

        # Convolution: weights use the Normal(0, 0.02) initializer and there
        # is no bias term (the batch norm below carries a bias instead).
        conv_weight_attr = ParamAttr(
            initializer=fluid.initializer.Normal(0, 0.02))
        self.conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            param_attr=conv_weight_attr,
            bias_attr=False,
            act=None)

        # Batch norm: both parameters use L2Decay(0), i.e. a zero
        # regularization coefficient.
        bn_weight_attr = ParamAttr(
            initializer=fluid.initializer.Normal(0, 0.02),
            regularizer=L2Decay(0))
        bn_bias_attr = ParamAttr(
            initializer=fluid.initializer.Constant(0),
            regularizer=L2Decay(0))
        self.batch_norm = BatchNorm(
            num_channels=num_filters,
            param_attr=bn_weight_attr,
            bias_attr=bn_bias_attr,
            act=None)

    def forward(self, x):
        """Apply conv -> batch norm -> leaky ReLU (alpha=0.1) and return the result."""
        normalized = self.batch_norm(self.conv(x))
        return fluid.layers.leaky_relu(x=normalized, alpha=0.1)

# 下采樣層模組
class DownSample(fluid.dygraph.Layer):
    """Down-sampling layer: a single ConvBN whose defaults use stride 2.

    Args:
        num_channels: number of input channels.
        num_filters: number of output channels.
        filter_size: kernel size of the convolution (default 3).
        stride: stride of the convolution (default 2).
        padding: padding of the convolution (default 1).
    """

    def __init__(self, num_channels, num_filters, filter_size=3, stride=2, padding=1):
        super(DownSample, self).__init__()

        self.conv_bn = ConvBN(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding)

    def forward(self, x):
        """Return the output of the wrapped ConvBN for ``x``."""
        return self.conv_bn(x)
    
# 上取樣層模組
class UpSample(fluid.dygraph.Layer):
    """Nearest-neighbour up-sampling of the last two axes by a fixed factor.

    Args:
        scale: multiplier applied to the input's dimensions 2 and 3 (default 2).
    """

    def __init__(self, scale=2):
        super(UpSample, self).__init__()

        self.scale = scale

    def forward(self, x):
        """Resize ``x`` so that dimensions 2 and 3 are multiplied by ``self.scale``."""
        # Read the runtime shape and keep entries 2..3 as int32.
        in_shape = fluid.layers.shape(input=x)
        spatial = fluid.layers.slice(
            input=in_shape, axes=[0], starts=[2], ends=[4])
        spatial = fluid.layers.cast(x=spatial, dtype='int32')
        spatial.stop_gradient = True  # the shape tensor takes no part in backprop

        # Target size = input size * scale.
        target = spatial * self.scale

        # NOTE(review): both out_shape and scale are passed, as in the
        # original; which one the op honours is defined by resize_nearest.
        return fluid.layers.resize_nearest(
            input=x, out_shape=target, scale=self.scale)

# 基礎殘差模組
class BasicBlock(fluid.dygraph.Layer):
    """Residual block: 1x1 ConvBN, then 3x3 ConvBN, added back onto the input.

    The second convolution outputs ``num_filters * 2`` channels and its result
    is added element-wise to the block input.

    Args:
        num_channels: number of input channels.
        num_filters: output channels of the 1x1 convolution; the 3x3
            convolution outputs twice this number.
    """

    def __init__(self, num_channels, num_filters):
        super(BasicBlock, self).__init__()

        # 1x1 convolution.
        self.conv_bn_1 = ConvBN(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            stride=1,
            padding=0)
        # 3x3 convolution that doubles the channel count.
        self.conv_bn_2 = ConvBN(
            num_channels=num_filters,
            num_filters=num_filters * 2,
            filter_size=3,
            stride=1,
            padding=1)

    def forward(self, x):
        """Return ``x`` plus the output of the two stacked ConvBN layers."""
        branch = self.conv_bn_2(self.conv_bn_1(x))
        return fluid.layers.elementwise_add(x=x, y=branch, act=None)

# 基礎殘差塊組
class BlockGroup(fluid.dygraph.Layer):
    """A chain of BasicBlock layers applied one after another.

    Args:
        num_channels: input channels passed to every BasicBlock.
        num_filters: ``num_filters`` passed to every BasicBlock.
        num_blocks: total number of blocks, counting the first one; values
            below 2 leave only the first block.
    """

    def __init__(self, num_channels, num_filters, num_blocks):
        super(BlockGroup, self).__init__()

        # The first block is held as a plain attribute.
        self.basicblock_0 = BasicBlock(
            num_channels=num_channels, num_filters=num_filters)

        # The remaining blocks are registered as 'block_1' .. 'block_<n-1>'
        # and kept in a list in registration order.
        self.block_list = [
            self.add_sublayer(
                'block_' + str(i),
                BasicBlock(num_channels=num_channels, num_filters=num_filters))
            for i in range(1, num_blocks)
        ]

    def forward(self, x):
        """Pass ``x`` through the first block and then every remaining block."""
        out = self.basicblock_0(x)
        for block in self.block_list:
            out = block(out)
        return out

# 骨幹網路模組
class DarkNet53(fluid.dygraph.Layer):
    """DarkNet53 backbone built from ConvBN, DownSample and BlockGroup layers.

    The network has a 3->32 stem convolution, a 32->64 down-sampling layer,
    and five residual groups with a down-sampling layer between consecutive
    groups. ``forward`` returns the outputs of the last three groups, deepest
    first.
    """

    def __init__(self):
        super(DarkNet53, self).__init__()

        # Stem: 3x3 convolution, then the first down-sampling layer.
        self.conv_bn_1 = ConvBN(
            num_channels=3, num_filters=32, filter_size=3, stride=1, padding=1)
        self.down_sample_1 = DownSample(num_channels=32, num_filters=64)

        # Number of residual blocks in each group.
        self.num_groups = [1, 2, 8, 8, 4]

        # Group i takes 64 * 2**i input channels and uses 32 * 2**i filters
        # in its blocks.
        self.group_list = [
            self.add_sublayer(
                'group_' + str(i),
                BlockGroup(
                    num_channels=32 * (2 ** (i + 1)),
                    num_filters=32 * (2 ** i),
                    num_blocks=num_blocks))
            for i, num_blocks in enumerate(self.num_groups)
        ]

        # One down-sampling layer between consecutive groups; each doubles
        # the channel count.
        self.downs_list = [
            self.add_sublayer(
                'downs_' + str(i),
                DownSample(
                    num_channels=32 * (2 ** (i + 1)),
                    num_filters=32 * (2 ** (i + 2))))
            for i in range(len(self.num_groups) - 1)
        ]

    def forward(self, x):
        """Run the backbone and return the last three group outputs.

        Returns:
            A list ``[c0, c1, c2]``: the outputs of the last, second-to-last
            and third-to-last groups, in that order.
        """
        # Stem.
        feat = self.down_sample_1(self.conv_bn_1(x))

        last_index = len(self.num_groups) - 1
        stage_outputs = []
        for i, group in enumerate(self.group_list):
            feat = group(feat)
            stage_outputs.append(feat)

            # Down-sample between groups, but not after the final one.
            if i < last_index:
                feat = self.downs_list[i](feat)

        # Last three outputs, deepest first.
        return stage_outputs[-3:][::-1]
    
# 檢測頭部模組
class YOLOHeader(fluid.dygraph.Layer):
    """Detection head: six ConvBN layers alternating 1x1 and 3x3 kernels.

    Args:
        num_channels: number of input channels.
        num_filters: channel count of the 1x1 layers (must be even); the 3x3
            layers output ``num_filters * 2`` channels.

    ``forward`` returns ``(route, tip)``. ``route`` has ``num_filters``
    channels and ``tip`` has ``num_filters * 2`` channels.
    """

    def __init__(self, num_channels, num_filters):
        super(YOLOHeader, self).__init__()

        assert num_filters % 2 == 0, "num_filters {} cannot be devided by 2".format(num_filters)

        narrow = num_filters
        wide = num_filters * 2

        # Four alternating 1x1 / 3x3 feature-extraction layers.
        self.conv_bn_1 = ConvBN(
            num_channels=num_channels, num_filters=narrow,
            filter_size=1, stride=1, padding=0)
        self.conv_bn_2 = ConvBN(
            num_channels=narrow, num_filters=wide,
            filter_size=3, stride=1, padding=1)
        self.conv_bn_3 = ConvBN(
            num_channels=wide, num_filters=narrow,
            filter_size=1, stride=1, padding=0)
        self.conv_bn_4 = ConvBN(
            num_channels=narrow, num_filters=wide,
            filter_size=3, stride=1, padding=1)

        # 1x1 layer producing the route output, then a 3x3 layer producing
        # the tip output from it.
        self.route = ConvBN(
            num_channels=wide, num_filters=narrow,
            filter_size=1, stride=1, padding=0)
        self.tip = ConvBN(
            num_channels=narrow, num_filters=wide,
            filter_size=3, stride=1, padding=1)

    def forward(self, x):
        """Return ``(route, tip)`` computed from the input feature map ``x``."""
        features = x
        for layer in (self.conv_bn_1, self.conv_bn_2,
                      self.conv_bn_3, self.conv_bn_4):
            features = layer(features)

        route = self.route(features)
        tip = self.tip(route)

        return route, tip
    
# 目標檢測模組
class YOLOv3(fluid.dygraph.Layer):
    """YOLOv3 detector: DarkNet53 backbone plus one detection branch per level.

    Args:
        num_classes: number of object classes.
        anchor_mask: list with one group of anchor indices per output level.
            Only the number of groups and the length of each group are used
            here.

    ``forward`` returns a list with one prediction map per backbone output.
    Map ``i`` has ``len(anchor_mask[i]) * (num_classes + 5)`` channels.
    """

    def __init__(self, num_classes, anchor_mask):
        super(YOLOv3, self).__init__()

        # Backbone network.
        self.backbone = DarkNet53()

        # Detection settings.
        self.num_classes = num_classes
        self.anchor_mask = anchor_mask

        self.dete_list = []  # detection heads
        self.conv_list = []  # output convolutions
        self.rout_list = []  # route layers feeding the next level

        last_level = len(self.anchor_mask) - 1
        for i, mask in enumerate(self.anchor_mask):
            backbone_channels = 1024 // (2 ** i)
            head_filters = 512 // (2 ** i)

            # Level 0 sees the backbone feature alone. Later levels see it
            # concatenated with the up-sampled route from the previous level.
            if i == 0:
                head_in_channels = backbone_channels
            else:
                head_in_channels = backbone_channels + head_filters

            # Detection head.
            head = self.add_sublayer(
                'dete_' + str(i),
                YOLOHeader(
                    num_channels=head_in_channels,
                    num_filters=head_filters))
            self.dete_list.append(head)

            # 1x1 output convolution. Its bias starts at 0 and uses
            # L2Decay(0), i.e. a zero regularization coefficient.
            out_conv = self.add_sublayer(
                'conv_' + str(i),
                Conv2D(
                    num_channels=backbone_channels,
                    num_filters=len(mask) * (self.num_classes + 5),
                    filter_size=1, stride=1, padding=0,
                    param_attr=ParamAttr(
                        initializer=fluid.initializer.Normal(0, 0.02)),
                    bias_attr=ParamAttr(
                        initializer=fluid.initializer.Constant(0),
                        regularizer=L2Decay(0)),
                    act=None))
            self.conv_list.append(out_conv)

            # Route layer (1x1 ConvBN halving the channels) for every level
            # except the last.
            if i < last_level:
                route_conv = self.add_sublayer(
                    'rout_' + str(i),
                    ConvBN(
                        num_channels=head_filters,
                        num_filters=256 // (2 ** i),
                        filter_size=1, stride=1, padding=0))
                self.rout_list.append(route_conv)

        # Up-sampling layer shared by all levels.
        self.upsample = UpSample()

    def forward(self, x):
        """Return the list of per-level prediction maps for input ``x``."""
        backbone_feats = self.backbone(x)

        predictions = []
        for i, feat in enumerate(backbone_feats):
            # From the second level on, prepend the up-sampled route of the
            # previous level along the channel axis.
            if i > 0:
                feat = fluid.layers.concat(input=[route, feat], axis=1)

            # Detection head, then the output convolution on its tip.
            route, tip = self.dete_list[i](feat)
            predictions.append(self.conv_list[i](tip))

            # Prepare the route for the next level: reduce channels, then
            # up-sample.
            if i < len(self.anchor_mask) - 1:
                route = self.upsample(self.rout_list[i](route))

        return predictions

參考資料:

https://blog.csdn.net/litt1e/article/details/88814417

https://blog.csdn.net/litt1e/article/details/88852745

https://blog.csdn.net/litt1e/article/details/88907542

https://aistudio.baidu.com/aistudio/projectdetail/742781

https://aistudio.baidu.com/aistudio/projectdetail/672017

https://aistudio.baidu.com/aistudio/projectdetail/868589

https://aistudio.baidu.com/aistudio/projectdetail/122277