
FCOS Official Code Explained (Part 2): Architecture (head)

https://blog.csdn.net/laizi_laizi/article/details/105519290


The previous post was already getting too long by the time it reached the head, so I decided to split things up: FCOS Official Code Explained (Part 1): Architecture (backbone).
This post continues by analyzing the fcos_head part of the architecture. Keep this figure in mind throughout:

(Figure: the fcos_head portion of the FCOS architecture diagram)

When the class GeneralizedRCNN is initialized there is also this line: self.rpn = build_rpn(cfg, self.backbone.out_channels).

The name was simply never updated: what actually gets constructed here is the FCOS head, and the call returns build_fcos(cfg, in_channels). The code lives in fcos_core/modeling/rpn/fcos/fcos.py (a sketch of the dispatch follows the snippet below).
build_fcos then just returns an FCOSModule:

def build_fcos(cfg, in_channels):
    return FCOSModule(cfg, in_channels)
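For context, build_rpn (in fcos_core/modeling/rpn/rpn.py) dispatches on a config switch. Here is a minimal sketch of that dispatch; the flag name cfg.MODEL.FCOS_ON is my reading of the repo's defaults, so treat the exact code as an assumption:

def build_rpn(cfg, in_channels):
    # Sketch: with the FCOS switch on, the "rpn" slot actually holds the FCOS head
    if cfg.MODEL.FCOS_ON:
        return build_fcos(cfg, in_channels)
    # otherwise fall back to the standard RPN
    return RPNModule(cfg, in_channels)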

Let's look at the initialization part of FCOSModule():

class FCOSModule(torch.nn.Module):
    """
    Module for FCOS computation. Takes feature maps from the backbone and
    FCOS outputs and losses. Only Test on FPN now.
    """

    def __init__(self, cfg, in_channels):
        super(FCOSModule, self).__init__()

        head = FCOSHead(cfg, in_channels)  # build the FCOS head

        box_selector_test = make_fcos_postprocessor(cfg)

        loss_evaluator = make_fcos_loss_evaluator(cfg)
        self.head = head
        self.box_selector_test = box_selector_test
        self.loss_evaluator = loss_evaluator
        self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES  # e.g. [8, 16, 32, 64, 128]; see the quick check after this block

    def forward(self, images, features, targets=None):  # invoked as: self.rpn(images, features, targets)
        pass  # body omitted here; covered together with the training code
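As a quick orientation for what those fpn_strides mean, here is a small runnable check (pure Python, not repo code; the 800×1024 input resolution is a made-up example) of the feature-map sizes that levels P3-P7 produce:

fpn_strides = [8, 16, 32, 64, 128]  # cfg.MODEL.FCOS.FPN_STRIDES from above
h, w = 800, 1024                    # hypothetical input resolution
for level, s in zip(range(3, 8), fpn_strides):
    print("P%d: stride %3d -> %3d x %3d feature map" % (level, s, h // s, w // s))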

So let's jump over and look at FCOSHead:

class FCOSHead(torch.nn.Module):
    def __init__(self, cfg, in_channels):
        """
        Arguments:
            in_channels (int): number of channels of the input feature
            這個就是fpn每層的輸出通道數,根據之前分析,都是一樣的,如256
        """
        super(FCOSHead, self).__init__()
        # TODO: Implement the sigmoid version first.
        num_classes = cfg.MODEL.FCOS.NUM_CLASSES - 1              # e.g. 80
        self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES             # e.g. [8, 16, 32, 64, 128]
        self.norm_reg_targets = cfg.MODEL.FCOS.NORM_REG_TARGETS   # e.g. False: regress raw distances, or stride-normalized ones
        self.centerness_on_reg = cfg.MODEL.FCOS.CENTERNESS_ON_REG # e.g. False: which branch centerness shares features with
        self.use_dcn_in_tower = cfg.MODEL.FCOS.USE_DCN_IN_TOWER   # e.g. False

        cls_tower = []
        bbox_tower = []
        # e.g. cfg.MODEL.FCOS.NUM_CONVS = 4: the shared part of the head (also called the tower) has 4 conv layers
        for i in range(cfg.MODEL.FCOS.NUM_CONVS):
            if self.use_dcn_in_tower and \
                    i == cfg.MODEL.FCOS.NUM_CONVS - 1:
                conv_func = DFConv2d
            else:
                conv_func = nn.Conv2d

            # cls_tower and bbox_tower are each four 3×3, 256-channel conv layers, each followed by GN and ReLU
            cls_tower.append(
                conv_func(
                    in_channels,
                    in_channels,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias=True
                )
            )
            cls_tower.append(nn.GroupNorm(32, in_channels))
            cls_tower.append(nn.ReLU())
            bbox_tower.append(
                conv_func(
                    in_channels,
                    in_channels,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias=True
                )
            )
            bbox_tower.append(nn.GroupNorm(32, in_channels))
            bbox_tower.append(nn.ReLU())

        self.add_module('cls_tower', nn.Sequential(*cls_tower))
        self.add_module('bbox_tower', nn.Sequential(*bbox_tower))
        # cls_logits is the raw classification output; per level its shape is [N, C, H, W] with C = num_classes
        self.cls_logits = nn.Conv2d(
            in_channels, num_classes, kernel_size=3, stride=1,
            padding=1
        )
        # bbox_pred is the regression branch output; per level its shape is [N, 4, H, W]
        self.bbox_pred = nn.Conv2d(
            in_channels, 4, kernel_size=3, stride=1,
            padding=1
        )
        # centerness is the branch that suppresses low-quality boxes; per level its shape is [N, 1, H, W]
        self.centerness = nn.Conv2d(
            in_channels, 1, kernel_size=3, stride=1,
            padding=1
        )

        # initialization: all conv parameters in these layers are initialized here
        for modules in [self.cls_tower, self.bbox_tower,
                        self.cls_logits, self.bbox_pred,
                        self.centerness]:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.normal_(l.weight, std=0.01)
                    torch.nn.init.constant_(l.bias, 0)

        # initialize the bias for focal loss: this sets the initial predicted foreground probability
        # to prior_prob (e.g. 0.01), the standard trick from RetinaNet that keeps focal loss stable
        # early in training (see the quick check after this block)
        prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_logits.bias, bias_value)

        # P3-P7: five FPN levels in total; one learnable scale factor per level to rescale the regression output
        self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])  

    def forward(self, x):
        logits = []
        bbox_reg = []
        centerness = []
        # x here should be the per-level FPN features, since the next line iterates over it
        for l, feature in enumerate(x):
            # Note that different levels yield feature maps of different sizes after the tower
            # Also note that all levels share the same tower, for both the cls branch and the bbox branch
            cls_tower = self.cls_tower(feature)
            box_tower = self.bbox_tower(feature)

            logits.append(self.cls_logits(cls_tower))
            # pick the tower features for centerness according to centerness_on_reg
            if self.centerness_on_reg:
                centerness.append(self.centerness(box_tower))
            else:
                centerness.append(self.centerness(cls_tower))

            bbox_pred = self.scales[l](self.bbox_pred(box_tower))  # the per-level rescaled bbox_pred
            if self.norm_reg_targets:
                bbox_pred = F.relu(bbox_pred)
                if self.training:
                    bbox_reg.append(bbox_pred)
                else:
                    bbox_reg.append(bbox_pred * self.fpn_strides[l])
            else:
                bbox_reg.append(torch.exp(bbox_pred))
        return logits, bbox_reg, centerness
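A quick check of that focal-loss bias trick: with the usual prior_prob of 0.01 the bias comes out to about -4.6, so the sigmoid of the initial logits is 0.01 everywhere, i.e. every location starts out predicting "almost certainly background", which keeps the focal loss from being swamped by the overwhelmingly many easy negatives at the start of training:

import math
import torch

prior_prob = 0.01  # the usual value of cfg.MODEL.FCOS.PRIOR_PROB
bias_value = -math.log((1 - prior_prob) / prior_prob)
print(bias_value)                                # -4.5951...
print(torch.sigmoid(torch.tensor(bias_value)))   # tensor(0.0100): initial foreground probability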
  1. On why an exponential (exp) is applied on the regression branch, the original paper says:

Moreover, since the regression targets are always positive, we employ exp(x) to map any real number to (0, +∞) on the top of the regression branch.
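A tiny illustration of that mapping, together with the alternative path taken when norm_reg_targets is True (ReLU, then multiplication by the level's stride at test time, as in the forward code above):

import torch
import torch.nn.functional as F

raw = torch.tensor([-1.5, 0.0, 2.0])  # raw bbox_pred values may be any real number
print(torch.exp(raw))                 # tensor([0.2231, 1.0000, 7.3891]): strictly positive
print(F.relu(raw) * 8)                # the norm_reg_targets path on the stride-8 level P3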

  2. As for the scaling applied to bbox_pred in the code above, the original paper touches on it in only one place:

    You can see that, in order to keep sharing the head across feature levels, the regression prediction is multiplied by a scale factor. This factor is a tensor, so it is updated during training, i.e. it is learnable; the classification branch, of course, needs no such factor.
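The Scale layer used for this lives in fcos_core/layers; it is essentially one learnable scalar per FPN level. A minimal sketch matching my reading of the repo (verify against fcos_core/layers/scale.py):

import torch
from torch import nn

class Scale(nn.Module):
    # a single learnable scalar multiplying its input; FCOSHead creates one per FPN level
    def __init__(self, init_value=1.0):
        super(Scale, self).__init__()
        self.scale = nn.Parameter(torch.FloatTensor([init_value]))

    def forward(self, input):
        return input * self.scale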
    Here is the head part of the model as I printed it:
(rpn): FCOSModule(
    (head): FCOSHead(
      (cls_tower): Sequential(
        (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): GroupNorm(32, 256, eps=1e-05, affine=True)
        (2): ReLU()
        (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (4): GroupNorm(32, 256, eps=1e-05, affine=True)
        (5): ReLU()
        (6): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (7): GroupNorm(32, 256, eps=1e-05, affine=True)
        (8): ReLU()
        (9): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (10): GroupNorm(32, 256, eps=1e-05, affine=True)
        (11): ReLU()
      )
      (bbox_tower): Sequential(
        (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): GroupNorm(32, 256, eps=1e-05, affine=True)
        (2): ReLU()
        (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (4): GroupNorm(32, 256, eps=1e-05, affine=True)
        (5): ReLU()
        (6): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (7): GroupNorm(32, 256, eps=1e-05, affine=True)
        (8): ReLU()
        (9): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (10): GroupNorm(32, 256, eps=1e-05, affine=True)
        (11): ReLU()
      )
      (cls_logits): Conv2d(256, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bbox_pred): Conv2d(256, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (centerness): Conv2d(256, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (scales): ModuleList(
        (0): Scale()
        (1): Scale()
        (2): Scale()
        (3): Scale()
        (4): Scale()
      )
    )
    (box_selector_test): FCOSPostProcessor()
  )
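To connect the printed structure to concrete output shapes, here is a small runnable check of what the three prediction convs produce per level (batch size 1 and an 800×1024 input are my assumptions; the channel counts 256/80/4/1 match the printout above):

import torch
from torch import nn

cls_logits = nn.Conv2d(256, 80, kernel_size=3, stride=1, padding=1)
bbox_pred = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1)
centerness = nn.Conv2d(256, 1, kernel_size=3, stride=1, padding=1)

# dummy FPN features P3-P7 for a hypothetical 800x1024 input (strides 8 to 128)
for s in [8, 16, 32, 64, 128]:
    f = torch.randn(1, 256, 800 // s, 1024 // s)
    # per level: [1, 80, H, W] class logits, [1, 4, H, W] box distances, [1, 1, H, W] centerness
    print(s, cls_logits(f).shape, bbox_pred(f).shape, centerness(f).shape)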

With that, the entire FCOS network structure is clear! The forward-pass code of FCOSModule will be covered together with the training part!