1. 程式人生 > 程式設計 >基於Pytorch SSD模型分析

基於Pytorch SSD模型分析

本文參考github上SSD實現,對模型進行分析,主要分析模型組成及輸入輸出大小.SSD網路結構如下圖:

基於Pytorch SSD模型分析

每輸入的影象有8732個框輸出;

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
#from layers import *
from data import voc,coco
import os
base = {
 '300': [64,64,'M',128,256,'C',512,512],'512': [],}
extras = {
 '300': [256,'S',256],}
mbox = {
 '300': [4,6,4,4],# number of boxes per feature map location
 '512': [],}

VGG基礎網路結構:

def vgg(cfg,i,batch_norm=False):
 layers = []
 in_channels = i
 for v in cfg:
  if v == 'M':
   layers += [nn.MaxPool2d(kernel_size=2,stride=2)]
  elif v == 'C':
   layers += [nn.MaxPool2d(kernel_size=2,stride=2,ceil_mode=True)]
  else:
   conv2d = nn.Conv2d(in_channels,v,kernel_size=3,padding=1)
   if batch_norm:
    layers += [conv2d,nn.BatchNorm2d(v),nn.ReLU(inplace=True)]
   else:
    layers += [conv2d,nn.ReLU(inplace=True)]
   in_channels = v
 pool5 = nn.MaxPool2d(kernel_size=3,stride=1,padding=1)
 conv6 = nn.Conv2d(512,1024,padding=6,dilation=6)
 conv7 = nn.Conv2d(1024,kernel_size=1)
 layers += [pool5,conv6,nn.ReLU(inplace=True),conv7,nn.ReLU(inplace=True)]
 return layers
size=300
vgg=vgg(base[str(size)],3)
print(vgg)

輸出為:

Conv2d(3,kernel_size=(3,3),stride=(1,1),padding=(1,1))
ReLU(inplace)
Conv2d(64,1))
ReLU(inplace)
MaxPool2d(kernel_size=2,padding=0,dilation=1,ceil_mode=False)
Conv2d(64,1))
ReLU(inplace)
Conv2d(128,ceil_mode=False)
Conv2d(128,1))
ReLU(inplace)
Conv2d(256,ceil_mode=True)
Conv2d(256,1))
ReLU(inplace)
Conv2d(512,ceil_mode=False)
Conv2d(512,1))
ReLU(inplace)
MaxPool2d(kernel_size=3,padding=1,padding=(6,6),dilation=(6,6))
ReLU(inplace)
Conv2d(1024,kernel_size=(1,1))
ReLU(inplace)

SSD中新增的網路

add_extras函式構建基本的卷積層

def add_extras(cfg,batch_norm=False):
 # Extra layers added to VGG for feature scaling
 layers = []
 in_channels = i
 flag = False
 for k,v in enumerate(cfg):
  if in_channels != 'S':
   if v == 'S':
    layers += [nn.Conv2d(in_channels,cfg[k + 1],3)[flag],padding=1)]
   else:
    layers += [nn.Conv2d(in_channels,3)[flag])]
   flag = not flag
  in_channels = v
 return layers
extra_layers=add_extras(extras[str(size)],1024)
for layer in extra_layers:
 print(layer)

輸出為:

Conv2d(1024,1))
Conv2d(256,stride=(2,2),1))
Conv2d(512,1))
Conv2d(128,1))

multibox函式得到每個特徵圖的預設box的位置計算網路和分類得分網路

def multibox(vgg,extra_layers,cfg,num_classes):
 loc_layers = []
 conf_layers = []
 vgg_source = [21,-2]
 for k,v in enumerate(vgg_source):
  loc_layers += [nn.Conv2d(vgg[v].out_channels,cfg[k] * 4,padding=1)]
  conf_layers += [nn.Conv2d(vgg[v].out_channels,cfg[k] * num_classes,padding=1)]
 for k,v in enumerate(extra_layers[1::2],2):
  loc_layers += [nn.Conv2d(v.out_channels,cfg[k]
         * 4,padding=1)]
  conf_layers += [nn.Conv2d(v.out_channels,cfg[k]
         * num_classes,padding=1)]
 return vgg,(loc_layers,conf_layers)
base_,extras_,head_ = multibox(vgg(base[str(size)],## 產生vgg19基本模型
          add_extras(extras[str(size)],1024),mbox[str(size)],num_classes)
#mbox[str(size)]為:[4,4]

得到的輸出為:

base_為上述描述的vgg網路,extras_為extra_layers網路,head_為:

([Conv2d(512,16,1)),Conv2d(1024,24,Conv2d(512,Conv2d(256,1))],[Conv2d(512,84,126,1))])

SSD網路及forward函式為:

class SSD(nn.Module):
 """Single Shot Multibox Architecture
 The network is composed of a base VGG network followed by the
 added multibox conv layers. Each multibox layer branches into
  1) conv2d for class conf scores
  2) conv2d for localization predictions
  3) associated priorbox layer to produce default bounding
   boxes specific to the layer's feature map size.
 See: https://arxiv.org/pdf/1512.02325.pdf for more details.

 Args:
  phase: (string) Can be "test" or "train"
  size: input image size
  base: VGG16 layers for input,size of either 300 or 500
  extras: extra layers that feed to multibox loc and conf layers
  head: "multibox head" consists of loc and conf conv layers
 """

 def __init__(self,phase,size,base,extras,head,num_classes):
  super(SSD,self).__init__()
  self.phase = phase
  self.num_classes = num_classes 
  self.cfg = (coco,voc)[num_classes == 21]
  self.priorbox = PriorBox(self.cfg)
  self.priors = Variable(self.priorbox.forward(),volatile=True)
  self.size = size

  # SSD network
  self.vgg = nn.ModuleList(base)
  # Layer learns to scale the l2 normalized features from conv4_3
  self.L2Norm = L2Norm(512,20)
  self.extras = nn.ModuleList(extras)

  self.loc = nn.ModuleList(head[0])
  self.conf = nn.ModuleList(head[1])

  if phase == 'test':
   self.softmax = nn.Softmax(dim=-1)
   self.detect = Detect(num_classes,200,0.01,0.45)

 def forward(self,x):
  """Applies network layers and ops on input image(s) x.

  Args:
   x: input image or batch of images. Shape: [batch,3,300,300].

  Return:
   Depending on phase:
   test:
    Variable(tensor) of output class label predictions,confidence score,and corresponding location predictions for
    each object detected. Shape: [batch,topk,7]

   train:
    list of concat outputs from:
     1: confidence layers,Shape: [batch*num_priors,num_classes]
     2: localization layers,Shape: [batch,num_priors*4]
     3: priorbox layers,Shape: [2,num_priors*4]
  """
  sources = list()
  loc = list()
  conf = list()

  # apply vgg up to conv4_3 relu
  for k in range(23):
   x = self.vgg[k](x) ##得到的x尺度為[1,38,38]

  s = self.L2Norm(x)
  sources.append(s)

  # apply vgg up to fc7
  for k in range(23,len(self.vgg)):
   x = self.vgg[k](x) ##得到的x尺寸為[1,19,19]
  sources.append(x)

  # apply extra layers and cache source layer outputs
  for k,v in enumerate(self.extras):
   x = F.relu(v(x),inplace=True)
   if k % 2 == 1:
    sources.append(x)
  '''
  上述得到的x輸出分別為:
  torch.Size([1,10,10])
  torch.Size([1,5,5])
  torch.Size([1,3])
  torch.Size([1,1,1])
  '''

  # apply multibox head to source layers
  for (x,l,c) in zip(sources,self.loc,self.conf):
   loc.append(l(x).permute(0,2,1).contiguous())
   conf.append(c(x).permute(0,1).contiguous())

  loc = torch.cat([o.view(o.size(0),-1) for o in loc],1)
  conf = torch.cat([o.view(o.size(0),-1) for o in conf],1)
  if self.phase == "test":
   output = self.detect(
    loc.view(loc.size(0),-1,4),# loc preds
    self.softmax(conf.view(conf.size(0),self.num_classes)),# conf preds
    self.priors.type(type(x.data))     # default boxes
   )
  else:
   output = (
    loc.view(loc.size(0),#[1,8732,4]
    conf.view(conf.size(0),self.num_classes),21]
    self.priors
   )
  return output

上述程式碼中sources中儲存的資料輸出如下,即用於邊框提取的特徵圖:

torch.Size([1,38])
torch.Size([1,19])
torch.Size([1,10])
torch.Size([1,5])
torch.Size([1,3])
torch.Size([1,1])

模型輸入為

x=Variable(torch.randn(1,300))

以上這篇基於Pytorch SSD模型分析就是小編分享給大家的全部內容了,希望能給大家一個參考,也希望大家多多支援我們。