Pytorch實現InceptionV1模型(GoogLeNet)
阿新 • • 發佈:2021-02-04
Pytorch實現InceptionV1模型
模型簡介
論文連結:https://arxiv.org/pdf/1409.4842v1.pdf
模型提出時間:2014年9月
引數量:5M
Top1準確率:69.8%
模型基本思想:同時使用1×1、3×3和5×5的卷積核對影象進行特徵提取,充分增加了模型的寬度,從而提高模型對影象不同尺度特徵的適應能力。其中,1×1的卷積核不改變特徵圖的空間大小,而是對各通道做線性組合,用來調整(通常是降低)通道數以減少後續卷積的計算量,這對識別精度的提升也有一定幫助。
模型結構及原始碼
InceptionV1模組
InceptionV1模組由下圖中的基本模組構成,其中每個子模組的stride均為1,padding均為⌊kernel_size/2⌋,通過這樣設定stride和padding的大小,每個子模組在處理圖片時不會改變圖片的大小,而只改變通道數目。由下圖可見,InceptionV1模組的輸出是四個分支的輸出在通道維度上拼接(疊加)的結果,即InceptionV1模組並沒有改變圖片的大小,而只改變了通道數目。
InceptionV1模組中的每個卷積模組由三部分構成:卷積、批標準化和啟用。
InceptionV1模組中的卷積模組原始碼如下:
# InceptionV1的卷積模組:卷積 + 批標準化 + 啟用
def ConvBNReLU(in_channels, out_channels, kernel_size):
    """Build a Conv2d -> BatchNorm2d -> ReLU6 stack.

    The convolution uses ``stride=1`` and ``padding=kernel_size // 2``, so
    for odd kernel sizes the spatial size of the input is preserved and only
    the number of channels changes.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Side length of the square convolution kernel.

    Returns:
        An ``nn.Sequential`` holding the three layers in order.
    """
    return nn.Sequential(
        nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=1,
            padding=kernel_size // 2,
        ),
        nn.BatchNorm2d(out_channels),
        nn.ReLU6(inplace=True),
    )
InceptionV1模組的原始碼如下:
# InceptionV1模組
class InceptionV1Module(nn.Module):
    """Inception v1 block: four parallel branches concatenated on channels.

    Every branch keeps the spatial size of its input (stride 1 with matching
    padding), so the output has the same height and width as the input and
    ``outchannel1 + outchannel2_2 + outchannel3_2 + outchannel4`` channels.

    Args:
        inchannel: Number of channels of the input feature map.
        outchannel1: Output channels of the 1x1 branch.
        outchannel2_1: Channels after the 1x1 reduction of the 3x3 branch.
        outchannel2_2: Output channels of the 3x3 branch.
        outchannel3_1: Channels after the 1x1 reduction of the 5x5 branch.
        outchannel3_2: Output channels of the 5x5 branch.
        outchannel4: Output channels of the pooling branch.
    """

    def __init__(self, inchannel, outchannel1, outchannel2_1, outchannel2_2,
                 outchannel3_1, outchannel3_2, outchannel4):
        super(InceptionV1Module, self).__init__()
        # The helper is named ConvBNReLU; the former spelling "ConvBNRelu"
        # raised NameError as soon as the module was constructed.
        # Branch 1: 1x1 convolution.
        self.block1 = ConvBNReLU(inchannel, outchannel1, 1)
        # Branch 2: 1x1 reduction followed by a 3x3 convolution.
        self.block2_1 = ConvBNReLU(inchannel, outchannel2_1, 1)
        self.block2_2 = ConvBNReLU(outchannel2_1, outchannel2_2, 3)
        # Branch 3: 1x1 reduction followed by a 5x5 convolution.
        self.block3_1 = ConvBNReLU(inchannel, outchannel3_1, 1)
        self.block3_2 = ConvBNReLU(outchannel3_1, outchannel3_2, 5)
        # Branch 4: 3x3 max pooling followed by a 1x1 convolution.
        self.block4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.block4_2 = ConvBNReLU(inchannel, outchannel4, 1)

    def forward(self, x):
        """Run the four branches on ``x`` and concatenate them along dim 1."""
        x1 = self.block1(x)
        x2 = self.block2_2(self.block2_1(x))
        x3 = self.block3_2(self.block3_1(x))
        x4 = self.block4_2(self.block4_1(x))
        return torch.cat([x1, x2, x3, x4], dim=1)
全連線模組
經過多個Inception模組處理後,可將Inception模組的輸出通過一個全連線模組(即輔助分類器)得到一個輔助的分類輸出,全連線模組的基本構架如下圖所示。全連線模組的輸入是14×14的特徵圖,而輸出的維度是待分類類別個數。全連線模組的結構和原始碼如下:
# 全連線模組
class InceptionAux(nn.Module):
    """Auxiliary classifier head attached to an intermediate feature map.

    Pipeline: 5x5 average pooling (stride 3) -> 1x1 ConvBNReLU to 128
    channels -> flatten -> Linear(2048, 1024) -> ReLU6 -> Dropout(0.7) ->
    Linear(1024, out_channels).

    The first linear layer expects ``128 * 4 * 4`` features, so the pooled
    feature map must be 4x4 (for example a 14x14 input).

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of output logits (classes).
    """

    def __init__(self, in_channels, out_channels):
        super(InceptionAux, self).__init__()
        # 5x5 average pooling with stride 3.
        self.auxiliary_avgpool = nn.AvgPool2d(kernel_size=5, stride=3)
        # 1x1 convolution block reducing to 128 channels.
        self.auxiliary_conv1 = ConvBNReLU(in_channels=in_channels, out_channels=128, kernel_size=1)
        # Fully connected layer 1 + activation + dropout.
        self.auxiliary_linear1 = nn.Linear(in_features=128 * 4 * 4, out_features=1024)
        self.auxiliary_relu = nn.ReLU6(inplace=True)
        self.auxiliary_dropout = nn.Dropout(p=0.7)
        # Fully connected layer 2 producing the logits.
        self.auxiliary_linear2 = nn.Linear(in_features=1024, out_features=out_channels)

    def forward(self, x):
        """Return logits of shape ``(batch, out_channels)`` for ``x``."""
        x = self.auxiliary_conv1(self.auxiliary_avgpool(x))
        # Flatten everything except the batch dimension for the linear layers.
        x = x.view(x.size(0), -1)
        x = self.auxiliary_relu(self.auxiliary_linear1(x))
        out = self.auxiliary_linear2(self.auxiliary_dropout(x))
        return out
GoogLeNet模型
GoogLeNet實際上就是多個InceptionV1模組和全連線模組的組合,其模型框架圖如下圖所示。
GoogLeNet接受的輸入圖片大小為224×224,可將其分解為幾個子模組進行分析。其中,子模組1由多個卷積和池化操作構成,子模組2由三個InceptionV1模組構成,子模組3由三個InceptionV1模組和一個全連線模組構成,子模組4由三個InceptionV1模組和兩個全連線模組構成。各模組的細節如下圖所示。GoogLeNet模型的原始碼如下:
# GoogLeNet模型
class GoogLeNet(nn.Module):
    """GoogLeNet (Inception v1) image classifier.

    The layer sizes are laid out for 224x224 RGB inputs: the final 7x7
    average pool and the 4x4 feature size assumed by ``InceptionAux`` both
    depend on that resolution.

    Args:
        num_classes: Number of output classes.
        stage: When equal to ``'train'`` the two auxiliary classifiers are
            created and ``forward`` returns ``(aux1, aux2, out)``; for any
            other value only ``out`` is returned.
    """

    def __init__(self, num_classes=1000, stage='train'):
        # Was super(InceptionV1, self): that name is undefined here and
        # raised NameError on construction.
        super(GoogLeNet, self).__init__()
        self.stage = stage

        # Sub-block 1: stem of convolutions and poolings (224 -> 28 spatial).
        # NOTE(review): the stem applies Conv+BN with no activation between
        # layers; kept exactly as in the original listing.
        self.block1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1, stride=1),
            nn.BatchNorm2d(64),
            nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(192),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )

        # InceptionV1Module arguments are passed positionally, in the order
        # (in, 1x1, 3x3 reduce, 3x3, 5x5 reduce, 5x5, pool projection); the
        # keyword names used previously did not exist in its signature.

        # Sub-block 2: three Inception modules.
        self.block2 = nn.Sequential(
            InceptionV1Module(192, 64, 96, 128, 16, 32, 32),      # -> 256 channels
            InceptionV1Module(256, 128, 128, 192, 32, 96, 64),    # -> 480 channels
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            InceptionV1Module(480, 192, 96, 208, 16, 48, 64),     # -> 512 channels
        )

        # Sub-block 3: three Inception modules + first auxiliary classifier.
        if self.stage == 'train':
            self.aux_logits1 = InceptionAux(in_channels=512, out_channels=num_classes)
        self.block3 = nn.Sequential(
            InceptionV1Module(512, 160, 112, 224, 24, 64, 64),    # -> 512 channels
            InceptionV1Module(512, 128, 128, 256, 24, 64, 64),    # -> 512 channels
            InceptionV1Module(512, 112, 144, 288, 32, 64, 64),    # -> 528 channels
        )

        # Sub-block 4: three Inception modules + second auxiliary classifier
        # + the final classifier.
        if self.stage == 'train':
            self.aux_logits2 = InceptionAux(in_channels=528, out_channels=num_classes)
        self.block4 = nn.Sequential(
            InceptionV1Module(528, 256, 160, 320, 32, 128, 128),  # -> 832 channels
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            InceptionV1Module(832, 256, 160, 320, 32, 128, 128),  # -> 832 channels
            InceptionV1Module(832, 384, 192, 384, 48, 128, 128),  # -> 1024 channels
        )
        self.block4_1 = nn.Sequential(
            nn.AvgPool2d(kernel_size=7, stride=1),
            # The pooled map is (N, 1024, 1, 1); nn.Linear acts on the last
            # dimension, so it must be flattened to (N, 1024) first.
            nn.Flatten(),
            nn.Dropout(p=0.4),
            nn.Linear(in_features=1024, out_features=num_classes),
        )

    def forward(self, x):
        """Classify a batch of images.

        Returns:
            ``(aux1, aux2, out)`` when ``self.stage == 'train'``, otherwise
            only ``out``.
        """
        x = self.block1(x)
        x = self.block2(x)
        aux1_input = x  # feature map fed to the first auxiliary head
        x = self.block3(x)
        aux2_input = x  # feature map fed to the second auxiliary head
        x = self.block4(x)
        out = self.block4_1(x)
        if self.stage == 'train':
            aux1 = self.aux_logits1(aux1_input)
            aux2 = self.aux_logits2(aux2_input)
            return aux1, aux2, out
        return out
完整原始碼
InceptionV1模型(GoogLeNet模型)的完整程式碼如下:
import torch
import torch.nn as nn
import torchvision
# InceptionV1的卷積模組:卷積 + 批標準化 + 啟用
def ConvBNReLU(in_channels, out_channels, kernel_size):
    """Build a Conv2d -> BatchNorm2d -> ReLU6 stack.

    The convolution uses ``stride=1`` and ``padding=kernel_size // 2``, so
    for odd kernel sizes the spatial size of the input is preserved and only
    the number of channels changes.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Side length of the square convolution kernel.

    Returns:
        An ``nn.Sequential`` holding the three layers in order.
    """
    return nn.Sequential(
        nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=1,
            padding=kernel_size // 2,
        ),
        nn.BatchNorm2d(out_channels),
        nn.ReLU6(inplace=True),
    )
# InceptionV1模組
class InceptionV1Module(nn.Module):
    """Inception v1 block: four parallel branches concatenated on channels.

    Every branch keeps the spatial size of its input (stride 1 with matching
    padding), so the output has the same height and width as the input and
    ``outchannel1 + outchannel2_2 + outchannel3_2 + outchannel4`` channels.

    Args:
        inchannel: Number of channels of the input feature map.
        outchannel1: Output channels of the 1x1 branch.
        outchannel2_1: Channels after the 1x1 reduction of the 3x3 branch.
        outchannel2_2: Output channels of the 3x3 branch.
        outchannel3_1: Channels after the 1x1 reduction of the 5x5 branch.
        outchannel3_2: Output channels of the 5x5 branch.
        outchannel4: Output channels of the pooling branch.
    """

    def __init__(self, inchannel, outchannel1, outchannel2_1, outchannel2_2,
                 outchannel3_1, outchannel3_2, outchannel4):
        super(InceptionV1Module, self).__init__()
        # The helper is named ConvBNReLU; the former spelling "ConvBNRelu"
        # raised NameError as soon as the module was constructed.
        # Branch 1: 1x1 convolution.
        self.block1 = ConvBNReLU(inchannel, outchannel1, 1)
        # Branch 2: 1x1 reduction followed by a 3x3 convolution.
        self.block2_1 = ConvBNReLU(inchannel, outchannel2_1, 1)
        self.block2_2 = ConvBNReLU(outchannel2_1, outchannel2_2, 3)
        # Branch 3: 1x1 reduction followed by a 5x5 convolution.
        self.block3_1 = ConvBNReLU(inchannel, outchannel3_1, 1)
        self.block3_2 = ConvBNReLU(outchannel3_1, outchannel3_2, 5)
        # Branch 4: 3x3 max pooling followed by a 1x1 convolution.
        self.block4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.block4_2 = ConvBNReLU(inchannel, outchannel4, 1)

    def forward(self, x):
        """Run the four branches on ``x`` and concatenate them along dim 1."""
        x1 = self.block1(x)
        x2 = self.block2_2(self.block2_1(x))
        x3 = self.block3_2(self.block3_1(x))
        x4 = self.block4_2(self.block4_1(x))
        return torch.cat([x1, x2, x3, x4], dim=1)
# 全連線模組
class InceptionAux(nn.Module):
    """Auxiliary classifier head attached to an intermediate feature map.

    Pipeline: 5x5 average pooling (stride 3) -> 1x1 ConvBNReLU to 128
    channels -> flatten -> Linear(2048, 1024) -> ReLU6 -> Dropout(0.7) ->
    Linear(1024, out_channels).

    The first linear layer expects ``128 * 4 * 4`` features, so the pooled
    feature map must be 4x4 (for example a 14x14 input).

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of output logits (classes).
    """

    def __init__(self, in_channels, out_channels):
        super(InceptionAux, self).__init__()
        # 5x5 average pooling with stride 3.
        self.auxiliary_avgpool = nn.AvgPool2d(kernel_size=5, stride=3)
        # 1x1 convolution block reducing to 128 channels.
        self.auxiliary_conv1 = ConvBNReLU(in_channels=in_channels, out_channels=128, kernel_size=1)
        # Fully connected layer 1 + activation + dropout.
        self.auxiliary_linear1 = nn.Linear(in_features=128 * 4 * 4, out_features=1024)
        self.auxiliary_relu = nn.ReLU6(inplace=True)
        self.auxiliary_dropout = nn.Dropout(p=0.7)
        # Fully connected layer 2 producing the logits.
        self.auxiliary_linear2 = nn.Linear(in_features=1024, out_features=out_channels)

    def forward(self, x):
        """Return logits of shape ``(batch, out_channels)`` for ``x``."""
        x = self.auxiliary_conv1(self.auxiliary_avgpool(x))
        # Flatten everything except the batch dimension for the linear layers.
        x = x.view(x.size(0), -1)
        x = self.auxiliary_relu(self.auxiliary_linear1(x))
        out = self.auxiliary_linear2(self.auxiliary_dropout(x))
        return out
# GoogLeNet模型
class GoogLeNet(nn.Module):
    """GoogLeNet (Inception v1) image classifier.

    The layer sizes are laid out for 224x224 RGB inputs: the final 7x7
    average pool and the 4x4 feature size assumed by ``InceptionAux`` both
    depend on that resolution.

    Args:
        num_classes: Number of output classes.
        stage: When equal to ``'train'`` the two auxiliary classifiers are
            created and ``forward`` returns ``(aux1, aux2, out)``; for any
            other value only ``out`` is returned.
    """

    def __init__(self, num_classes=1000, stage='train'):
        # Was super(InceptionV1, self): that name is undefined here and
        # raised NameError on construction.
        super(GoogLeNet, self).__init__()
        self.stage = stage

        # Sub-block 1: stem of convolutions and poolings (224 -> 28 spatial).
        # NOTE(review): the stem applies Conv+BN with no activation between
        # layers; kept exactly as in the original listing.
        self.block1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1, stride=1),
            nn.BatchNorm2d(64),
            nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(192),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )

        # InceptionV1Module arguments are passed positionally, in the order
        # (in, 1x1, 3x3 reduce, 3x3, 5x5 reduce, 5x5, pool projection); the
        # keyword names used previously did not exist in its signature.

        # Sub-block 2: three Inception modules.
        self.block2 = nn.Sequential(
            InceptionV1Module(192, 64, 96, 128, 16, 32, 32),      # -> 256 channels
            InceptionV1Module(256, 128, 128, 192, 32, 96, 64),    # -> 480 channels
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            InceptionV1Module(480, 192, 96, 208, 16, 48, 64),     # -> 512 channels
        )

        # Sub-block 3: three Inception modules + first auxiliary classifier.
        if self.stage == 'train':
            self.aux_logits1 = InceptionAux(in_channels=512, out_channels=num_classes)
        self.block3 = nn.Sequential(
            InceptionV1Module(512, 160, 112, 224, 24, 64, 64),    # -> 512 channels
            InceptionV1Module(512, 128, 128, 256, 24, 64, 64),    # -> 512 channels
            InceptionV1Module(512, 112, 144, 288, 32, 64, 64),    # -> 528 channels
        )

        # Sub-block 4: three Inception modules + second auxiliary classifier
        # + the final classifier.
        if self.stage == 'train':
            self.aux_logits2 = InceptionAux(in_channels=528, out_channels=num_classes)
        self.block4 = nn.Sequential(
            InceptionV1Module(528, 256, 160, 320, 32, 128, 128),  # -> 832 channels
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            InceptionV1Module(832, 256, 160, 320, 32, 128, 128),  # -> 832 channels
            InceptionV1Module(832, 384, 192, 384, 48, 128, 128),  # -> 1024 channels
        )
        self.block4_1 = nn.Sequential(
            nn.AvgPool2d(kernel_size=7, stride=1),
            # The pooled map is (N, 1024, 1, 1); nn.Linear acts on the last
            # dimension, so it must be flattened to (N, 1024) first.
            nn.Flatten(),
            nn.Dropout(p=0.4),
            nn.Linear(in_features=1024, out_features=num_classes),
        )

    def forward(self, x):
        """Classify a batch of images.

        Returns:
            ``(aux1, aux2, out)`` when ``self.stage == 'train'``, otherwise
            only ``out``.
        """
        x = self.block1(x)
        x = self.block2(x)
        aux1_input = x  # feature map fed to the first auxiliary head
        x = self.block3(x)
        aux2_input = x  # feature map fed to the second auxiliary head
        x = self.block4(x)
        out = self.block4_1(x)
        if self.stage == 'train':
            aux1 = self.aux_logits1(aux1_input)
            aux2 = self.aux_logits2(aux2_input)
            return aux1, aux2, out
        return out