pytorch 訓練資料以及測試 全部程式碼(5) 網路
from networks import deeplab_xception, deeplab_resnet

# Network definition: pick the module that implements the requested backbone.
# Both modules expose a DeepLabv3_plus class taking the same keyword arguments,
# so the constructor call is written only once below.
if backbone == 'xception':
    backbone_module = deeplab_xception
elif backbone == 'resnet':
    backbone_module = deeplab_resnet
else:
    # Same exception type as before, but name the rejected value so the
    # failure is diagnosable from the traceback alone.
    raise NotImplementedError(
        "Unsupported backbone: {!r} (expected 'xception' or 'resnet')".format(backbone))

net = backbone_module.DeepLabv3_plus(nInputChannels=3, n_classes=21, os=16, pretrained=True)
這裡面有兩個網路xception和resnet
下面講解檔案deeplab_resnet裡面的resnet
class DeepLabv3_plus(nn.Module):
    """DeepLabv3+ segmentation network built on a ResNet-101 feature extractor.

    The backbone output goes through four parallel ASPP branches and one
    global-average-pooling branch. The five results are concatenated, fused by
    a 1x1 convolution, upsampled, concatenated with channel-reduced low-level
    features and decoded into ``n_classes`` score maps that are resized to the
    spatial size of the input.

    Args:
        nInputChannels: Number of channels of the input image.
        n_classes: Number of output channels of the final convolution.
        os: Output stride passed to the backbone; only 16 and 8 are supported.
        pretrained: Forwarded unchanged to ``ResNet101``.
        _print: When true, print the configuration while constructing.

    Raises:
        NotImplementedError: If ``os`` is neither 16 nor 8.
    """

    def __init__(self, nInputChannels=3, n_classes=21, os=16, pretrained=False, _print=True):
        if _print:
            print("Constructing DeepLabv3+ model...")
            print("Number of classes: {}".format(n_classes))
            print("Output stride: {}".format(os))
            print("Number of Input Channels: {}".format(nInputChannels))
        super(DeepLabv3_plus, self).__init__()

        # Atrous Conv
        self.resnet_features = ResNet101(nInputChannels, os, pretrained=pretrained)

        # ASPP: the dilation rates depend on the output stride.
        if os == 16:
            rates = [1, 6, 12, 18]
        elif os == 8:
            rates = [1, 12, 24, 36]
        else:
            raise NotImplementedError(
                "Unsupported output stride: {!r} (expected 16 or 8)".format(os))

        self.aspp1 = ASPP_module(2048, 256, rate=rates[0])
        self.aspp2 = ASPP_module(2048, 256, rate=rates[1])
        self.aspp3 = ASPP_module(2048, 256, rate=rates[2])
        self.aspp4 = ASPP_module(2048, 256, rate=rates[3])

        self.relu = nn.ReLU()

        # Image-level branch: pool to 1x1, then reduce 2048 -> 256 channels.
        self.global_avg_pool = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),
                                             nn.Conv2d(2048, 256, 1, stride=1, bias=False),
                                             nn.BatchNorm2d(256),
                                             nn.ReLU())

        # Fuses the five concatenated branches.
        # NOTE(review): 1280 presumably equals 5 * 256, i.e. each ASPP branch
        # outputs 256 channels -- confirm against ASPP_module.
        self.conv1 = nn.Conv2d(1280, 256, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(256)

        # adopt [1x1, 48] for channel reduction.
        self.conv2 = nn.Conv2d(256, 48, 1, bias=False)
        self.bn2 = nn.BatchNorm2d(48)

        # Decoder: 304 = 256 (fused ASPP features) + 48 (reduced low-level features).
        self.last_conv = nn.Sequential(nn.Conv2d(304, 256, kernel_size=3, stride=1, padding=1, bias=False),
                                       nn.BatchNorm2d(256),
                                       nn.ReLU(),
                                       nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False),
                                       nn.BatchNorm2d(256),
                                       nn.ReLU(),
                                       nn.Conv2d(256, n_classes, kernel_size=1, stride=1))

    def forward(self, input):
        """Compute per-class score maps for ``input``.

        Args:
            input: Batch of images; its last two dimensions are used as the
                spatial size of the result.

        Returns:
            The output of ``last_conv`` bilinearly resized to the spatial size
            of ``input``.
        """
        x, low_level_features = self.resnet_features(input)
        x1 = self.aspp1(x)
        x2 = self.aspp2(x)
        x3 = self.aspp3(x)
        x4 = self.aspp4(x)
        x5 = self.global_avg_pool(x)
        # F.interpolate replaces the deprecated F.upsample; the arguments and
        # the result are the same. Bring the 1x1 pooled map back to the
        # spatial size of the ASPP outputs so it can be concatenated.
        x5 = F.interpolate(x5, size=x4.size()[2:], mode='bilinear', align_corners=True)

        # Stack all branches along the channel dimension.
        x = torch.cat((x1, x2, x3, x4, x5), dim=1)

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        # Resize to ceil(H / 4) x ceil(W / 4) of the input.
        # NOTE(review): presumably this is the resolution of
        # low_level_features -- confirm against the backbone.
        x = F.interpolate(x, size=(int(math.ceil(input.size()[-2] / 4)),
                                   int(math.ceil(input.size()[-1] / 4))),
                          mode='bilinear', align_corners=True)

        low_level_features = self.conv2(low_level_features)
        low_level_features = self.bn2(low_level_features)
        low_level_features = self.relu(low_level_features)

        x = torch.cat((x, low_level_features), dim=1)
        x = self.last_conv(x)
        x = F.interpolate(x, size=input.size()[2:], mode='bilinear', align_corners=True)

        return x

    def freeze_bn(self):
        """Put every ``nn.BatchNorm2d`` submodule into evaluation mode."""
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()

    def __init_weight(self):
        """Re-initialise every Conv2d and BatchNorm2d submodule in place.

        Conv2d weights are drawn with ``kaiming_normal_``; BatchNorm2d weights
        are set to 1 and biases to 0. The constructor above does not call this
        method.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                torch.nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
繼承torch.nn庫裡面的Module類,我們的模型就是它的子類。模組還可以包含其他模組,允許將它們巢狀在樹形結構中。可以將子模組指定為常規屬性。forward函式需要我們自己重寫
class Module(object): r"""Base class for all neural network modules. Your models should also subclass this class. Modules can also contain other Modules, allowing to nest them in a tree structure. You can assign the submodules as regular attributes:: import torch.nn as nn import torch.nn.functional as F class Model(nn.Module): def __init__(self): super(Model, self).__init__() self.conv1 = nn.Conv2d(1, 20, 5) self.conv2 = nn.Conv2d(20, 20, 5) def forward(self, x): x = F.relu(self.conv1(x)) return F.relu(self.conv2(x)) Submodules assigned in this way will be registered, and will have their parameters converted too when you call `.cuda()`, etc. """ dump_patches = False r"""This allows better BC support for :meth:`load_state_dict`. In :meth:`state_dict`, the version number will be saved as in the attribute `_metadata` of the returned state dict, and thus pickled. `_metadata` is a dictionary with keys follow the naming convention of state dict. See ``_load_from_state_dict`` on how to use this information in loading. If new parameters/buffers are added/removed from a module, this number shall be bumped, and the module's `_load_from_state_dict` method can compare the version number and do appropriate changes if the state dict is from before the change.""" _version = 1 def __init__(self): self._backend = thnn_backend self._parameters = OrderedDict() self._buffers = OrderedDict() self._backward_hooks = OrderedDict() self._forward_hooks = OrderedDict() self._forward_pre_hooks = OrderedDict() self._modules = OrderedDict() self.training = True def forward(self, *input): r"""Defines the computation performed at every call. Should be overridden by all subclasses. .. note:: Although the recipe for forward pass needs to be defined within this function, one should call the :class:`Module` instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them. """ raise NotImplementedError
super(DeepLabv3_plus, self).__init__()
這是對繼承自父類的屬性進行初始化。而且是用父類的初始化方法來初始化繼承的屬性。也就是說,子類繼承了父類的所有屬性和方法,父類屬性自然會用父類方法來進行初始化。當然,如果初始化的邏輯與父類的不同,不使用父類的方法,自己重新初始化也是可以的。請參考https://www.imooc.com/qadetail/72165
# Atrous Conv
self.resnet_features = ResNet101(nInputChannels, os, pretrained=pretrained)
由上面的初始化可知number of input channels = 3, output stride = 16,這裡呼叫了一個函式如下:
def ResNet101(nInputChannels=3, os=16, pretrained=False):
    """Return a ``ResNet`` built from ``Bottleneck`` blocks.

    Args:
        nInputChannels: Passed to ``ResNet`` as its first argument.
        os: Output stride, passed through to ``ResNet``.
        pretrained: Passed through to ``ResNet`` as a keyword argument.

    Returns:
        The constructed ``ResNet`` instance.
    """
    # Number of blocks in each of the four stages.
    blocks_per_stage = [3, 4, 23, 3]
    return ResNet(nInputChannels, Bottleneck, blocks_per_stage, os, pretrained=pretrained)
這裡又呼叫了ResNet,如下所示。可知ResNet是一個類,因此上面的model是該類的一個例項,也就是說self.resnet_features是ResNet的一個例項。
class ResNet(nn.Module):
def __init__(self, nInputChannels, block, layers, os=16, pretrained=False):
def _make_layer(self, block, planes, blocks, stride=1, rate=1):
def _make_MG_unit(self, block, planes, blocks=[1,2,4], stride=1, rate=1):
def forward(self, input):
def _init_weight(self):
def _load_pretrained_model(self):
主線繼續!根據output stride(os)的大小來確定ASPP各分支的rate(空洞率)
# ASPP
if os == 16:
rates = [1, 6, 12, 18]
elif os == 8:
rates = [1, 12, 24, 36]
else:
raise NotImplementedError
然後根據不同的rate得到不同的ASPP模組
self.aspp1 = ASPP_module(2048, 256, rate=rates[0])
self.aspp2 = ASPP_module(2048, 256, rate=rates[1])
self.aspp3 = ASPP_module(2048, 256, rate=rates[2])
self.aspp4 = ASPP_module(2048, 256, rate=rates[3])
其中ASPP和上面的resnet101一樣是一個網路
class ASPP_module(nn.Module):
def __init__(self, inplanes, planes, rate):
def forward(self, x):
def _init_weight(self):
主線繼續!定義啟用層
self.relu = nn.ReLU()
定義全域性平均層,這個是一個有序的容器,神經網路模組將按照在傳入構造器的順序依次被新增到計算圖中執行,同時以神經網路模組為元素的有序字典也可以作為傳入引數。 nn.Sequential可參考https://blog.csdn.net/dss_dssssd/article/details/82980222
self.global_avg_pool = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),
nn.Conv2d(2048, 256, 1, stride=1, bias=False),
nn.BatchNorm2d(256),
nn.ReLU())
nn.AdaptiveAvgPool2d的說明如下:它對輸入做自適應平均池化,使輸出的H和W等於給定的大小,batch和channel維度保持不變
The output is of size H x W, for any input size.
The number of output features is equal to the number of input planes.
Args:
output_size: the target output size of the image of the form H x W.
Can be a tuple (H, W) or a single H for a square image H x H
H and W can be either a ``int``, or ``None`` which means the size will be the same as that of the input.
Examples:
>>> # target output size of 5x7
>>> m = nn.AdaptiveAvgPool2d((5,7))
>>> input = torch.randn(1, 64, 8, 9) size=(1,64,8,9)
>>> output = m(input) size=(1,64,5,7)
>>> # target output size of 7x7 (square)
>>> m = nn.AdaptiveAvgPool2d(7)
>>> input = torch.randn(1, 64, 10, 9) size=(1,64,10,9)
>>> output = m(input) size=(1,64,7,7)
>>> # target output size of 10x7
>>> m = nn.AdaptiveAvgPool2d((None, 7))
>>> input = torch.randn(1, 64, 10, 9) size=(1,64,10,9)
>>> output = m(input) size=(1,64,10,7)
nn.Conv2d這個函式如下:和TensorFlow的卷積有不同之處,可參考https://blog.csdn.net/g11d111/article/details/82665265
class Conv2d(_ConvNd):
def __init__(self, in_channels, out_channels, kernel_size, stride=1,padding=0, dilation=1, groups=1, bias=True)
Args:
in_channels (int): Number of channels in the input image
out_channels (int): Number of channels produced by the convolution
kernel_size (int or tuple): Size of the convolving kernel
stride (int or tuple, optional): Stride of the convolution. Default: 1
padding (int or tuple, optional): 輸入的每一條邊補充0的層數 Default: 0
dilation (int or tuple, optional): 卷積核元素之間的間距 Default: 1
groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True``
標註:groups: 控制輸入和輸出之間的連線: group=1,輸出是所有的輸入的卷積;group=2,此時相當於有並排的兩個卷積層,每個卷積層計算輸入通道的一半,並且產生的輸出是輸出通道的一半,隨後將這兩個輸出連線起來。
Shape:
- Input: `(N, C_{in}, H_{in}, W_{in})`
- Output: `(N, C_{out}, H_{out}, W_{out})`
Examples::
>>> # With square kernels and equal stride
>>> m = nn.Conv2d(16, 33, 3, stride=2)
>>> # non-square kernels and unequal stride and with padding
>>> m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
>>> # non-square kernels and unequal stride and with padding and dilation
>>> m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
>>> input = torch.randn(20, 16, 50, 100)
>>> output = m(input)
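輸出大小的計算方式如下(結果向下取整):
$$H_{out} = \left\lfloor \frac{H_{in} + 2 \times \text{padding}[0] - \text{dilation}[0] \times (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1 \right\rfloor$$
$$W_{out} = \left\lfloor \frac{W_{in} + 2 \times \text{padding}[1] - \text{dilation}[1] \times (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1 \right\rfloor$$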
nn.BatchNorm2d函式如下:
class torch.nn.BatchNorm2d(num_features, eps=1e-05, momentum=0.1, affine=True):
pass
"""
引數:
num_features: 來自期望輸入的特徵數,該期望輸入的大小為'batch_size x num_features x height x width'
eps: 為保證數值穩定性(分母不能趨近或取0),給分母加上的值。預設為1e-5。
momentum: 動態均值和動態方差所使用的動量。預設為0.1。
affine: 一個布林值,當設為true,給該層新增可學習的仿射變換引數也就是添加了weight和bias引數(learnable affine parameters)。
Shape: - 輸入:(N, C,H, W) - 輸出:(N, C, H, W)(輸入輸出相同)
"""
主程式繼續!下面定義了卷積,BN,一個時序容器(以他們傳入的順序被新增到容器中)
self.conv1 = nn.Conv2d(1280, 256, 1, bias=False)
self.bn1 = nn.BatchNorm2d(256)
# adopt [1x1, 48] for channel reduction.
self.conv2 = nn.Conv2d(256, 48, 1, bias=False)
self.bn2 = nn.BatchNorm2d(48)
self.last_conv = nn.Sequential(nn.Conv2d(304, 256, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.Conv2d(256, n_classes, kernel_size=1, stride=1))
接下來就是forward函數了
def forward(self, input):
x, low_level_features = self.resnet_features(input)
上面這句得到的是resnet101的forward輸出
下面得到四個不同rate的ASPP的forward輸出
x1 = self.aspp1(x)
x2 = self.aspp2(x)
x3 = self.aspp3(x)
x4 = self.aspp4(x)
然後相同的輸入經過一個時序容器
x5 = self.global_avg_pool(x)
x5向上取樣得到和x4相同的大小(h和w)
x5 = F.upsample(x5, size=x4.size()[2:], mode='bilinear', align_corners=True)
將上述所有的特徵圖堆疊在一起,使用torch.cat()函式,可參見https://blog.csdn.net/xrinosvip/article/details/81164697
在使用這個函式的時候,處理dim這一個維度,其他維度大小要保持一致。
x = torch.cat((x1, x2, x3, x4, x5), dim=1) # 在通道上面疊加
接下來的程式碼不需要解釋了:
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = F.upsample(x, size=(int(math.ceil(input.size()[-2]/4)),
int(math.ceil(input.size()[-1]/4))), mode='bilinear', align_corners=True)
low_level_features = self.conv2(low_level_features)
low_level_features = self.bn2(low_level_features)
low_level_features = self.relu(low_level_features)
x = torch.cat((x, low_level_features), dim=1)
x = self.last_conv(x)
x = F.upsample(x, size=input.size()[2:], mode='bilinear', align_corners=True)
return x
然後下一個函式 freeze_bn,凍結BN。
def freeze_bn(self):
    """Switch every ``nn.BatchNorm2d`` found in ``self.modules()`` to eval mode."""
    batch_norm_layers = [layer for layer in self.modules()
                         if isinstance(layer, nn.BatchNorm2d)]
    for layer in batch_norm_layers:
        layer.eval()
self.modules()返回一個包含當前模型所有模組的迭代器。NOTE:重複的模組只會被返回一次(children()也是如此)。在下面的第二個例子中,submodule只會被返回一次:
# conv 和 conv1不一樣
import torch.nn as nn
class Model(nn.Module):
    """Toy module that registers two distinct convolution layers."""

    def __init__(self):
        super(Model, self).__init__()
        # Two separate Conv2d objects, each registered under its own name.
        first_conv = nn.Conv2d(10, 20, 4)
        self.add_module("conv", first_conv)
        second_conv = nn.Conv2d(20, 10, 4)
        self.add_module("conv1", second_conv)


model = Model()
# Print every module the iterator yields, one per call.
for layer in model.modules():
    print(layer)
# 下面是輸出
Model (
(conv): Conv2d(10, 20, kernel_size=(4, 4), stride=(1, 1))
(conv1): Conv2d(20, 10, kernel_size=(4, 4), stride=(1, 1))
)
Conv2d(10, 20, kernel_size=(4, 4), stride=(1, 1))
Conv2d(20, 10, kernel_size=(4, 4), stride=(1, 1))
# conv 和 conv1 一樣
import torch.nn as nn
class Model(nn.Module):
    """Toy module that registers one convolution layer under two names."""

    def __init__(self):
        super(Model, self).__init__()
        shared_conv = nn.Conv2d(10, 20, 4)
        # The same object is registered twice, once per alias.
        for alias in ("conv", "conv1"):
            self.add_module(alias, shared_conv)


model = Model()
# Print every module the iterator yields, one per call.
for layer in model.modules():
    print(layer)
#下面是輸出
Model (
(conv): Conv2d(10, 20, kernel_size=(4, 4), stride=(1, 1))
(conv1): Conv2d(10, 20, kernel_size=(4, 4), stride=(1, 1))
)
Conv2d(10, 20, kernel_size=(4, 4), stride=(1, 1)) #重複的,複製的模組只返回一個
然後是eval():將模型設定成evaluation模式,僅當模型中有Dropout和BatchNorm時才會有影響。
拓展一下,train(mode=True):將module設定為training mode,同樣僅當模型中有Dropout和BatchNorm時才會有影響。
類DeepLabv3_plus的最後一個函式:
def __init_weight(self):
    """Re-initialise the Conv2d and BatchNorm2d layers in ``self.modules()``.

    Conv2d weights are filled by ``kaiming_normal_``; BatchNorm2d weights are
    set to 1 and biases to 0. Other module types are left untouched.
    """
    for layer in self.modules():
        if isinstance(layer, nn.Conv2d):
            torch.nn.init.kaiming_normal_(layer.weight)
            continue
        if isinstance(layer, nn.BatchNorm2d):
            layer.weight.data.fill_(1)
            layer.bias.data.zero_()
可以看到這個函式的名稱只有前面帶雙下劃線__,這種命名用於物件的資料封裝:以此命名的屬性或方法被視為類的私有屬性或私有方法,在外部無法直接用原名訪問,從而起到隱藏資料的作用。不過這種機制並不嚴格,它是通過自動"變形"(name mangling)實現的:類中所有以雙下劃線開頭的名稱__name都會自動變為"_類名__name"的新名稱,例如這裡的__init_weight在類外要寫成_DeepLabv3_plus__init_weight才能訪問。這種命名主要用來避免類屬性與子類中的同名屬性發生名稱衝突,可以參考https://www.cnblogs.com/linxiyue/p/7944871.html
torch.nn.init.kaiming_normal_(m.weight):即何愷明(Kaiming He)等人提出的初始化方法,用一個正態分佈生成值,填充輸入的張量或變數。結果張量中的值取樣自均值為0、標準差為sqrt(2/((1 + a^2) * fan_in))的正態分佈。
torch.nn.init.kaiming_normal_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu')
"""
根據He, K等人在“Delving deep into rectifiers: Surpassing human-level performance on ImageNet classification”中
描述的方法,用一個正態分佈生成值,填充輸入的張量或變數。
結果張量中的值取樣自均值為0,標準差為sqrt(2/((1 + a^2) * fan_in))的正態分佈。
引數:
tensor – n維的torch.Tensor或 autograd.Variable
a -這層之後使用的rectifier的斜率係數(ReLU的預設值為0)
mode -可以為“fan_in”(預設)或“fan_out”。“fan_in”保留前向傳播時權值方差的量級,“fan_out”保留反向傳播時的量級。
nonlinearity -非線性函式(`nn.functional` name),建議使用relu或者leaky_relu。其他的還有linear,sigmoid,tanh
例子
>>> w = torch.empty(3, 5)
>>> nn.init.kaiming_normal_(w, mode='fan_out', nonlinearity='relu')
"""
m.weight.data.fill_(1)應用了fill_(value)方法,將m.weight這個tensor的所有元素原地填充為1。
m.bias.data.zero_()應用了zero_()方法,將m.bias這個tensor的所有元素原地置為0。