Pytorch_模型轉Caffe(二)解析Pytorch模型*.pth
阿新 • • 發佈:2020-12-17
目錄
Pytorch_模型轉Caffe(二)解析Pytorch模型*.pth
1. Pytorch模型保存與讀取
a. 儲存、載入權重
# 模型儲存(僅儲存權重)
torch.save(model_object.state_dict(), './weights.pth')
# 模型載入(先建立模型,再匯入權重)
model = AlexNet(**kwargs)
model.load_state_dict(torch.load('./weights.pth'))
b.儲存、載入網路和權重
# 模型儲存(儲存整個模型:網路結構和權重)
torch.save(model_object, './model.pth')
# 模型載入(直接載入整個模型,無需先建立模型;但模型類別的定義仍須可被匯入)
model = torch.load('./model.pth')
2. Pytorch模型結構
Pytorch生成的檔案為.pth或.pt
1). summary檢視網路整體結構
- 首先安裝torchsummary
pip install torchsummary
- 以AlexNet為例,載入預訓練模型,檢視網路結構
import torch
from torch.autograd import Variable
from torchvision.models.alexnet import alexnet
from torchsummary import summary
if __name__=='__main__':
    name='alexnet'
    net=alexnet(True)
    print(type(net)) #<class 'torchvision.models.alexnet.AlexNet'>
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = net.to(device)
    summary(model, (3,227,227))
"""
# 網路結構
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1           [-1, 64, 56, 56]          23,296
              ReLU-2           [-1, 64, 56, 56]               0
         MaxPool2d-3           [-1, 64, 27, 27]               0
            Conv2d-4          [-1, 192, 27, 27]         307,392
              ReLU-5          [-1, 192, 27, 27]               0
         MaxPool2d-6          [-1, 192, 13, 13]               0
            Conv2d-7          [-1, 384, 13, 13]         663,936
              ReLU-8          [-1, 384, 13, 13]               0
            Conv2d-9          [-1, 256, 13, 13]         884,992
             ReLU-10          [-1, 256, 13, 13]               0
           Conv2d-11          [-1, 256, 13, 13]         590,080
             ReLU-12          [-1, 256, 13, 13]               0
        MaxPool2d-13            [-1, 256, 6, 6]               0
AdaptiveAvgPool2d-14            [-1, 256, 6, 6]               0
          Dropout-15                 [-1, 9216]               0
           Linear-16                 [-1, 4096]      37,752,832
             ReLU-17                 [-1, 4096]               0
          Dropout-18                 [-1, 4096]               0
           Linear-19                 [-1, 4096]      16,781,312
             ReLU-20                 [-1, 4096]               0
           Linear-21                 [-1, 1000]       4,097,000
================================================================
Total params: 61,100,840
Trainable params: 61,100,840
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.59
Forward/backward pass size (MB): 8.49
Params size (MB): 233.08
Estimated Total Size (MB): 242.16
----------------------------------------------------------------
"""
2). net.state_dict()解析權重值
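net.state_dict()返回有序字典(OrderedDict),key為引數名稱(由layer名稱加上.weight或.bias組成,例如features.0.weight),value為對應的weights或bias張量
- 只有帶有引數(可學習的weight、bias,或BN層running_mean這類buffer)的layer才會出現在模型的state_dict中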
import torch
from torch.autograd import Variable
from torchvision.models.alexnet import alexnet
from torchsummary import summary
if __name__=='__main__':
    name='alexnet'
    net=alexnet(True)
    print(type(net.state_dict())) #<class 'collections.OrderedDict'>
    # 只有帶有引數的layer才會被儲存到模型的state_dict中,如卷積層、線性層、BN層等(BN層的weight、bias以及running_mean、running_var等buffer都會被儲存);
    # 像池化層、ReLU、Dropout這些本身沒有引數的層不會出現在這個字典中;
    for param_tensor in net.state_dict(): # 字典的遍歷預設是遍歷 key,所以param_tensor實際上是鍵(key)
        print(param_tensor,'\t',net.state_dict()[param_tensor].size())
"""
features.0.weight torch.Size([64, 3, 11, 11])
features.0.bias torch.Size([64])
features.3.weight torch.Size([192, 64, 5, 5])
features.3.bias torch.Size([192])
features.6.weight torch.Size([384, 192, 3, 3])
features.6.bias torch.Size([384])
features.8.weight torch.Size([256, 384, 3, 3])
features.8.bias torch.Size([256])
features.10.weight torch.Size([256, 256, 3, 3])
features.10.bias torch.Size([256])
classifier.1.weight torch.Size([4096, 9216])
classifier.1.bias torch.Size([4096])
classifier.4.weight torch.Size([4096, 4096])
classifier.4.bias torch.Size([4096])
classifier.6.weight torch.Size([1000, 4096])
classifier.6.bias torch.Size([1000])
"""
3). net.named_parameters()獲取layer和weight
import torch
from torch.autograd import Variable
from torchvision.models.alexnet import alexnet
from torchsummary import summary
if __name__=='__main__':
    name='alexnet'
    net=alexnet(True)
    # 網路引數
    for layer in net.named_parameters():
        layer_name = layer[0]
        layer_weight = layer[1].size()
        print(layer_name,' ',layer_weight)
"""
features.0.weight torch.Size([64, 3, 11, 11])
features.0.bias torch.Size([64])
features.3.weight torch.Size([192, 64, 5, 5])
features.3.bias torch.Size([192])
features.6.weight torch.Size([384, 192, 3, 3])
features.6.bias torch.Size([384])
features.8.weight torch.Size([256, 384, 3, 3])
features.8.bias torch.Size([256])
features.10.weight torch.Size([256, 256, 3, 3])
features.10.bias torch.Size([256])
classifier.1.weight torch.Size([4096, 9216])
classifier.1.bias torch.Size([4096])
classifier.4.weight torch.Size([4096, 4096])
classifier.4.bias torch.Size([4096])
classifier.6.weight torch.Size([1000, 4096])
classifier.6.bias torch.Size([1000])
"""
4). net.named_modules()
import torch
from torch.autograd import Variable
from torchvision.models.alexnet import alexnet
from torchsummary import summary
if __name__=='__main__':
    name='alexnet'
    net=alexnet(True)
    for name,layer in net.named_modules():
        print(name,'-->',layer)
"""
--> AlexNet(
(features): Sequential(
(0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
(1): ReLU(inplace=True)
(2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(4): ReLU(inplace=True)
(5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(7): ReLU(inplace=True)
(8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): ReLU(inplace=True)
(10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
(classifier): Sequential(
(0): Dropout(p=0.5, inplace=False)
(1): Linear(in_features=9216, out_features=4096, bias=True)
(2): ReLU(inplace=True)
(3): Dropout(p=0.5, inplace=False)
(4): Linear(in_features=4096, out_features=4096, bias=True)
(5): ReLU(inplace=True)
(6): Linear(in_features=4096, out_features=1000, bias=True)
)
)
features --> Sequential(
(0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
(1): ReLU(inplace=True)
(2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(4): ReLU(inplace=True)
(5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(7): ReLU(inplace=True)
(8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): ReLU(inplace=True)
(10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
)
features.0 --> Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
features.1 --> ReLU(inplace=True)
features.2 --> MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
features.3 --> Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
features.4 --> ReLU(inplace=True)
features.5 --> MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
features.6 --> Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
features.7 --> ReLU(inplace=True)
features.8 --> Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
features.9 --> ReLU(inplace=True)
features.10 --> Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
features.11 --> ReLU(inplace=True)
features.12 --> MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
avgpool --> AdaptiveAvgPool2d(output_size=(6, 6))
classifier --> Sequential(
(0): Dropout(p=0.5, inplace=False)
(1): Linear(in_features=9216, out_features=4096, bias=True)
(2): ReLU(inplace=True)
(3): Dropout(p=0.5, inplace=False)
(4): Linear(in_features=4096, out_features=4096, bias=True)
(5): ReLU(inplace=True)
(6): Linear(in_features=4096, out_features=1000, bias=True)
)
classifier.0 --> Dropout(p=0.5, inplace=False)
classifier.1 --> Linear(in_features=9216, out_features=4096, bias=True)
classifier.2 --> ReLU(inplace=True)
classifier.3 --> Dropout(p=0.5, inplace=False)
classifier.4 --> Linear(in_features=4096, out_features=4096, bias=True)
classifier.5 --> ReLU(inplace=True)
classifier.6 --> Linear(in_features=4096, out_features=1000, bias=True)
"""