1. PyTorch模型保存与读取

a. 保存、加载权重

# Save the model (weights only: just the state_dict is written to disk).
torch.save(model_object.state_dict(), './weights.pth')
# Load the model: build the network first, then load the weights into it.
model = AlexNet(**kwargs)
# torch.load unpickles the file, so only open checkpoints you trust.
# map_location='cpu' lets a checkpoint that was saved from a GPU be read on a
# CPU-only machine; load_state_dict then copies the values into the model's
# own parameters.
model.load_state_dict(torch.load('./weights.pth', map_location='cpu'))

b.保存、加载网络和权重

# Save the model (the whole model object, i.e. network structure and weights).
torch.save(model_object, './model.pth')
# Load the model: the pickled object is restored directly, so the network is
# not constructed by hand first.
# NOTE(review): torch.load unpickles the file - only load files you trust.
# NOTE(review): recent PyTorch releases appear to default torch.load to
# weights_only=True, which rejects whole-model pickles; confirm against the
# installed version and pass weights_only=False for trusted files if needed.
model = torch.load('./model.pth')

2. Pytorch模型结构

PyTorch保存的模型文件通常以.pth或.pt为后缀

1). summary查看网络整体结构

  • 首先安装torchsummary:pip install torchsummary
  • 以AlexNet为例,加载预训练模型,查看网络结构
import torch
from torch.autograd import Variable
from torchvision.models.alexnet import alexnet
from torchsummary import summary
if __name__ == '__main__':
    # Build AlexNet with its pretrained weights; the keyword makes the intent
    # of the flag explicit instead of a bare positional True.
    net = alexnet(pretrained=True)
    print(type(net))               #<class 'torchvision.models.alexnet.AlexNet'>
    # Use the GPU when one is available, otherwise stay on the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = net.to(device)
    # Print the per-layer table (layer type, output shape, parameter count)
    # for a 3x227x227 input.
    summary(model, (3, 227, 227))
"""
# 网络结构
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1           [-1, 64, 56, 56]          23,296
              ReLU-2           [-1, 64, 56, 56]               0
         MaxPool2d-3           [-1, 64, 27, 27]               0
            Conv2d-4          [-1, 192, 27, 27]         307,392
              ReLU-5          [-1, 192, 27, 27]               0
         MaxPool2d-6          [-1, 192, 13, 13]               0
            Conv2d-7          [-1, 384, 13, 13]         663,936
              ReLU-8          [-1, 384, 13, 13]               0
            Conv2d-9          [-1, 256, 13, 13]         884,992
             ReLU-10          [-1, 256, 13, 13]               0
           Conv2d-11          [-1, 256, 13, 13]         590,080
             ReLU-12          [-1, 256, 13, 13]               0
        MaxPool2d-13            [-1, 256, 6, 6]               0
AdaptiveAvgPool2d-14            [-1, 256, 6, 6]               0
          Dropout-15                 [-1, 9216]               0
           Linear-16                 [-1, 4096]      37,752,832
             ReLU-17                 [-1, 4096]               0
          Dropout-18                 [-1, 4096]               0
           Linear-19                 [-1, 4096]      16,781,312
             ReLU-20                 [-1, 4096]               0
           Linear-21                 [-1, 1000]       4,097,000
================================================================
Total params: 61,100,840
Trainable params: 61,100,840
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.59
Forward/backward pass size (MB): 8.49
Params size (MB): 233.08
Estimated Total Size (MB): 242.16
----------------------------------------------------------------
"""

2). net.state_dict()解析权重值

net.state_dict()返回字典,key为layer名称,value为weights与bias

  • 只有带有可学习参数或buffer的layer才会出现在模型的state_dict中(例如卷积层、线性层,以及BN层的weight/bias和running_mean/running_var);ReLU、池化、Dropout等没有参数的层不会出现
import torch
from torch.autograd import Variable
from torchvision.models.alexnet import alexnet
from torchsummary import summary
if __name__ == '__main__':
    net = alexnet(pretrained=True)
    # Take the state_dict once and reuse it instead of calling
    # net.state_dict() again on every loop iteration.
    state = net.state_dict()
    print(type(state))  #<class 'collections.OrderedDict'>
    # Only layers that own parameters (or persistent buffers) contribute
    # entries, e.g. convolution and linear layers; parameter-free layers such
    # as ReLU, pooling and Dropout do not appear. BatchNorm layers DO appear:
    # they carry weight/bias plus running statistics.
    # Each item is a (key, tensor) pair, where the key is the dotted parameter
    # name such as 'features.0.weight'.
    for param_name, tensor in state.items():
        print(param_name, '\t', tensor.size())
"""
features.0.weight        torch.Size([64, 3, 11, 11])
features.0.bias          torch.Size([64])
features.3.weight        torch.Size([192, 64, 5, 5])
features.3.bias          torch.Size([192])
features.6.weight        torch.Size([384, 192, 3, 3])
features.6.bias          torch.Size([384])
features.8.weight        torch.Size([256, 384, 3, 3])
features.8.bias          torch.Size([256])
features.10.weight       torch.Size([256, 256, 3, 3])
features.10.bias         torch.Size([256])
classifier.1.weight      torch.Size([4096, 9216])
classifier.1.bias        torch.Size([4096])
classifier.4.weight      torch.Size([4096, 4096])
classifier.4.bias        torch.Size([4096])
classifier.6.weight      torch.Size([1000, 4096])
classifier.6.bias        torch.Size([1000])
"""

3). net.named_parameters()获取layer和weight

import torch
from torch.autograd import Variable
from torchvision.models.alexnet import alexnet
from torchsummary import summary
if __name__ == '__main__':
    net = alexnet(pretrained=True)
    # Network parameters: named_parameters() yields (name, parameter) pairs,
    # so unpack them directly rather than indexing the tuple.
    for layer_name, param in net.named_parameters():
        # Print the dotted parameter name next to the parameter's shape.
        print(layer_name, '   ', param.size())
"""
features.0.weight     torch.Size([64, 3, 11, 11])
features.0.bias     torch.Size([64])
features.3.weight     torch.Size([192, 64, 5, 5])
features.3.bias     torch.Size([192])
features.6.weight     torch.Size([384, 192, 3, 3])
features.6.bias     torch.Size([384])
features.8.weight     torch.Size([256, 384, 3, 3])
features.8.bias     torch.Size([256])
features.10.weight     torch.Size([256, 256, 3, 3])
features.10.bias     torch.Size([256])
classifier.1.weight     torch.Size([4096, 9216])
classifier.1.bias     torch.Size([4096])
classifier.4.weight     torch.Size([4096, 4096])
classifier.4.bias     torch.Size([4096])
classifier.6.weight     torch.Size([1000, 4096])
classifier.6.bias     torch.Size([1000])
"""

4). net.named_modules()

import torch
from torch.autograd import Variable
from torchvision.models.alexnet import alexnet
from torchsummary import summary
if __name__ == '__main__':
    net = alexnet(pretrained=True)
    # named_modules() yields (name, module) pairs for the network itself and
    # every nested submodule; the first pair is the root module, whose name is
    # the empty string, followed by 'features', 'features.0', and so on.
    for name, layer in net.named_modules():
        print(name, '-->', layer)
"""
 --> AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=4096, out_features=4096, bias=True)
    (5): ReLU(inplace=True)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)
features --> Sequential(
  (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
  (1): ReLU(inplace=True)
  (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (4): ReLU(inplace=True)
  (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU(inplace=True)
  (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): ReLU(inplace=True)
  (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
)
features.0 --> Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
features.1 --> ReLU(inplace=True)
features.2 --> MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
features.3 --> Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
features.4 --> ReLU(inplace=True)
features.5 --> MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
features.6 --> Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
features.7 --> ReLU(inplace=True)
features.8 --> Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
features.9 --> ReLU(inplace=True)
features.10 --> Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
features.11 --> ReLU(inplace=True)
features.12 --> MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
avgpool --> AdaptiveAvgPool2d(output_size=(6, 6))
classifier --> Sequential(
  (0): Dropout(p=0.5, inplace=False)
  (1): Linear(in_features=9216, out_features=4096, bias=True)
  (2): ReLU(inplace=True)
  (3): Dropout(p=0.5, inplace=False)
  (4): Linear(in_features=4096, out_features=4096, bias=True)
  (5): ReLU(inplace=True)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)
classifier.0 --> Dropout(p=0.5, inplace=False)
classifier.1 --> Linear(in_features=9216, out_features=4096, bias=True)
classifier.2 --> ReLU(inplace=True)
classifier.3 --> Dropout(p=0.5, inplace=False)
classifier.4 --> Linear(in_features=4096, out_features=4096, bias=True)
classifier.5 --> ReLU(inplace=True)
classifier.6 --> Linear(in_features=4096, out_features=1000, bias=True)
"""