pytorch 参数初始化

Posted learningcaiji

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了pytorch 参数初始化相关的知识,希望对你有一定的参考价值。

利用pytorch 定义自己的网络模型时,需要继承torch.nn.Module 基类。

基类中有parameters()、modules()、children()等方法

import torch
import torch.nn as nn

class myModel(nn.Module):
    """Small demo CNN used to illustrate parameters(), modules() and children().

    Layout: two Conv-BatchNorm-ReLU stages (3 -> 64 -> 128 channels), one
    plain 128 -> 128 convolution, a 2x2 average pool and a linear classifier.

    The classifier is built with 5*5*128 input features, so the pooled
    feature map must flatten to exactly that many values per sample.
    """

    def __init__(self, num_classes):
        """Create the layers; ``num_classes`` is the classifier output size."""
        super().__init__()

        # Stage 1: 3 -> 64 channels, 3x3 kernel with padding 1, in-place ReLU.
        stage_one = (
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
        )
        self.conv1 = nn.Sequential(*stage_one)

        # Stage 2: 64 -> 128 channels, same layout as stage 1.
        stage_two = (
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
        )
        self.conv2 = nn.Sequential(*stage_two)

        # A bare convolution, deliberately not wrapped in a Sequential.
        self.conv3 = nn.Conv2d(128, 128, kernel_size=3, padding=1)

        self.avgpool = nn.AvgPool2d(2)

        self.fc = nn.Linear(5 * 5 * 128, num_classes)

    def forward(self, x):
        """Run the conv stack, pool, flatten per sample and classify."""
        features = self.conv1(x)
        for layer in (self.conv2, self.conv3, self.avgpool):
            features = layer(features)

        # Keep the batch dimension and collapse everything else.
        batch_size = features.size(0)
        flattened = features.view(batch_size, -1)
        return self.fc(flattened)

看一下parameters方法

# Build the demo network with 100 output classes.
mymodel = myModel(100)


# parameters() yields the learnable tensors of the model one by one.
for m in mymodel.parameters():
    # The separator must be a quoted string; bare dashes are a syntax error.
    print('---------------')
    # m.name prints None here (see the output below); use named_parameters()
    # when the parameter names are needed.
    print(m.name, m.shape)

>>>---------------
None torch.Size([64, 3, 3, 3])
---------------
None torch.Size([64])
---------------
None torch.Size([64])
---------------
None torch.Size([64])
---------------
None torch.Size([128, 64, 3, 3])
---------------
None torch.Size([128])
---------------
None torch.Size([128])


# Collect everything parameters() yields into a list to inspect the values.
list(mymodel.parameters())
>>>[Parameter containing:
 tensor([[[[ 0.1143,  0.1445,  0.0634],
           [-0.1294, -0.1618,  0.0916],
           [-0.1492, -0.0222,  0.1498]],
 
          [[-0.1576, -0.0599,  0.0668],
           [ 0.0777,  0.1712, -0.1479],
           [-0.0921, -0.0166, -0.1750]],

看一下modules()方法

# modules() yields the model itself first, then every nested sub-module,
# including the layers inside each Sequential (see the output below).
for m in mymodel.modules():
    # The separator must be a quoted string; bare dashes are a syntax error.
    print('---------------')
    print(m)


---------------
myModel(
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (conv2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (conv3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (fc): Linear(in_features=3200, out_features=100, bias=True)
)
---------------
Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
)
---------------
Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
---------------
BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
---------------
ReLU(inplace=True)
---------------
Sequential(
  (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
)
---------------
Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
---------------
BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
---------------
ReLU(inplace=True)
---------------
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
---------------
AvgPool2d(kernel_size=2, stride=2, padding=0)
---------------
Linear(in_features=3200, out_features=100, bias=True)

看一下children()方法

# children() yields only the direct sub-modules of the model; the layers
# inside each Sequential are not visited separately (see the output below).
for m in mymodel.children():
    # The separator must be a quoted string; bare dashes are a syntax error.
    print('---------------')
    print(m)


---------------
Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
)
---------------
Sequential(
  (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
)
---------------
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
---------------
AvgPool2d(kernel_size=2, stride=2, padding=0)
---------------
Linear(in_features=3200, out_features=100, bias=True)

比较一下children() 方法和 modules() 方法

model.modules()会遍历model中所有的子层,而model.children()仅会遍历当前层,如上所示

所以在进行参数初始化的时候,需要运用self.modules() 【类内初始化】或者model.modules()【类外初始化】,这样可以保证初始化所有的参数

初始化w : weight.data.具体方式(normal_、fill_(1)、zero_())

初始化b : bias.data.具体方式(normal_、fill_(1)、zero_())

# In-class initialization: this loop uses `self`, so it is meant to run
# inside the model's __init__ after all layers have been created.
import math  # math.sqrt is used below

for m in self.modules():
    if isinstance(m, nn.Conv2d):
        # Zero-mean normal with std = sqrt(2 / n),
        # where n = kernel_height * kernel_width * out_channels.
        n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
        m.weight.data.normal_(0, math.sqrt(2. / n))
    elif isinstance(m, nn.BatchNorm2d):  # the class is BatchNorm2d, not BatchNorm2D
        # Batch norm starts with scale 1 and shift 0.
        m.weight.data.fill_(1)
        m.bias.data.zero_()
也可以使用 nn.init.kaiming_normal_ 等方法,在类外进行初始化:
def initialize_weights(*models):
    """Initialize conv, batch-norm and linear layers of the given models in place.

    Every sub-module reachable through ``modules()`` is visited, so nested
    containers are covered as well.

    * ``nn.Conv2d``: weight drawn with ``nn.init.kaiming_normal_`` for ReLU;
      the bias is left untouched.
    * ``nn.BatchNorm2d``: weight set to 1, bias set to 1e-4.
    * ``nn.Linear``: weight drawn from N(mean=0, std=1e-4), bias set to 0.

    Layers created without a bias (or batch norm with ``affine=False``) have
    ``None`` for the missing tensors; those are skipped.

    Args:
        *models: Any number of ``nn.Module`` objects to initialize.

    Returns:
        None. The modules are modified in place.
    """
    for model in models:
        for m in model.modules():
            if isinstance(m, nn.Conv2d):
                # The nonlinearity is given by name as a string; a bare
                # `relu` would be an undefined variable.
                nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                if m.weight is not None:
                    nn.init.constant_(m.weight, 1.)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 1e-4)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, mean=0.0, std=0.0001)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

还有一个常用的方法来设置参数是否需要计算梯度(即是否参与反向传播):

# parameters() returns a generator, so the flag has to be set on each
# parameter tensor; assigning it on the generator itself raises AttributeError.
for param in model.parameters():
    param.requires_grad = False

其他获取模型信息方法

 mymodel.fc
>>>Linear(in_features=3200, out_features=100, bias=True)

 mymodel.fc.in_features
>>>3200

mymodel.conv3.in_channels
>>>128

 


以上是关于pytorch 参数初始化的主要内容,如果未能解决你的问题,请参考以下文章

PyTorch 迁移学习 (Transfer Learning) 代码详解

PyTorch 迁移学习 (Transfer Learning) 代码详解

pytorch实现网络的保存和提取

[PyTorch]PyTorch中模型的参数初始化的几种方法(转)

PyTorch学习系列——参数_初始化

pytorch 参数初始化