pytorch 參數初始化


利用pytorch 定義自己的網絡模型時,需要繼承torch.nn.Module 基類。

基類中有parameters()、modules()、children()等方法

import math

import torch
import torch.nn as nn

class myModel(nn.Module):
    """Small CNN: two conv-BN-ReLU blocks, a plain conv, average pooling, and a linear classifier."""

    def __init__(self, num_classes):
        super(myModel, self).__init__()
        # First conv block: 3 -> 64 channels; 3x3 kernel with pad 1 preserves spatial size.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
        )
        # Second conv block: 64 -> 128 channels.
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
        )
        # Plain conv layer (no BN/ReLU here).
        self.conv3 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        # Halves the spatial resolution.
        self.avgpool = nn.AvgPool2d(2)
        # Classifier head; 5*5*128 assumes a 10x10 input image — TODO confirm expected input size.
        self.fc = nn.Linear(5 * 5 * 128, num_classes)

    def forward(self, x):
        """Extract features, flatten per sample, and return class logits."""
        features = self.avgpool(self.conv3(self.conv2(self.conv1(x))))
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

看一下parameters方法

# Instantiate the model with 100 output classes.
mymodel = myModel(100)


# parameters() yields every learnable tensor; a plain Parameter has no .name, so it prints None.
for param in mymodel.parameters():
    print('---------------')
    print(param.name, param.shape)

>>>---------------
None torch.Size([64, 3, 3, 3])
---------------
None torch.Size([64])
---------------
None torch.Size([64])
---------------
None torch.Size([64])
---------------
None torch.Size([128, 64, 3, 3])
---------------
None torch.Size([128])
---------------
None torch.Size([128])


list(mymodel.parameters())
>>>[Parameter containing:
 tensor([[[[ 0.1143,  0.1445,  0.0634],
           [-0.1294, -0.1618,  0.0916],
           [-0.1492, -0.0222,  0.1498]],
 
          [[-0.1576, -0.0599,  0.0668],
           [ 0.0777,  0.1712, -0.1479],
           [-0.0921, -0.0166, -0.1750]],

看一下modules()方法

# modules() walks the whole module tree: the model itself, each Sequential, and every nested layer.
for submodule in mymodel.modules():
    print('---------------')
    print(submodule)


---------------
myModel(
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (conv2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (conv3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (fc): Linear(in_features=3200, out_features=100, bias=True)
)
---------------
Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
)
---------------
Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
---------------
BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
---------------
ReLU(inplace=True)
---------------
Sequential(
  (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
)
---------------
Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
---------------
BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
---------------
ReLU(inplace=True)
---------------
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
---------------
AvgPool2d(kernel_size=2, stride=2, padding=0)
---------------
Linear(in_features=3200, out_features=100, bias=True)

看一下children()方法

# children() yields only the direct sub-modules (one level deep), unlike modules().
for child in mymodel.children():
    print('---------------')
    print(child)


---------------
Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
)
---------------
Sequential(
  (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
)
---------------
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
---------------
AvgPool2d(kernel_size=2, stride=2, padding=0)
---------------
Linear(in_features=3200, out_features=100, bias=True)

比較一下children() 方法和 modules() 方法

model.modules()會遍歷model中所有的子層,而model.children()僅會遍歷當前層,如上所示

所以在進行參數初始化的時候,需要運用self.modules() 【類內初始化】或者model.modules()【類外初始化】,這樣可以保證初始化所有的參數

初始化w : weight.data.具體方式(normal_、fill_(1)、zero_())

初始化b : bias.data.具體方式(normal_、fill_(1)、zero_())

# In-place weight init, typically called from __init__ of an nn.Module subclass.
# Requires `import math` at the top of the file.
for m in self.modules():
    if isinstance(m, nn.Conv2d):
        # He/Kaiming-style init for ReLU nets: std = sqrt(2 / fan_out),
        # with fan_out = kernel_h * kernel_w * out_channels.
        n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
        m.weight.data.normal_(0, math.sqrt(2. / n))
    elif isinstance(m, nn.BatchNorm2d):  # fixed: `nn.BatchNorm2D` does not exist (AttributeError)
        m.weight.data.fill_(1)
        m.bias.data.zero_()
也可以直接使用 nn.init.kaiming_normal_ 進行初始化,例如:
def initialize_weights(*models):
    """Re-initialize every Conv2d, BatchNorm2d, and Linear layer of the given models, in place."""
    for net in models:
        for layer in net.modules():
            _init_layer(layer)


def _init_layer(layer):
    """Dispatch per-layer init: Kaiming for conv, unit-weight/tiny-bias for BN, near-zero gaussian for linear."""
    if isinstance(layer, nn.Conv2d):
        nn.init.kaiming_normal_(layer.weight.data, nonlinearity='relu')
    elif isinstance(layer, nn.BatchNorm2d):
        layer.weight.data.fill_(1.)
        layer.bias.data.fill_(1e-4)
    elif isinstance(layer, nn.Linear):
        layer.weight.data.normal_(0.0, 0.0001)
        layer.bias.data.zero_()

 還有一個常用的方法來設置參數是否需要反向傳播

# BUG in the original: model.parameters() returns a generator, and assigning
# .requires_grad on the generator object raises AttributeError — it freezes
# nothing. Freeze each parameter individually instead:
for param in model.parameters():
    param.requires_grad = False

其他獲取模型信息方法

 mymodel.fc
>>>Linear(in_features=3200, out_features=100, bias=True)

 mymodel.fc.in_features
>>>3200

mymodel.conv3.in_channels
>>>128

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM