nn.Conv2d() and nn.MaxPool2d() in Convolutional Neural Networks
The PyTorch implementation of convolutional neural networks:
nn.Conv2d()
This is PyTorch's convolution module.
Parameter list
| Parameter | Purpose |
| --- | --- |
| in_channels | depth (number of channels) of the input volume |
| out_channels | depth (number of channels) of the output volume |
| kernel_size | size of the filter (convolution kernel); see note 1 |
| stride | step size of the sliding window |
| padding | number of rings of zero padding around the input; see note 2 |
| bias | whether to add a bias term; defaults to True (enabled) |
| groups | connection pattern between the input and output channel depths; see note 3 |
| dilation | spacing between kernel elements over the input volume; see note 4 |
Notes:
1. A single number gives a kernel with equal height and width, e.g. kernel_size=3; a tuple gives a kernel with different height and width, e.g. kernel_size=(3, 2).
2. padding=0 means no zero padding; padding=1 pads one pixel of zeros on all four sides.
3. groups controls how output channels connect to input channels. With the default groups=1, every output channel is connected to every input channel. With groups=2, the input channels are split into two groups and the output channels into two corresponding groups, each pair connected independently, so both in_channels and out_channels must be divisible by groups.
4. dilation defaults to 1; for details, see write-ups on the dilation parameter of nn.Conv2d() (e.g. on CSDN).
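To make these concrete, here is a minimal sketch (not from the original text) showing how kernel_size, stride, padding, and groups affect the result; the output spatial size follows out = floor((in + 2*padding - dilation*(kernel_size-1) - 1) / stride + 1):

```python
import torch
import torch.nn as nn

x = torch.randn(1, 4, 8, 8)  # batch of 1, 4 input channels, 8x8 spatial size

same = nn.Conv2d(4, 6, kernel_size=3, stride=1, padding=1)
print(same(x).shape)         # torch.Size([1, 6, 8, 8]) -- padding=1 preserves 8x8

strided = nn.Conv2d(4, 6, kernel_size=3, stride=2, padding=1)
print(strided(x).shape)      # torch.Size([1, 6, 4, 4]) -- stride=2 halves the size

grouped = nn.Conv2d(4, 6, kernel_size=3, padding=1, groups=2)
print(grouped.weight.shape)  # torch.Size([6, 2, 3, 3]) -- each filter sees only 4/2 = 2 input channels
```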
nn.MaxPool2d()
This is the max-pooling layer.
Parameter list:
| Parameter | Purpose |
| --- | --- |
| kernel_size | same as in nn.Conv2d() above |
| stride | same as in nn.Conv2d() above |
| padding | same as in nn.Conv2d() above |
| dilation | same as in nn.Conv2d() above |
| return_indices | whether to also return the indices of the maxima; defaults to return_indices=False |
| ceil_mode | whether to use ceil instead of floor when computing the output size; defaults to ceil_mode=False |
Note: return_indices and ceil_mode are rarely set in practice.
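A small sketch (assuming a 1×1×5×5 input, not from the original) of what these two parameters do:

```python
import torch
import torch.nn as nn

x = torch.randn(1, 1, 5, 5)

# ceil_mode controls how a leftover border is handled: floor drops it, ceil keeps it.
print(nn.MaxPool2d(2, 2, ceil_mode=False)(x).shape)  # torch.Size([1, 1, 2, 2])
print(nn.MaxPool2d(2, 2, ceil_mode=True)(x).shape)   # torch.Size([1, 1, 3, 3])

# return_indices=True also returns each maximum's position,
# which nn.MaxUnpool2d can later use to undo the pooling.
values, indices = nn.MaxPool2d(2, 2, return_indices=True)(x)
print(values.shape, indices.shape)                   # both torch.Size([1, 1, 2, 2])
```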
```python
import torch.nn as nn


class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        layer1 = nn.Sequential()
        # Convolve the 3-channel RGB image with 32 groups of kernels (3 kernels per
        # group); the per-channel results in each group are summed, giving 32 output channels.
        layer1.add_module('conv1', nn.Conv2d(3, 32, (3, 3), (1, 1), padding=1))
        layer1.add_module('relu1', nn.ReLU(True))
        layer1.add_module('pool1', nn.MaxPool2d(2, 2))
        self.layer1 = layer1

        layer2 = nn.Sequential()
        layer2.add_module('conv2', nn.Conv2d(32, 64, (3, 3), (1, 1), padding=1))
        layer2.add_module('relu2', nn.ReLU(True))
        layer2.add_module('pool2', nn.MaxPool2d(2, 2))
        self.layer2 = layer2

        layer3 = nn.Sequential()
        layer3.add_module('conv3', nn.Conv2d(64, 128, (3, 3), (1, 1), padding=1))
        layer3.add_module('relu3', nn.ReLU(True))
        layer3.add_module('pool3', nn.MaxPool2d(2, 2))
        self.layer3 = layer3

        layer4 = nn.Sequential()
        layer4.add_module('fc1', nn.Linear(2048, 512))
        layer4.add_module('fc_relu1', nn.ReLU(True))
        layer4.add_module('fc2', nn.Linear(512, 64))
        layer4.add_module('fc_relu2', nn.ReLU(True))
        layer4.add_module('f3', nn.Linear(64, 10))
        self.layer4 = layer4

    def forward(self, x):
        conv1 = self.layer1(x)
        conv2 = self.layer2(conv1)
        conv3 = self.layer3(conv2)
        # flatten to (batch_size, 128 * 4 * 4) = (batch_size, 2048) for a 32x32 input
        fc_input = conv3.view(conv3.size(0), -1)
        fc_out = self.layer4(fc_input)
        return fc_out


model = SimpleCNN()
print(model)
```
Output:
```
SimpleCNN(
  (layer1): Sequential(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU(inplace=True)
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu2): ReLU(inplace=True)
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu3): ReLU(inplace=True)
    (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer4): Sequential(
    (fc1): Linear(in_features=2048, out_features=512, bias=True)
    (fc_relu1): ReLU(inplace=True)
    (fc2): Linear(in_features=512, out_features=64, bias=True)
    (fc_relu2): ReLU(inplace=True)
    (f3): Linear(in_features=64, out_features=10, bias=True)
  )
)
```
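fc1 expects 2048 = 128 × 4 × 4 input features, which corresponds to a 32×32 input image: three rounds of size-preserving convolution plus 2×2 pooling take 32 → 16 → 8 → 4. A quick check with a dummy tensor (a sketch, not part of the original):

```python
import torch

dummy = torch.randn(1, 3, 32, 32)  # one 3-channel 32x32 image
features = model.layer3(model.layer2(model.layer1(dummy)))
print(features.shape)              # torch.Size([1, 128, 4, 4]) -- flattens to 2048
print(model(dummy).shape)          # torch.Size([1, 10])
```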
Extracting the model's hierarchical structure
The hierarchy can be extracted with several attributes of nn.Module. The first is children(), which returns an iterator over the immediate sub-modules; in the model above it iterates over self.layer1, self.layer2, self.layer3, and self.layer4 without descending into them. modules() returns an iterator over every module in the model, so it reaches the innermost layers, such as the self.layer1.conv1 module. Their counterparts named_children() and named_modules() return not only the module iterators but also each layer's name.
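For example, a small sketch using the model defined above:

```python
for name, child in model.named_children():
    print(name, type(child).__name__)  # layer1 Sequential ... layer4 Sequential

for name, module in model.named_modules():
    if name:                           # skip the root module, whose name is ''
        print(name)                    # layer1, layer1.conv1, layer1.relu1, ...
```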
Extract the first two layers of model:
```python
nn.Sequential(*list(model.children())[:2])
```
Output:
```
Sequential(
  (0): Sequential(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU(inplace=True)
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu2): ReLU(inplace=True)
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
)
```
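The truncated Sequential is itself a usable module, e.g. as a feature extractor (a sketch; the 32×32 input matches the size analysis above):

```python
import torch

feature_extractor = nn.Sequential(*list(model.children())[:2])
feats = feature_extractor(torch.randn(1, 3, 32, 32))
print(feats.shape)  # torch.Size([1, 64, 8, 8]) -- after two conv + pool stages
```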
Extract all the convolutional layers from model:
```python
conv_model = nn.Sequential()
for name, module in model.named_modules():
    if isinstance(module, nn.Conv2d):
        # names look like 'layer1.conv1'; keep only the part after the dot
        conv_model.add_module(name.split('.')[1], module)
print(conv_model)
```
Output:
```
Sequential(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
```
Extracting and initializing network parameters
nn.Module has two particularly important parameter-related attributes: named_parameters() and parameters(). The former yields each layer's name together with its parameters; the latter yields all of the network's parameters.
```python
for param in model.named_parameters():
    print(param[0])
    # print(param[1])
```
Output:
```
layer1.conv1.weight
layer1.conv1.bias
layer2.conv2.weight
layer2.conv2.bias
layer3.conv3.weight
layer3.conv3.bias
layer4.fc1.weight
layer4.fc1.bias
layer4.fc2.weight
layer4.fc2.bias
layer4.f3.weight
layer4.f3.bias
```
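To actually initialize these parameters, one common approach uses torch.nn.init; the choice of Xavier initialization below is an assumption for illustration, not from the original:

```python
from torch.nn import init

for name, param in model.named_parameters():
    if 'weight' in name and param.dim() > 1:
        init.xavier_normal_(param)  # example choice: Xavier (Glorot) for conv/linear weights
    elif 'bias' in name:
        init.constant_(param, 0.0)  # zero the biases
```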
Example: classifying the MNIST dataset with a convolutional neural network
```python
import os

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.utils.data
import torchvision.datasets
from torchvision import transforms


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # 28x28 -> 26x26 (3x3 conv, no padding)
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=(3, 3)),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
        )
        # 26x26 -> 24x24 -> 12x12 (conv, then 2x2 max pooling)
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=(3, 3)),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # 12x12 -> 10x10
        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=(3, 3)),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True)
        )
        # 10x10 -> 8x8 -> 4x4
        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=(3, 3)),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc = nn.Sequential(
            nn.Linear(128 * 4 * 4, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 128),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = x.view(x.size(0), -1)  # flatten to (batch_size, 2048)
        x = self.fc(x)
        return x
```
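Each 3×3 convolution without padding trims two pixels, and each 2×2 pooling halves the size: 28 → 26 → 24 → 12 → 10 → 8 → 4, which is where 128 * 4 * 4 comes from. A quick check (a sketch, not in the original):

```python
import torch

net = CNN()
out = net(torch.randn(2, 1, 28, 28))  # a batch of 2 fake MNIST images
print(out.shape)                      # torch.Size([2, 10])
```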
The training script (it assumes a CUDA-capable GPU, since the model and data are moved there):
```python
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

data_tf = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize([0.5], [0.5])]
)
train_dataset = torchvision.datasets.MNIST(root='F:/機器學習/pytorch/書/data/mnist', train=True,
                                           transform=data_tf, download=True)
test_dataset = torchvision.datasets.MNIST(root='F:/機器學習/pytorch/書/data/mnist', train=False,
                                          transform=data_tf, download=True)

batch_size = 100
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size
)

model = CNN()
model = model.cuda()
criterion = nn.CrossEntropyLoss()
criterion = criterion.cuda()
optimizer = torch.optim.Adam(model.parameters())

# To save time, three epochs are enough.
iter_step = 3
train_acc = []  # per-epoch training accuracy (named loss1 in the original)
test_acc = []   # per-epoch test accuracy (named loss2 in the original)
for step in range(iter_step):
    train_correct = 0
    test_correct = 0
    for images, labels in train_loader:
        images = images.cuda()
        labels = labels.cuda()
        images = images.reshape(-1, 1, 28, 28)
        output = model(images)
        optimizer.zero_grad()
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        _, pred = torch.max(output, 1)
        train_correct += int(torch.sum(pred == labels)) / batch_size
    # Evaluation: the else branch of a for loop runs once the loop finishes normally.
    else:
        with torch.no_grad():
            for images, labels in test_loader:
                images = images.cuda()
                labels = labels.cuda()
                output = model(images.reshape(-1, 1, 28, 28))
                _, pred = torch.max(output, 1)
                test_correct += int(torch.sum(pred == labels)) / batch_size
        train_acc.append(train_correct / len(train_loader))
        test_acc.append(test_correct / len(test_loader))
        print(f'Epoch {step}: train accuracy: {train_acc[-1]}, test accuracy: {test_acc[-1]}')

plt.plot(train_acc, label='Training accuracy')
plt.plot(test_acc, label='Validation accuracy')
plt.legend()
```
Output:
```
Epoch 0: train accuracy: 0.9646166666666718, test accuracy: 0.9868999999999996
Epoch 1: train accuracy: 0.9865833333333389, test accuracy: 0.9908999999999998
Epoch 2: train accuracy: 0.9917000000000039, test accuracy: 0.9879999999999994
<matplotlib.legend.Legend at 0x21f03092fd0>
```