nn.Conv2d() and nn.MaxPool2d() in Convolutional Neural Networks
The PyTorch implementation of convolutional neural networks:
nn.Conv2d()
This is PyTorch's convolution module.
Parameter list
| Parameter | Purpose |
| --- | --- |
| in_channels | depth (number of channels) of the input volume |
| out_channels | depth (number of channels) of the output volume |
| kernel_size | size of the filter (convolution kernel); see note 1 |
| stride | step size of the sliding window |
| padding | number of rings of zero padding around the input; see note 2 |
| bias | whether to add a bias term; defaults to True (enabled) |
| groups | connection pattern between the input and output channel depths; see note 3 |
| dilation | spacing between kernel elements over the input volume; see note 4 |
Notes:
1. A single number gives a kernel with equal height and width, e.g. kernel_size=3; a tuple gives a kernel with different height and width, e.g. kernel_size=(3, 2).
2. padding=0 means no zero padding; padding=1 pads one pixel of zeros on all four sides.
3. groups controls how output channels connect to input channels. With the default groups=1, every output channel is connected to every input channel. With groups=2, the input channels are split into two groups and the output channels into two corresponding groups, each pair connected independently, so both in_channels and out_channels must be divisible by groups.
4. dilation defaults to 1; for details, see write-ups on the dilation parameter of nn.Conv2d() (e.g. on CSDN).
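To make these concrete, here is a minimal sketch (not from the original text) showing how kernel_size, stride, padding, and groups affect the result; the output spatial size follows out = floor((in + 2*padding - dilation*(kernel_size-1) - 1) / stride + 1):

```python
import torch
import torch.nn as nn

x = torch.randn(1, 4, 8, 8)  # batch of 1, 4 input channels, 8x8 spatial size

same = nn.Conv2d(4, 6, kernel_size=3, stride=1, padding=1)
print(same(x).shape)         # torch.Size([1, 6, 8, 8]) -- padding=1 preserves 8x8

strided = nn.Conv2d(4, 6, kernel_size=3, stride=2, padding=1)
print(strided(x).shape)      # torch.Size([1, 6, 4, 4]) -- stride=2 halves the size

grouped = nn.Conv2d(4, 6, kernel_size=3, padding=1, groups=2)
print(grouped.weight.shape)  # torch.Size([6, 2, 3, 3]) -- each filter sees only 4/2 = 2 input channels
```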
nn.MaxPool2d()
This is the max-pooling layer.
Parameter list:
| Parameter | Purpose |
| --- | --- |
| kernel_size | same as in nn.Conv2d() above |
| stride | same as in nn.Conv2d() above |
| padding | same as in nn.Conv2d() above |
| dilation | same as in nn.Conv2d() above |
| return_indices | whether to also return the indices of the maxima; defaults to return_indices=False |
| ceil_mode | whether to use ceil instead of floor when computing the output size; defaults to ceil_mode=False |
Note: return_indices and ceil_mode are rarely set in practice.
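A small sketch (assuming a 1×1×5×5 input, not from the original) of what these two parameters do:

```python
import torch
import torch.nn as nn

x = torch.randn(1, 1, 5, 5)

# ceil_mode controls how a leftover border is handled: floor drops it, ceil keeps it.
print(nn.MaxPool2d(2, 2, ceil_mode=False)(x).shape)  # torch.Size([1, 1, 2, 2])
print(nn.MaxPool2d(2, 2, ceil_mode=True)(x).shape)   # torch.Size([1, 1, 3, 3])

# return_indices=True also returns each maximum's position,
# which nn.MaxUnpool2d can later use to undo the pooling.
values, indices = nn.MaxPool2d(2, 2, return_indices=True)(x)
print(values.shape, indices.shape)                   # both torch.Size([1, 1, 2, 2])
```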
```python
import torch.nn as nn


class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        layer1 = nn.Sequential()
        # Convolve the 3-channel RGB image with 32 groups of kernels (3 kernels per
        # group); the per-channel results in each group are summed, giving 32 output channels.
        layer1.add_module('conv1', nn.Conv2d(3, 32, (3, 3), (1, 1), padding=1))
        layer1.add_module('relu1', nn.ReLU(True))
        layer1.add_module('pool1', nn.MaxPool2d(2, 2))
        self.layer1 = layer1

        layer2 = nn.Sequential()
        layer2.add_module('conv2', nn.Conv2d(32, 64, (3, 3), (1, 1), padding=1))
        layer2.add_module('relu2', nn.ReLU(True))
        layer2.add_module('pool2', nn.MaxPool2d(2, 2))
        self.layer2 = layer2

        layer3 = nn.Sequential()
        layer3.add_module('conv3', nn.Conv2d(64, 128, (3, 3), (1, 1), padding=1))
        layer3.add_module('relu3', nn.ReLU(True))
        layer3.add_module('pool3', nn.MaxPool2d(2, 2))
        self.layer3 = layer3

        layer4 = nn.Sequential()
        layer4.add_module('fc1', nn.Linear(2048, 512))
        layer4.add_module('fc_relu1', nn.ReLU(True))
        layer4.add_module('fc2', nn.Linear(512, 64))
        layer4.add_module('fc_relu2', nn.ReLU(True))
        layer4.add_module('f3', nn.Linear(64, 10))
        self.layer4 = layer4

    def forward(self, x):
        conv1 = self.layer1(x)
        conv2 = self.layer2(conv1)
        conv3 = self.layer3(conv2)
        # flatten to (batch_size, 128 * 4 * 4) = (batch_size, 2048) for a 32x32 input
        fc_input = conv3.view(conv3.size(0), -1)
        fc_out = self.layer4(fc_input)
        return fc_out


model = SimpleCNN()
print(model)
```
Output:
```
SimpleCNN(
  (layer1): Sequential(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU(inplace=True)
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu2): ReLU(inplace=True)
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu3): ReLU(inplace=True)
    (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer4): Sequential(
    (fc1): Linear(in_features=2048, out_features=512, bias=True)
    (fc_relu1): ReLU(inplace=True)
    (fc2): Linear(in_features=512, out_features=64, bias=True)
    (fc_relu2): ReLU(inplace=True)
    (f3): Linear(in_features=64, out_features=10, bias=True)
  )
)
```
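fc1 expects 2048 = 128 × 4 × 4 input features, which corresponds to a 32×32 input image: three rounds of size-preserving convolution plus 2×2 pooling take 32 → 16 → 8 → 4. A quick check with a dummy tensor (a sketch, not part of the original):

```python
import torch

dummy = torch.randn(1, 3, 32, 32)  # one 3-channel 32x32 image
features = model.layer3(model.layer2(model.layer1(dummy)))
print(features.shape)              # torch.Size([1, 128, 4, 4]) -- flattens to 2048
print(model(dummy).shape)          # torch.Size([1, 10])
```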
Extracting the model's hierarchical structure
The hierarchy can be extracted with several attributes of nn.Module. The first is children(), which returns an iterator over the immediate sub-modules; in the model above it iterates over self.layer1, self.layer2, self.layer3, and self.layer4 without descending into them. modules() returns an iterator over every module in the model, so it reaches the innermost layers, such as the self.layer1.conv1 module. Their counterparts named_children() and named_modules() return not only the module iterators but also each layer's name.
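For example, a small sketch using the model defined above:

```python
for name, child in model.named_children():
    print(name, type(child).__name__)  # layer1 Sequential ... layer4 Sequential

for name, module in model.named_modules():
    if name:                           # skip the root module, whose name is ''
        print(name)                    # layer1, layer1.conv1, layer1.relu1, ...
```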
Extract the first two layers of model:
```python
nn.Sequential(*list(model.children())[:2])
```
Output:
```
Sequential(
  (0): Sequential(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU(inplace=True)
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu2): ReLU(inplace=True)
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
)
```
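The truncated Sequential is itself a usable module, e.g. as a feature extractor (a sketch; the 32×32 input matches the size analysis above):

```python
import torch

feature_extractor = nn.Sequential(*list(model.children())[:2])
feats = feature_extractor(torch.randn(1, 3, 32, 32))
print(feats.shape)  # torch.Size([1, 64, 8, 8]) -- after two conv + pool stages
```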
Extract all the convolutional layers from model:
```python
conv_model = nn.Sequential()
for name, module in model.named_modules():
    if isinstance(module, nn.Conv2d):
        # names look like 'layer1.conv1'; keep only the part after the dot
        conv_model.add_module(name.split('.')[1], module)
print(conv_model)
```
Output:
```
Sequential(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
```
Extracting and initializing network parameters
nn.Module has two particularly important parameter-related attributes: named_parameters() and parameters(). The former yields each layer's name together with its parameters; the latter yields all of the network's parameters.
```python
for param in model.named_parameters():
    print(param[0])
    # print(param[1])
```
Output:
```
layer1.conv1.weight
layer1.conv1.bias
layer2.conv2.weight
layer2.conv2.bias
layer3.conv3.weight
layer3.conv3.bias
layer4.fc1.weight
layer4.fc1.bias
layer4.fc2.weight
layer4.fc2.bias
layer4.f3.weight
layer4.f3.bias
```
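To actually initialize these parameters, one common approach uses torch.nn.init; the choice of Xavier initialization below is an assumption for illustration, not from the original:

```python
from torch.nn import init

for name, param in model.named_parameters():
    if 'weight' in name and param.dim() > 1:
        init.xavier_normal_(param)  # example choice: Xavier (Glorot) for conv/linear weights
    elif 'bias' in name:
        init.constant_(param, 0.0)  # zero the biases
```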
Example: classifying the MNIST dataset with a convolutional neural network
```python
import os

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.utils.data
import torchvision.datasets
from torchvision import transforms


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # 28x28 -> 26x26 (3x3 conv, no padding)
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=(3, 3)),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
        )
        # 26x26 -> 24x24 -> 12x12 (conv, then 2x2 max pooling)
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=(3, 3)),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # 12x12 -> 10x10
        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=(3, 3)),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True)
        )
        # 10x10 -> 8x8 -> 4x4
        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=(3, 3)),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc = nn.Sequential(
            nn.Linear(128 * 4 * 4, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 128),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = x.view(x.size(0), -1)  # flatten to (batch_size, 2048)
        x = self.fc(x)
        return x
```
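Each 3×3 convolution without padding trims two pixels, and each 2×2 pooling halves the size: 28 → 26 → 24 → 12 → 10 → 8 → 4, which is where 128 * 4 * 4 comes from. A quick check (a sketch, not in the original):

```python
import torch

net = CNN()
out = net(torch.randn(2, 1, 28, 28))  # a batch of 2 fake MNIST images
print(out.shape)                      # torch.Size([2, 10])
```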
The training script (it assumes a CUDA-capable GPU, since the model and data are moved there):
```python
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

data_tf = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize([0.5], [0.5])]
)
train_dataset = torchvision.datasets.MNIST(root='F:/機器學習/pytorch/書/data/mnist', train=True,
                                           transform=data_tf, download=True)
test_dataset = torchvision.datasets.MNIST(root='F:/機器學習/pytorch/書/data/mnist', train=False,
                                          transform=data_tf, download=True)

batch_size = 100
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size
)

model = CNN()
model = model.cuda()
criterion = nn.CrossEntropyLoss()
criterion = criterion.cuda()
optimizer = torch.optim.Adam(model.parameters())

# To save time, three epochs are enough.
iter_step = 3
train_acc = []  # per-epoch training accuracy (named loss1 in the original)
test_acc = []   # per-epoch test accuracy (named loss2 in the original)
for step in range(iter_step):
    train_correct = 0
    test_correct = 0
    for images, labels in train_loader:
        images = images.cuda()
        labels = labels.cuda()
        images = images.reshape(-1, 1, 28, 28)
        output = model(images)
        optimizer.zero_grad()
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        _, pred = torch.max(output, 1)
        train_correct += int(torch.sum(pred == labels)) / batch_size
    # Evaluation: the else branch of a for loop runs once the loop finishes normally.
    else:
        with torch.no_grad():
            for images, labels in test_loader:
                images = images.cuda()
                labels = labels.cuda()
                output = model(images.reshape(-1, 1, 28, 28))
                _, pred = torch.max(output, 1)
                test_correct += int(torch.sum(pred == labels)) / batch_size
        train_acc.append(train_correct / len(train_loader))
        test_acc.append(test_correct / len(test_loader))
        print(f'Epoch {step}: train accuracy: {train_acc[-1]}, test accuracy: {test_acc[-1]}')

plt.plot(train_acc, label='Training accuracy')
plt.plot(test_acc, label='Validation accuracy')
plt.legend()
```
Output:
```
Epoch 0: train accuracy: 0.9646166666666718, test accuracy: 0.9868999999999996
Epoch 1: train accuracy: 0.9865833333333389, test accuracy: 0.9908999999999998
Epoch 2: train accuracy: 0.9917000000000039, test accuracy: 0.9879999999999994
<matplotlib.legend.Legend at 0x21f03092fd0>
```