先說一個小知識,助於理解代碼中各個層之間維度是怎么變換的。
卷積函數:一般用來改變輸入數據的通道數(也就是本文所說的「維度」),例如從3個通道變成16個通道。
Conv2d()
Conv2d(in_channels:int,out_channels:int,kernel_size:Union[int,tuple],stride=1,padding=0):
"""
:param in_channels: 輸入的維度
:param out_channels: 通過卷積核之后,要輸出的維度
:param kernel_size: 卷積核大小
:param stride: 移動步長
:param padding: 四周添多少個零
"""
一個小例子:
import torch
import torch.nn
# A batch of 16 images, each with 3 channels and a spatial size of 32*32.
x = torch.randn(16, 3, 32, 32)
# Raise the channel count from 3 to 16 using a 5*5 convolution kernel.
conv = torch.nn.Conv2d(
    in_channels=3,
    out_channels=16,
    kernel_size=5,
    stride=1,
    padding=0,
)
res = conv(x)
print(res.shape)
# torch.Size([16, 16, 28, 28])
# The channel count is now 16. With kernel_size=5, stride=1 and no padding,
# each spatial side shrinks from 32 to (32 - 5) / 1 + 1 = 28.
卷積神經網絡實戰之Lenet5:
下面放一個示例圖,代碼中的過程就是根據示例圖進行的

- 1.經過一個卷積層,從剛開始的[b,3,32,32]-->[b,6,28,28]
- 2.經過一個池化層,從[b,6,28,28]-->[b,6,14,14]
- 3.再經過一個卷積層,從[b,6,14,14]-->[b,16,10,10]
- 4.再經過一個池化層,從[b,16,10,10]-->[b,16,5,5]
- 5.先將[b,16,5,5]打平成[b,400],再經過三個全連接層,將數據[b,400]-->[b,120]-->[b,84]-->[b,10]
Lenet5的構造如下:
Lenet5(
(conv_unit): Sequential(
(0): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
(1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
(3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(fc_unit): Sequential(
(0): Linear(in_features=400, out_features=120, bias=True)
(1): ReLU()
(2): Linear(in_features=120, out_features=84, bias=True)
(3): ReLU()
(4): Linear(in_features=84, out_features=10, bias=True)
)
)
程序運行前,先啟動visdom,如果沒有配置好visdom環境的,先百度安裝好visdom環境
- 1.使用快捷鍵win+r,在輸入框輸入cmd,然后在命令行窗口里輸入
python -m visdom.server,啟動visdom

代碼實戰
定義一個名為
lenet5.py的文件,代碼如下
import torch
from torch import nn
import torch.optim
import torch.nn
from torch.nn import functional as F
class Lenet5(nn.Module):
    """LeNet-5 style classifier sized for CIFAR-10 images.

    The network expects input of shape ``[b, 3, 32, 32]`` and returns raw,
    un-normalised class scores (logits) of shape ``[b, 10]``.

    Attributes:
        conv_unit: two conv + max-pool stages, ``[b, 3, 32, 32]`` to
            ``[b, 16, 5, 5]``.
        fc_unit: three fully connected layers, ``[b, 400]`` to ``[b, 10]``.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv_unit = nn.Sequential(
            # x: [b, 3, 32, 32] ==> [b, 6, 28, 28]
            nn.Conv2d(3, 6, kernel_size=5, stride=1, padding=0),
            # x: [b, 6, 28, 28] ==> [b, 6, 14, 14]
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            # x: [b, 6, 14, 14] ==> [b, 16, 10, 10]
            nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),
            # x: [b, 16, 10, 10] ==> [b, 16, 5, 5]
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
        )
        # Fully connected classifier head.
        self.fc_unit = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10),
        )

    def forward(self, x):
        """Compute class logits for a batch of images.

        :param x: image batch of shape ``[b, 3, 32, 32]``
        :return: logits of shape ``[b, 10]``
        """
        # Conv and pooling stages: [b, 3, 32, 32] ==> [b, 16, 5, 5]
        x = self.conv_unit(x)
        # Flatten everything except the batch axis so the linear layers can
        # consume it: [b, 16, 5, 5] ==> [b, 16*5*5]. flatten() is used rather
        # than view() because view() raises when the feature map is not
        # contiguous in memory (for example with channels_last inputs).
        x = torch.flatten(x, start_dim=1)
        # Fully connected layers: [b, 16*5*5] ==> [b, 10]
        logits = self.fc_unit(x)
        return logits
def main():
    """Smoke-test Lenet5: run one random batch through it and print the output shape."""
    model = Lenet5()
    # Random stand-in for two 3-channel 32x32 images: [b, 3, 32, 32]
    dummy_batch = torch.randn(2, 3, 32, 32)
    logits = model(dummy_batch)
    # The network returns one score per class: [b, 10]
    print("lenet_out:", logits.shape)


if __name__ == '__main__':
    main()
定義一個名為
main.py的文件,代碼如下
import torch
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
from torch import nn,optim
from visdom import Visdom
from lenet5 import Lenet5
def _build_loader(train, batch_size, shuffle):
    """Return a DataLoader over the CIFAR-10 train or test split.

    :param train: True for the training split, False for the test split
    :param batch_size: number of samples per mini-batch
    :param shuffle: whether to reshuffle the samples every epoch
    """
    preprocess = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    dataset = datasets.CIFAR10('cifar', train, transform=preprocess, download=True)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


def _evaluate(model, loader, device):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly."""
    model.eval()
    total_correct = 0
    total_num = 0
    # Gradients are not needed while measuring accuracy.
    with torch.no_grad():
        for x, label in loader:
            x, label = x.to(device), label.to(device)
            # logits: [b, 10] ==> predicted class index: [b]
            pred = model(x).argmax(dim=1)
            # Count the positions where prediction and label agree.
            total_correct += pred.eq(label).float().sum().item()
            total_num += x.size(0)
    return total_correct / total_num


def main():
    """Train Lenet5 on CIFAR-10, plotting loss and test accuracy to visdom.

    A visdom server must already be running (``python -m visdom.server``).
    """
    batch_size = 32
    num_epochs = 1000

    cifar_train = _build_loader(True, batch_size, shuffle=True)
    # Sample order does not affect accuracy, so the test split is not shuffled.
    cifar_test = _build_loader(False, batch_size, shuffle=False)

    # Peek at one batch to show the tensor shapes. The builtin next() is used
    # because loader iterators do not provide a Python 2 style .next() method.
    x, label = next(iter(cifar_train))
    print('x:', x.shape, 'label:', label.shape)

    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Lenet5().to(device)
    # Cross-entropy suits classification better than MSE.
    criteon = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    print(model)

    viz = Visdom()
    viz.line([0.], [0.], win="loss", opts=dict(title='Lenet5 Loss'))
    viz.line([0.], [0.], win="acc", opts=dict(title='Lenet5 Acc'))

    for epoch in range(num_epochs):
        # Training phase.
        model.train()
        for x, label in cifar_train:
            # x: [b, 3, 32, 32], label: [b]
            x, label = x.to(device), label.to(device)
            # logits: [b, 10]
            logits = model(x)
            loss = criteon(logits, label)

            # Backpropagation and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # The value reported per epoch is the loss of the last mini-batch,
        # not an average over the epoch.
        viz.line([loss.item()], [epoch], win='loss', update='append')
        print(epoch, 'loss:', loss.item())

        # Evaluation phase.
        acc = _evaluate(model, cifar_test, device)
        print(epoch, 'acc:', acc)
        viz.line([acc], [epoch], win='acc', update='append')


if __name__ == '__main__':
    main()
測試結果


准確率剛開始是有一定的上升的,最高可達64%,后來准確率就慢慢的下降。
