First, a small piece of background that helps in understanding how the dimensions change between the layers in the code.
Convolution layers: generally used to change the channel dimension of the input data, e.g. from 3 channels to 16.
Conv2d()
Conv2d(in_channels: int, out_channels: int, kernel_size: Union[int, tuple], stride=1, padding=0):
"""
:param in_channels: 輸入的維度
:param out_channels: 通過卷積核之后,要輸出的維度
:param kernel_size: 卷積核大小
:param stride: 移動步長
:param padding: 四周添多少個零
"""
A small example:
import torch
import torch.nn

# a batch of 16 images, each with 3 channels, of size 32*32
x = torch.randn(16, 3, 32, 32)
# raise the channel dimension from 3 to 16 with a 5x5 kernel
conv = torch.nn.Conv2d(3, 16, kernel_size=5, stride=1, padding=0)
res = conv(x)
print(res.shape)
# torch.Size([16, 16, 28, 28])
# the channels go up to 16; with a 5x5 kernel, stride 1 and no padding,
# the spatial size shrinks from 32 to 28
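The spatial output size follows the standard convolution formula: out = floor((in + 2*padding - kernel_size) / stride) + 1. A quick sketch that checks the numbers above (the helper conv_out_size is my own, not a PyTorch function):

def conv_out_size(size, kernel_size, stride=1, padding=0):
    # floor((size + 2*padding - kernel_size) / stride) + 1
    return (size + 2 * padding - kernel_size) // stride + 1

print(conv_out_size(32, 5))            # 28, matches the Conv2d example above
print(conv_out_size(28, 2, stride=2))  # 14, how a 2x2/stride-2 max-pool halves the size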
CNN in practice: LeNet-5
Below is a reference diagram; the process in the code follows it:
- 1. A convolution layer: from the initial [b,3,32,32] --> [b,6,28,28]
- 2. A pooling layer: [b,6,28,28] --> [b,6,14,14]
- 3. Another convolution layer: [b,6,14,14] --> [b,16,10,10]
- 4. Another pooling layer: [b,16,10,10] --> [b,16,5,5]
- 5. Flatten, then three fully connected layers: [b,16,5,5] --> [b,400] --> [b,120] --> [b,84] --> [b,10] (the convolution/pooling shapes are verified in the sketch right after this list)
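As a sanity check for the shape flow above, here is a small standalone sketch (not part of the original code) that pushes a dummy batch through each stage and prints the resulting shapes:

import torch
from torch import nn

stages = [
    nn.Conv2d(3, 6, kernel_size=5),         # [b,3,32,32] -> [b,6,28,28]
    nn.MaxPool2d(kernel_size=2, stride=2),  # [b,6,28,28] -> [b,6,14,14]
    nn.Conv2d(6, 16, kernel_size=5),        # [b,6,14,14] -> [b,16,10,10]
    nn.MaxPool2d(kernel_size=2, stride=2),  # [b,16,10,10] -> [b,16,5,5]
]
x = torch.randn(2, 3, 32, 32)
for stage in stages:
    x = stage(x)
    print(type(stage).__name__, list(x.shape))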
The structure of Lenet5 (the output of print(model) in main.py below) is as follows:
Lenet5(
(conv_unit): Sequential(
(0): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
(1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
(3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(fc_unit): Sequential(
(0): Linear(in_features=400, out_features=120, bias=True)
(1): ReLU()
(2): Linear(in_features=120, out_features=84, bias=True)
(3): ReLU()
(4): Linear(in_features=84, out_features=10, bias=True)
)
)
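This is a small network; you can count its trainable parameters with a one-liner. A quick sketch (using the Lenet5 class from lenet5.py defined below):

from lenet5 import Lenet5

model = Lenet5()
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(num_params)  # should print 62006 for this architecture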
Before running the program, start visdom. If you have not set up visdom yet, install it first (e.g. with pip install visdom).
- 1. Press Win+R, type cmd in the box, then in the command window run
python -m visdom.server
to start visdom.
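You can optionally verify from Python that the server is reachable before training; a minimal sketch (assuming visdom's default address localhost:8097):

from visdom import Visdom

viz = Visdom()  # connects to http://localhost:8097 by default
print(viz.check_connection())  # True if the visdom server is reachable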
Hands-on code
Create a file named
lenet5.py
with the following code:
import torch
from torch import nn


class Lenet5(nn.Module):
    # for cifar10 dataset.
    def __init__(self):
        super(Lenet5, self).__init__()
        # convolutional unit
        self.conv_unit = nn.Sequential(
            # x: [b,3,32,32] ==> [b,6,28,28]
            nn.Conv2d(3, 6, kernel_size=5, stride=1, padding=0),
            # x: [b,6,28,28] ==> [b,6,14,14]
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            # x: [b,6,14,14] ==> [b,16,10,10]
            nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),
            # x: [b,16,10,10] ==> [b,16,5,5]
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
        )
        # fully connected unit
        self.fc_unit = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10)
        )

    def forward(self, x):
        """
        :param x: [b,3,32,32]
        :return: logits of shape [b,10]
        """
        batchsz = x.size(0)
        # convolution and pooling: [b,3,32,32] ==> [b,16,5,5]
        x = self.conv_unit(x)
        # flatten for the fully connected layers: [b,16,5,5] ==> [b,16*5*5]
        x = x.view(batchsz, 16 * 5 * 5)
        # fully connected layers: [b,16*5*5] ==> [b,10]
        logits = self.fc_unit(x)
        return logits


def main():
    net = Lenet5()
    # [b,3,32,32]
    temp = torch.randn(2, 3, 32, 32)
    out = net(temp)
    # [b,10]
    print("lenet_out:", out.shape)


if __name__ == '__main__':
    main()
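As a side note, the manual x.view(batchsz, 16*5*5) flatten in forward() can be replaced by an nn.Flatten() module on recent PyTorch versions, which lets the whole network live in a single Sequential. A sketch of that variant (my restructuring, not the original code):

import torch
from torch import nn

net = nn.Sequential(
    nn.Conv2d(3, 6, kernel_size=5), nn.MaxPool2d(2, 2),
    nn.Conv2d(6, 16, kernel_size=5), nn.MaxPool2d(2, 2),
    nn.Flatten(),  # [b,16,5,5] -> [b,400]
    nn.Linear(16 * 5 * 5, 120), nn.ReLU(),
    nn.Linear(120, 84), nn.ReLU(),
    nn.Linear(84, 10),
)
print(net(torch.randn(2, 3, 32, 32)).shape)  # torch.Size([2, 10])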
Create a file named
main.py
with the following code:
import torch
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
from torch import nn, optim
from visdom import Visdom
from lenet5 import Lenet5


def main():
    batch_size = 32
    cifar_train = datasets.CIFAR10('cifar', True, transform=transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        # note: these mean/std values are the common ImageNet statistics
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ]), download=True)
    cifar_train = DataLoader(cifar_train, batch_size=batch_size, shuffle=True)

    cifar_test = datasets.CIFAR10('cifar', False, transform=transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ]), download=True)
    cifar_test = DataLoader(cifar_test, batch_size=batch_size, shuffle=True)

    x, label = next(iter(cifar_train))
    print('x:', x.shape, 'label:', label.shape)

    # run on the CPU (switch to 'cuda' to use a GPU)
    device = torch.device('cpu')
    model = Lenet5().to(device)
    # use cross-entropy loss; for classification,
    # CrossEntropyLoss is more appropriate than MSELoss
    criteon = nn.CrossEntropyLoss().to(device)
    # define an optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    print(model)

    viz = Visdom()
    viz.line([0.], [0.], win="loss", opts=dict(title='Lenet5 Loss'))
    viz.line([0.], [0.], win="acc", opts=dict(title='Lenet5 Acc'))

    # training
    for epoch in range(1000):
        # switch to train mode
        model.train()
        # batchidx: batch index, x: [b,3,32,32], label: [b]
        for batchidx, (x, label) in enumerate(cifar_train):
            # move x and label to the device
            x, label = x.to(device), label.to(device)
            # logits: [b,10]
            # label: [b]
            logits = model(x)
            loss = criteon(logits, label)
            # backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        viz.line([loss.item()], [epoch], win='loss', update='append')
        print(epoch, 'loss:', loss.item())

        # switch to evaluation mode
        model.eval()
        with torch.no_grad():
            # test
            total_correct = 0  # number of correct predictions
            total_num = 0
            for x, label in cifar_test:
                # move x and label to the device
                x, label = x.to(device), label.to(device)
                # [b,10]
                logits = model(x)
                # [b]
                pred = logits.argmax(dim=1)
                # count how many predictions equal the labels
                total_correct += pred.eq(label).float().sum().item()
                total_num += x.size(0)
            acc = total_correct / total_num
            print(epoch, 'acc:', acc)
            viz.line([acc], [epoch], win='acc', update='append')
            # viz.images(x.view(-1, 3, 32, 32), win='x')


if __name__ == '__main__':
    main()
Test results
The accuracy climbs at first, peaking at about 64%, and then slowly declines, which suggests the model starts to overfit the training set.
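Since the test accuracy peaks and then degrades, it can help to checkpoint the best epoch instead of keeping the final weights. A minimal sketch of the idea (the helper name save_if_best and the file name are my own), meant to be called in the test loop of main.py right after acc is computed:

import torch

def save_if_best(model, acc, best_acc, path='lenet5_best.pth'):
    # save the weights whenever test accuracy improves; return the new best accuracy
    if acc > best_acc:
        torch.save(model.state_dict(), path)
        return acc
    return best_acc

# usage inside the epoch loop of main.py (initialise best_acc = 0.0 before the loop):
#     best_acc = save_if_best(model, acc, best_acc)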