1. Downloading the CIFAR10 dataset
The CIFAR10 dataset contains 10 classes of 3×32×32 images.
The official download is slow, so here is a Baidu Cloud mirror:
After downloading, create a data directory in the project root and extract the archive into it.
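Alternatively, torchvision can fetch the dataset by itself; a minimal one-off sketch (after which the loaders below can keep download=False):
import torchvision

# downloads and extracts CIFAR10 into ./data (data/cifar-10-batches-py)
torchvision.datasets.CIFAR10(root='./data', train=True, download=True)
torchvision.datasets.CIFAR10(root='./data', train=False, download=True)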
2. Importing the required packages
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time
import copy
MINI_BATCH = 8  # the dataset is too large to load at once, so we load one mini-batch of images at a time
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')  # use the GPU if one is available
3. Loading and normalizing the training and test sets with torchvision
The torchvision CIFAR10 dataset yields PIL images; ToTensor() converts them to tensors with values in [0, 1], which we then normalize to the range [-1, 1]:
# ToTensor(): converts a PIL image or ndarray image into a Tensor
# Normalize(mean, std): mean is the per-channel mean (0.5 here; compute it yourself for a custom dataset);
#   std is the per-channel standard deviation; the normalized value is (original - mean) / std,
#   so with mean = std = 0.5 a value of 0 maps to -1 and a value of 1 maps to +1
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
The data loaders:
# training data
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=False, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=MINI_BATCH, shuffle=True, num_workers=4)
# test data
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=MINI_BATCH, shuffle=False, num_workers=4)
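As an optional sanity check, pulling one mini-batch confirms the tensor shapes and that Normalize produced values in [-1, 1]:
# optional: inspect one mini-batch
images, labels = next(iter(trainloader))
print(images.shape)                               # torch.Size([8, 3, 32, 32]) = MINI_BATCH x C x H x W
print(labels.shape)                               # torch.Size([8])
print(images.min().item(), images.max().item())   # values fall within [-1, 1]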
4. Defining the convolutional neural network
We implement a simple network, LeNet-5, for the classification task:
The network has two convolutional layers, two pooling layers and three fully connected layers. The original LeNet-5 was designed for handwritten digit recognition on grayscale input; since our images are RGB, the input here is a 3×32×32 Tensor and the output is a 1×10 vector of scores over the 10 classes. The dimension changes through the network are as follows (they are verified in the code sketch after the list):
- Output size of a 2D convolutional layer: out = (in - F + 2P) / S + 1, where:
  F: kernel size F×F
  P: padding, 0 by default
  S: stride, 1 by default
  For example, the first convolutional layer: (32 - 5) / 1 + 1 = 28
- Output size of a pooling layer: out = (in - F) / S + 1, where:
  F: pooling window size F×F
  S: stride of the pooling window, equal to the window size by default
  For example, the second pooling layer: (28 - 2) / 2 + 1 = 14
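These formulas are easy to check in code; the following sketch (a standalone snippet, not part of the network file) pushes a dummy 3×32×32 input through the same convolution/pooling configuration and prints the shape after each stage:
import torch
import torch.nn as nn

x = torch.zeros(1, 3, 32, 32)       # dummy batch: 1 image, 3 channels, 32x32
conv1 = nn.Conv2d(3, 6, 5)          # (32 - 5) / 1 + 1 = 28
pool = nn.MaxPool2d(2, 2)           # halves the spatial size: (28 - 2) / 2 + 1 = 14
conv2 = nn.Conv2d(6, 16, 5)         # (14 - 5) / 1 + 1 = 10
x = pool(conv1(x)); print(x.shape)  # torch.Size([1, 6, 14, 14])
x = pool(conv2(x)); print(x.shape)  # torch.Size([1, 16, 5, 5]) -> flattened to 16*5*5 = 400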
This part can be written as a separate file and imported into the training script:
# net.py
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)     # conv layer: 3 channels -> 6 channels, 5x5 kernel
        self.conv2 = nn.Conv2d(6, 16, 5)    # conv layer: 6 channels -> 16 channels, 5x5 kernel
        self.pool = nn.MaxPool2d(2, 2)      # pooling layer: downsample over a 2x2 window
        # three fully connected layers: 16*5*5 -> 120 -> 84 -> 10
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    # define the forward pass
    def forward(self, x):
        x = F.relu(self.conv1(x))           # F.relu is a commonly used activation function
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.view(-1, 16 * 5 * 5)          # reshape to (batch, 16*5*5); -1 lets PyTorch infer the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
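As an optional check that the layers are wired consistently (run from the training script or a shell where Net is importable), a random input of the right shape should come out as a [batch, 10] score tensor:
# optional shape check, not part of net.py
model = Net()
dummy = torch.randn(4, 3, 32, 32)   # a fake mini-batch of 4 RGB 32x32 images
print(model(dummy).shape)           # torch.Size([4, 10]): one score per class for each image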
5. Defining a generic training function that returns the best parameters
def train(model, criterion, optimizer, epochs):
    since = time.time()
    best_acc = 0.0                                        # highest test accuracy seen so far
    best_model_wts = copy.deepcopy(model.state_dict())    # best parameters seen so far
    for epoch in range(epochs):
        print('-' * 30)
        print('Epoch {}/{}'.format(epoch + 1, epochs))
        # train the model
        running_loss = 0.0
        for i, data in enumerate(trainloader):
            inputs, labels = data
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            # forward pass and loss
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # backward pass + optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            # print training stats every 1000 mini-batches
            if (i != 0) and (i % 1000 == 0):
                print('step: {:d}, loss: {:.3f}'.format(i, running_loss / 1000))
                running_loss = 0.0
        # after each epoch, evaluate the model by its overall accuracy on the test data
        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                images, labels = data
                images, labels = images.to(DEVICE), labels.to(DEVICE)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        acc = correct / total
        if acc > best_acc:                                 # keep the weights with the highest accuracy
            best_acc = acc
            best_model_wts = copy.deepcopy(model.state_dict())
    time_elapsed = time.time() - since
    print('-' * 30)
    print('Training time: {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best accuracy: {:.2f}%'.format(100 * best_acc))
    # return the best model found during testing
    model.load_state_dict(best_model_wts)
    return model
6. Training the model after defining the loss function and optimizer
from net import Net
net = Net()
net.to(DEVICE)
# use cross-entropy as the loss function and SGD with momentum as the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
# train for 10 epochs
net = train(net, criterion, optimizer, 10)
# save the model parameters
torch.save(net.state_dict(), 'net_dict.pt')
7. Testing the model
# the image classes
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
net = Net()
net.load_state_dict(torch.load('net_dict.pt', map_location=DEVICE))  # load the saved parameters (map_location keeps this working on CPU-only machines)
net.to(DEVICE)
# overall accuracy on the test set
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Overall accuracy: {:.2f}%'.format(100 * correct / total))
print('=' * 30)
# per-class accuracy
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(labels.size(0)):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
for i in range(10):
    print('Accuracy of {}: {:.2f}%'.format(classes[i], 100 * class_correct[i] / class_total[i]))
8. Some predictions from the model on test-set images
import matplotlib.pyplot as plt
import numpy as np
# a helper function for displaying images
def imshow(img):
    # input: a torch.Tensor of shape [c, h, w]
    img = img * 0.5 + 0.5                          # undo the normalization
    npimg = np.transpose(img.numpy(), (1, 2, 0))   # [c, h, w] -> [h, w, c]
    plt.imshow(npimg)
    plt.show()
# take one batch of test images
testdata = iter(testloader)
images, labels = next(testdata)    # use the built-in next() to fetch a batch
imshow(torchvision.utils.make_grid(images))
print('Ground truth: ', ' '.join('{}'.format(classes[labels[j]]) for j in range(labels.size(0))))
# the outputs are scores over the 10 classes; the higher a class's score, the more strongly the
# network believes the image belongs to that class, so we report the label with the highest score
outputs = net(images.to(DEVICE))   # move the images to the same device as the model before the forward pass
_, predicted = torch.max(outputs, 1)
print('Predicted: ', ' '.join('{}'.format(classes[predicted[j]]) for j in range(labels.size(0))))