基於CNN的CIFAR10圖像分類
完整代碼如下:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision
import torchvision.transforms as transforms
import numpy as np
import time
from matplotlib import pyplot as plt
# ===========================================================================================
# Prepare the data.
# Compose chains several transforms: ToTensor converts pixels to values in [0, 1],
# then Normalize with mean 0.5 and std 0.5 per channel maps them to [-1, 1].
tf = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
# Download the dataset: the training split (train=True) and the test split (train=False).
train_set = torchvision.datasets.CIFAR10(root='./cifar10', train=True, download=True, transform=tf)
test_set = torchvision.datasets.CIFAR10(root='./cifar10', train=False, download=True, transform=tf)
# Human-readable names of the ten classes, indexed by label id. The order must
# follow the dataset's label order, in which 'ship' is 8 and 'truck' is 9.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# Training: sample from the first n_training_sample images (here all 50000).
n_training_sample = 50000
train_sample = SubsetRandomSampler(np.arange(n_training_sample, dtype=np.int64))
# Validation (disabled): would take the n_validation_sample images that follow the
# training subset. NOTE: this only works if n_training_sample is lowered so that
# n_training_sample + n_validation_sample <= 50000.
# n_validation_sample = 5000
# validation_sample = SubsetRandomSampler(np.arange(n_training_sample, n_training_sample + n_validation_sample,dtype=np.int64))
# Testing: sample from the first n_test_sample images (here all 10000).
n_test_sample = 10000
test_sample = SubsetRandomSampler(np.arange(n_test_sample, dtype=np.int64))
# A SubsetRandomSampler over every index draws samples in random order, like
# shuffle=True would. num_workers=0 loads the data in the main process.
train_batch_size = 100
test_batch_size = 4
train_loader = torch.utils.data.DataLoader(train_set, batch_size=train_batch_size, sampler=train_sample, num_workers=0)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=test_batch_size, sampler=test_sample, num_workers=0)
#val_loader = torch.utils.data.DataLoader(train_set, batch_size=500, sampler=validation_sample, num_workers=0)
# ================================================================================================
# 2 建立模型
# MNIST案例的網絡是卷積+全連接層的形式,這種結構的網絡效果其實不好:
# 因為全連接層傳遞效率較低,同時會干擾到卷積層提取出的局部特征。
# 並且也沒有用到BatchNorm和Dropout來防止過擬合的問題。
# 現在流行的網絡結構大多采用全卷積層的結構:
class CNN(nn.Module):
    """Small convolutional classifier with ten output classes.

    Three stages of two 3x3 convolutions each (64, 128 and 256 channels).
    Every convolution is followed by ReLU and its own BatchNorm2d layer;
    each stage ends with pooling and dropout. The last stage is averaged
    over its whole spatial extent and fed to a linear layer.
    """

    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, 3, padding=1)
        self.conv2 = nn.Conv2d(64, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv4 = nn.Conv2d(128, 128, 3, padding=1)
        self.conv5 = nn.Conv2d(128, 256, 3, padding=1)
        self.conv6 = nn.Conv2d(256, 256, 3, padding=1)
        self.maxpool = nn.MaxPool2d(2, 2)
        self.avgpool = nn.AvgPool2d(2, 2)
        # Adaptive pooling reduces any remaining spatial size to 1x1; for a
        # 32x32 input (8x8 feature map here) it equals AvgPool2d(8, 8).
        self.globalavgpool = nn.AdaptiveAvgPool2d(1)
        # One BatchNorm per convolution: a BatchNorm layer keeps running
        # statistics and affine parameters, so it must not be shared between
        # two different layers. bn1/bn2/bn3 keep their original names.
        self.bn1 = nn.BatchNorm2d(64)
        self.bn1_2 = nn.BatchNorm2d(64)
        self.bn2 = nn.BatchNorm2d(128)
        self.bn2_2 = nn.BatchNorm2d(128)
        self.bn3 = nn.BatchNorm2d(256)
        self.bn3_2 = nn.BatchNorm2d(256)
        self.dropout50 = nn.Dropout(0.5)
        self.dropout10 = nn.Dropout(0.1)
        self.fc = nn.Linear(256, 10)

    def forward(self, x):
        """Return class scores of shape (batch, 10) for a batch of 3-channel images."""
        # Stage 1: 64 channels, max pooling halves the spatial size.
        x = self.bn1(F.relu(self.conv1(x)))
        x = self.bn1_2(F.relu(self.conv2(x)))
        x = self.maxpool(x)
        x = self.dropout10(x)
        # Stage 2: 128 channels, average pooling halves the spatial size again.
        x = self.bn2(F.relu(self.conv3(x)))
        x = self.bn2_2(F.relu(self.conv4(x)))
        x = self.avgpool(x)
        x = self.dropout10(x)
        # Stage 3: 256 channels, then global average pooling to 1x1.
        x = self.bn3(F.relu(self.conv5(x)))
        x = self.bn3_2(F.relu(self.conv6(x)))
        x = self.globalavgpool(x)
        x = self.dropout50(x)
        # Flatten (batch, 256, 1, 1) to (batch, 256) for the classifier.
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
# Pick the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Build the network and move its parameters to the selected device.
cnn = CNN()
cnn = cnn.to(device)
# ===================================================================================
# Loss function: cross entropy between the class scores and the integer labels.
loss_func = nn.CrossEntropyLoss()
# Optimizer: Adam over all parameters of the network.
optimizer = optim.Adam(params=cnn.parameters(), lr=1e-3)
# ==========================================================================================
def trainNet(epoch):
    """Train the global ``cnn`` for one pass over ``train_loader``.

    Uses the module-level ``loss_func``, ``optimizer`` and ``device``.
    Prints the loss of the current batch roughly every 1% of the epoch and
    the mean batch loss plus the elapsed time at the end.

    Args:
        epoch: Epoch number, used only in the printed messages.
    """
    print('Epoch {}'.format(epoch))
    # test() switches the model to eval mode; switch back so that dropout and
    # batch-statistics BatchNorm are active while training.
    cnn.train()
    training_start_time = time.time()  # for the total time of this epoch
    start_time = time.time()  # for the time between two progress messages
    train_loss = 0
    n_batches = len(train_loader)
    # Integer logging interval (about every 1% of the epoch), never below 1.
    log_every = max(1, n_batches // 100)
    for step, (x_batch, y_batch) in enumerate(train_loader):
        # Move the batch to the same device as the model (GPU or CPU).
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        # Forward pass.
        outputs = cnn(x_batch)
        loss = loss_func(outputs, y_batch)
        train_loss += loss.item()
        if step % log_every == 0:
            print("epoch{}, {:d}% \t loss:{:.6f} took:{:.2f}s".format(epoch, int(100 * (step) / n_batches), loss.item(), time.time() - start_time))
            start_time = time.time()
        # Backward pass: clear old gradients (they accumulate otherwise),
        # compute new ones, then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # max(..., 1) avoids a division by zero when the loader is empty.
    print("Training loss={}, took {:.2f}s".format(train_loss / max(n_batches, 1), time.time() - training_start_time))
#使用以下方法保存和恢復網絡參數
#torch.save(cnn, 'cifar10.pkl')
#cnn = torch.load('cifar10.pkl')
def test():
    """Evaluate the global ``cnn`` on ``test_loader`` and print loss and accuracy.

    Runs in eval mode without gradient tracking. The printed loss is the
    mean of the per-batch losses; the accuracy is the number of correct
    predictions divided by the number of samples actually seen, so a
    smaller final batch is handled correctly.
    """
    correct = 0
    total = 0
    test_loss = 0
    cnn.eval()
    with torch.no_grad():
        for x_batch, y_batch in test_loader:
            # Move the batch to the same device as the model (GPU or CPU).
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            # Forward pass.
            out = cnn(x_batch)
            loss = loss_func(out, y_batch)
            # Index of the highest score is the predicted class.
            predicted = torch.max(out, 1)[1]
            correct += (predicted == y_batch).sum().item()
            total += y_batch.size(0)
            test_loss += loss.item()
    # max(..., 1) avoids a division by zero when the loader is empty.
    print("test loss = {:.2f}, Accuracy={:.6f}".format(test_loss / max(len(test_loader), 1), correct / max(total, 1)))
# Run the whole training process: ten epochs, evaluating after each one.
for epoch in range(1, 11):
    trainNet(epoch)
    test()
# Per-class accuracy on the test set.
cnn.eval()
class_correct = [0.] * 10
class_total = [0.] * 10
with torch.no_grad():
    for x_batch, y_batch in test_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        out = cnn(x_batch)
        predicted = torch.max(out, 1)[1]
        hits = predicted == y_batch
        # Iterate over the samples actually in this batch, so a smaller
        # final batch (or a batch of one) is handled correctly.
        for label, hit in zip(y_batch.tolist(), hits.tolist()):
            class_correct[label] += hit
            class_total[label] += 1
for i in range(10):
    # max(..., 1) avoids a division by zero for a class with no samples.
    print('Accuracy of %5s : %2d %%' % (classes[i], 100 * class_correct[i] / max(class_total[i], 1)))
CIFAR10 教程補充內容
更優選的網絡,類似 VGG
這個網絡的準確率比前面那個更高一些。
class Net(nn.Module):
    """VGG-like classifier with thirteen convolutions and three linear layers.

    Five convolutional stages, each followed by max pooling, BatchNorm2d and
    ReLU, then a three-layer fully connected head with dropout. The head
    expects 512*4*4 features, which is what a 32x32 input produces.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Stage 1: 64 channels.
        self.conv1 = nn.Conv2d(3, 64, 3, padding=1)
        self.conv2 = nn.Conv2d(64, 64, 3, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU()
        # Stage 2: 128 channels.
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv4 = nn.Conv2d(128, 128, 3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.relu2 = nn.ReLU()
        # Stage 3: 128 channels, ending with a 1x1 convolution.
        self.conv5 = nn.Conv2d(128, 128, 3, padding=1)
        self.conv6 = nn.Conv2d(128, 128, 3, padding=1)
        self.conv7 = nn.Conv2d(128, 128, 1, padding=1)
        self.pool3 = nn.MaxPool2d(2, 2, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU()
        # Stage 4: 256 channels, ending with a 1x1 convolution.
        self.conv8 = nn.Conv2d(128, 256, 3, padding=1)
        self.conv9 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv10 = nn.Conv2d(256, 256, 1, padding=1)
        self.pool4 = nn.MaxPool2d(2, 2, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.relu4 = nn.ReLU()
        # Stage 5: 512 channels, ending with a 1x1 convolution.
        self.conv11 = nn.Conv2d(256, 512, 3, padding=1)
        self.conv12 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv13 = nn.Conv2d(512, 512, 1, padding=1)
        self.pool5 = nn.MaxPool2d(2, 2, padding=1)
        self.bn5 = nn.BatchNorm2d(512)
        self.relu5 = nn.ReLU()
        # Classifier head. Plain (element-wise) Dropout is used because the
        # activations here are 2-D (batch, features); Dropout2d is meant for
        # feature maps with spatial dimensions.
        self.fc14 = nn.Linear(512 * 4 * 4, 1024)
        self.drop1 = nn.Dropout()
        self.fc15 = nn.Linear(1024, 1024)
        self.drop2 = nn.Dropout()
        self.fc16 = nn.Linear(1024, 10)

    def forward(self, x):
        """Return class scores of shape (batch, 10) for a batch of 3-channel images."""
        # Stage 1.
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.pool1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        # Stage 2.
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.pool2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        # Stage 3.
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        x = self.pool3(x)
        x = self.bn3(x)
        x = self.relu3(x)
        # Stage 4.
        x = self.conv8(x)
        x = self.conv9(x)
        x = self.conv10(x)
        x = self.pool4(x)
        x = self.bn4(x)
        x = self.relu4(x)
        # Stage 5.
        x = self.conv11(x)
        x = self.conv12(x)
        x = self.conv13(x)
        x = self.pool5(x)
        x = self.bn5(x)
        x = self.relu5(x)
        # Flatten per sample. Keeping the batch dimension fixed makes an
        # unexpected feature-map size fail in fc14 instead of silently
        # changing the batch size.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc14(x))
        x = self.drop1(x)
        x = F.relu(self.fc15(x))
        x = self.drop2(x)
        x = self.fc16(x)
        return x
顯示圖片及標簽
顯示一些訓練集中的照片:
import matplotlib.pyplot as plt
import numpy as np
def imshow(img):
    """Display an image tensor that was normalized to [-1, 1].

    Args:
        img: Image tensor in channel-first layout (channels, height, width),
            e.g. the result of ``torchvision.utils.make_grid``.
    """
    img = img / 2 + 0.5  # undo Normalize(mean=0.5, std=0.5): back to [0, 1]
    # detach().cpu() makes the conversion work for GPU tensors and for
    # tensors that are part of an autograd graph as well.
    npimg = img.detach().cpu().numpy()
    # matplotlib expects (height, width, channels).
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()
# Take one batch from the training loader defined above.
dataiter = iter(train_loader)
images, labels = next(dataiter)  # iterator objects no longer provide .next()
# Show at most four images so that the grid matches the printed labels.
n_show = min(4, len(labels))
imshow(torchvision.utils.make_grid(images[:n_show]))
print(' '.join('%5s' % classes[labels[j]] for j in range(n_show)))
顯示預測結果和實際結果:
# Take one batch from the test loader defined above.
dataiter = iter(test_loader)
images, labels = next(dataiter)  # iterator objects no longer provide .next()
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(len(labels))))
# Predict with the trained model: eval mode, no gradients, inputs moved to
# the device the model lives on.
cnn.eval()
with torch.no_grad():
    outputs = cnn(images.to(device))
# Index of the highest score is the predicted class.
predicted = torch.max(outputs, 1)[1]
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(len(predicted))))