cnn+pytorch實現食物分類


食物品種共有11類:Bread, Dairy product, Dessert, Egg, Fried food, Meat, Noodles/Pasta, Rice, Seafood, Soup, and Vegetable/Fruit.
Training set: 9866張
Validation set: 3430張
Testing set: 3347張

一、讀取數據

​ 想要解決這個問題,首先我們得先具備將數據讀取進入程序的能力,程序如下:

import os
import sys
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
import time
import random

def readfile(path, label, img_num):
    """Load a random sample of images from a directory into a uint8 array.

    The directory listing is shuffled, the first ``img_num`` names are kept
    and sorted, and every selected file is read with OpenCV and resized to
    128x128.

    Args:
        path: Directory that contains the image files.
        label: Boolean flag. When true, the class id is parsed from the part
            of each file name before the first underscore and the labels are
            returned together with the images.
        img_num: Maximum number of files to load from ``path``.

    Returns:
        ``(x, y)`` when ``label`` is true, otherwise only ``x``. ``x`` has
        shape ``(n, 128, 128, 3)`` and ``y`` has shape ``(n,)``; both arrays
        are ``np.uint8``.

    Raises:
        ValueError: If a selected file cannot be decoded as an image.
    """
    image_dir = os.listdir(path)              # file names found under path
    random.shuffle(image_dir)                 # randomise which files get picked
    image_dir = sorted(image_dir[0:img_num])  # keep the first img_num names (sorted list)
    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    for i, file in enumerate(image_dir):
        file_path = os.path.join(path, file)
        img = cv2.imread(file_path)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of inside cv2.resize.
        if img is None:
            raise ValueError("cannot read image file: {}".format(file_path))
        x[i] = cv2.resize(img, (128, 128))
        if label:
            # File names are expected to look like "<class id>_<anything>".
            y[i] = int(file.split("_")[0])
    if label:
        return x, y
    return x

# Read the training, validation and testing splits with readfile().
workspace_dir = './food-11'
print("Reading data")

# Labelled splits return (images, labels).
train_x, train_y = readfile(
    path=os.path.join(workspace_dir, "training"), label=True, img_num=100
)
print("Size of training data = {}".format(train_x.shape[0]))

val_x, val_y = readfile(
    path=os.path.join(workspace_dir, "validation"), label=True, img_num=25
)
print("Size of validation data = {}".format(val_x.shape[0]))

# The testing split is read without labels, so only the images come back.
test_x = readfile(
    path=os.path.join(workspace_dir, "testing"), label=False, img_num=50
)
print("Size of Testing data = {}".format(test_x.shape[0]))

二、制作數據集

​ 為了方便數據進行訓練,參考pytorch的 torch.utils.data

 torch.utils.data.Dataset 是一個表示數據集的抽象類,繼承它即可實現自定義的數據集;需要注意的是,其中 __getitem__() 和 __len__() 兩個函數必須重寫。

​ torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False,...) 返回一個提供數據集數據的迭代器。

# Image transform pipeline applied to every sample; ToTensor is its only step.
transform = transforms.Compose([transforms.ToTensor()])
# ImgDataset subclasses Dataset and defines how individual samples are read.
class ImgDataset(Dataset):
    """Dataset over an indexable collection of samples with optional labels.

    When labels are supplied they are converted to a ``torch.LongTensor`` and
    indexing yields ``(sample, label)``; without labels indexing yields only
    the sample. If ``transform`` is given it is applied to each sample at
    access time.
    """

    def __init__(self, x, y=None, transform=None):
        self.x = x
        # Labels stay None for unlabelled data.
        self.y = None if y is None else torch.LongTensor(y)
        self.transform = transform

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        sample = self.x[index]
        if self.transform is not None:
            sample = self.transform(sample)
        if self.y is None:
            return sample
        return sample, self.y[index]

batch_size = 10  # number of samples drawn per training step
train_set = ImgDataset(x=train_x, y=train_y, transform=transform)
val_set = ImgDataset(x=val_x, y=val_y, transform=transform)
# DataLoader serves each dataset in batches; only the training data is shuffled.
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_set, batch_size=batch_size, shuffle=False)

三、定義模型及其優化

​ 由於是分類任務,首先使用CNN嘗試一下效果,結構為5層3*3卷積層+3層全連接層,激活函數采用ReLU,並且模型使用交叉熵損失函數和Adam優化算法。

class Classifier(nn.Module):
    """CNN classifier: five 3x3 conv stages followed by three linear layers.

    Every conv stage is Conv2d(kernel 3, stride 1, padding 1) -> BatchNorm2d
    -> ReLU -> MaxPool2d(2, 2, 0). For an input of [3, 128, 128] the stages
    produce [64, 64, 64], [128, 32, 32], [256, 16, 16], [512, 8, 8] and
    [512, 4, 4]; the flattened result feeds a 1024 -> 512 -> 11 linear head.
    """

    def __init__(self):
        super().__init__()
        # (in_channels, out_channels) for each conv stage, in order.
        stage_channels = [(3, 64), (64, 128), (128, 256), (256, 512), (512, 512)]
        layers = []
        for in_ch, out_ch in stage_channels:
            layers.append(nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1))
            layers.append(nn.BatchNorm2d(out_ch))
            layers.append(nn.ReLU())
            # Each pooling step halves the spatial height and width.
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2, padding=0))
        self.cnn = nn.Sequential(*layers)

        self.fc = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11),
        )

    def forward(self, x):
        features = self.cnn(x)
        # Flatten everything except the batch dimension for the linear head.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)
    
model = Classifier()
# Classification task, so cross-entropy is used as the loss function.
loss = nn.CrossEntropyLoss()
# Adam optimizer over all model parameters.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

四、訓練數據集

 訓練過程分成兩個階段:先用訓練集訓練模型,並用驗證集驗證模型性能;若結果尚可,則將訓練集和驗證集的數據合併,重新訓練出最終的模型。

num_epoch = 10
# Train on the training split and evaluate on the validation split every epoch.
for epoch in range(num_epoch):
    epoch_start_time = time.time()

    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    model.train()  # training mode for layers such as BatchNorm
    for inputs, labels in train_loader:
        optimizer.zero_grad()                  # clear gradients from the previous step
        train_pred = model(inputs)             # forward pass
        batch_loss = loss(train_pred, labels)  # mean loss over this batch
        batch_loss.backward()                  # back-propagate
        optimizer.step()                       # update the parameters

        train_acc += (train_pred.argmax(dim=1) == labels).sum().item()
        # batch_loss is already averaged over the batch, so weight it by the
        # batch size; dividing the total by the dataset size below then gives
        # the true per-sample mean instead of a value ~batch_size too small.
        train_loss += batch_loss.item() * labels.size(0)

    model.eval()
    with torch.no_grad():  # no gradient tracking during validation
        for inputs, labels in val_loader:
            val_pred = model(inputs)
            batch_loss = loss(val_pred, labels)

            val_acc += (val_pred.argmax(dim=1) == labels).sum().item()
            val_loss += batch_loss.item() * labels.size(0)

    # Report per-sample accuracy and loss for this epoch.
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' %
          (epoch + 1, num_epoch, time.time() - epoch_start_time,
           train_acc / len(train_set), train_loss / len(train_set),
           val_acc / len(val_set), val_loss / len(val_set)))


# Merge the training and validation data and train a fresh model on all of it.
train_val_x = np.concatenate((train_x, val_x), axis=0)
train_val_y = np.concatenate((train_y, val_y), axis=0)
train_val_set = ImgDataset(train_val_x, train_val_y, transform)
train_val_loader = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)

model_best = Classifier()
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_best.parameters(), lr=0.001)  # Adam optimizer
num_epoch = 10

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0

    model_best.train()
    for inputs, labels in train_val_loader:
        optimizer.zero_grad()
        train_pred = model_best(inputs)
        batch_loss = loss(train_pred, labels)  # mean loss over this batch
        batch_loss.backward()
        optimizer.step()

        train_acc += (train_pred.argmax(dim=1) == labels).sum().item()
        # batch_loss is already averaged over the batch, so weight it by the
        # batch size; dividing the total by the dataset size below then gives
        # the true per-sample mean instead of a value ~batch_size too small.
        train_loss += batch_loss.item() * labels.size(0)

    # Report per-sample accuracy and loss for this epoch.
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' %
          (epoch + 1, num_epoch, time.time() - epoch_start_time,
           train_acc / len(train_val_set), train_loss / len(train_val_set)))

五、測試集預測

​ 基於上面的模型對測試集進行預測,提交結果。

# Predict class ids for the test set with the final model.
test_set = ImgDataset(test_x, transform=transform)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
model_best.eval()
prediction = []
with torch.no_grad():  # inference only, no gradient tracking
    for data in test_loader:
        test_pred = model_best(data)
        # .tolist() yields plain Python ints, so the printed list reads
        # "[9, 10, ...]" regardless of how NumPy formats its scalar types.
        prediction.extend(test_pred.argmax(dim=1).tolist())

# Print the predicted class ids for the test set.
print(prediction)

# 運行結果
Reading data
Size of training data = 100
Size of validation data = 25
Size of Testing data = 50
[001/010] 29.65 sec(s) Train Acc: 0.110000 Loss: 0.381131 | Val Acc: 0.080000 loss: 0.291486
[002/010] 24.25 sec(s) Train Acc: 0.090000 Loss: 0.254135 | Val Acc: 0.040000 loss: 0.279987
[003/010] 24.85 sec(s) Train Acc: 0.120000 Loss: 0.234980 | Val Acc: 0.000000 loss: 0.276088
[004/010] 28.55 sec(s) Train Acc: 0.200000 Loss: 0.218716 | Val Acc: 0.080000 loss: 0.274945
[005/010] 27.35 sec(s) Train Acc: 0.330000 Loss: 0.202054 | Val Acc: 0.120000 loss: 0.277814
[006/010] 27.55 sec(s) Train Acc: 0.290000 Loss: 0.202682 | Val Acc: 0.080000 loss: 0.279993
[007/010] 25.17 sec(s) Train Acc: 0.320000 Loss: 0.185320 | Val Acc: 0.080000 loss: 0.315286
[008/010] 23.11 sec(s) Train Acc: 0.400000 Loss: 0.168070 | Val Acc: 0.000000 loss: 0.293173
[009/010] 23.24 sec(s) Train Acc: 0.370000 Loss: 0.164915 | Val Acc: 0.200000 loss: 0.275563
[010/010] 24.79 sec(s) Train Acc: 0.480000 Loss: 0.149648 | Val Acc: 0.040000 loss: 0.342993
[001/010] 28.93 sec(s) Train Acc: 0.160000 Loss: 0.371954
[002/010] 28.82 sec(s) Train Acc: 0.080000 Loss: 0.258056
[003/010] 27.99 sec(s) Train Acc: 0.240000 Loss: 0.230340
[004/010] 28.19 sec(s) Train Acc: 0.224000 Loss: 0.223114
[005/010] 28.35 sec(s) Train Acc: 0.232000 Loss: 0.209750
[006/010] 27.52 sec(s) Train Acc: 0.376000 Loss: 0.197971
[007/010] 28.17 sec(s) Train Acc: 0.328000 Loss: 0.188320
[008/010] 27.53 sec(s) Train Acc: 0.448000 Loss: 0.182249
[009/010] 29.18 sec(s) Train Acc: 0.344000 Loss: 0.203064
[010/010] 27.91 sec(s) Train Acc: 0.472000 Loss: 0.167914
[9, 10, 2, 5, 9, 2, 10, 10, 9, 10, 10, 0, 0, 0, 2, 0, 9, 5, 10, 4, 4, 0, 3, 0, 10, 2, 10, 4, 2, 4, 5, 9, 2, 10, 2, 5, 1, 10, 4, 9, 10, 10, 9, 9, 10, 10, 9, 10, 5, 2]



免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM