貓狗大戰:利用ResNet遷移學習進行圖像識別


掛載Google Drive,避免數據集重復下載

# Mount Google Drive so the dataset archive does not have to be re-downloaded
# on every Colab session.
from google.colab import drive
drive.mount('/content/drive')

導入包、設置GPU、設定隨機種子

import numpy as np
import matplotlib.pyplot as plt
import os
import torch
import torch.nn as nn
import torchvision
from torchvision import models,transforms,datasets
import time
import json
import shutil
from PIL import Image
import csv

# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Using gpu: %s ' % torch.cuda.is_available())
# Fix random seeds for reproducibility.
# NOTE(review): full determinism would also require seeding python/numpy and
# configuring cudnn deterministic mode — see the author's note further down.
torch.manual_seed(10000)            # seed the CPU RNG
torch.cuda.manual_seed(10000)       # seed the current GPU's RNG
torch.cuda.manual_seed_all(10000)   # seed all GPUs' RNGs

下載數據集,並將數據集分類

#! wget https://static.leiphone.com/cat_dog.rar
# Extract the archive from Drive onto the local VM disk: training directly
# from Drive is slow because every file read is a network request.
!unrar x "/content/drive/My Drive/catdog/cat_dog.rar" "/content/sample_data"
%cd sample_data/
# Sort the cat/dog images of the train/val splits into per-class subfolders
# so that torchvision's ImageFolder can infer the labels from the layout.
for x in ['train', 'val']:
    imgPath = "cat_dog/" + x
    pathlist = os.listdir(imgPath)
    data_destination = 'cat_dog/' + x + '/cat/'
    label_destination = 'cat_dog/' + x + '/dog/'
    # exist_ok=True: the original `if not (a and b)` guard raised
    # FileExistsError when only ONE of the two class folders already existed.
    os.makedirs(data_destination, exist_ok=True)
    os.makedirs(label_destination, exist_ok=True)

    # Classify by filename (cat*.jpg / dog*.jpg) and move each image into the
    # matching class folder.
    for item in pathlist:
        stem, ext = os.path.splitext(item)
        if ext != '.jpg':
            continue  # skip the class subfolders themselves and any non-images
        if 'cat' in stem:
            print(os.path.join(imgPath, item))
            shutil.move(os.path.join(imgPath, item), data_destination)
        elif 'dog' in stem:
            print(os.path.join(imgPath, item))
            shutil.move(os.path.join(imgPath, item), label_destination)

載入數據集,並對數據進行處理

# ImageNet channel statistics that the pretrained ResNet expects.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Preprocessing for ResNet: 224x224 center crop, tensor conversion, normalize.
resnet_format = transforms.Compose([
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ])

data_dir = './cat_dog'

# One ImageFolder per split; labels are inferred from the class subfolders
# created above (alphabetical: 'cat' -> 0, 'dog' -> 1).
dsets = {x: datasets.ImageFolder(os.path.join(data_dir, x), resnet_format)
         for x in ['train', 'val']}

dset_sizes = {x: len(dsets[x]) for x in ['train', 'val']}
dset_classes = dsets['train'].classes

# resnet152 needs a lot of GPU memory, so the train batch size is reduced to 48.
loader_train = torch.utils.data.DataLoader(dsets['train'], batch_size=48, shuffle=True, num_workers=6)
loader_valid = torch.utils.data.DataLoader(dsets['val'], batch_size=5, shuffle=False, num_workers=6)

載入ResNet152並修改模型全連接層

# Load ResNet-152 pretrained on ImageNet and replace its final fully connected
# layer with a fresh 2-way classifier (cat vs dog).
# NOTE(review): `model_new = model` is an alias, not a copy — modifying
# `model_new.fc` also modifies `model`.
model = models.resnet152(pretrained=True)
model_new = model;
model_new.fc = nn.Linear(2048, 2,bias=True)
model_new = model_new.to(device)
print(model_new)

部分參數

#采用交叉熵損失函數
# Cross-entropy loss for the 2-class classification task.
criterion = nn.CrossEntropyLoss()

# Initial learning rate 0.001, decayed by x0.1 every 10 epochs.
lr = 0.001

# SGD with momentum to speed up learning; weight decay to curb overfitting.
optimizer = torch.optim.SGD(model_new.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)

模型訓練

def val_model(model, dataloader, size):
    """Evaluate `model` on `dataloader`; return (mean loss, accuracy).

    Args:
        model: network to evaluate (switched to eval mode here).
        dataloader: yields (inputs, class-index) batches.
        size: total number of samples behind the loader.

    Relies on the module-level `criterion` and `device`.
    """
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    with torch.no_grad():
        for inputs, classes in dataloader:
            inputs = inputs.to(device)
            classes = classes.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, classes)
            _, preds = torch.max(outputs.data, 1)
            # criterion returns the per-batch *mean* loss, so weight it by the
            # batch size before dividing by `size`. The original summed the
            # batch means directly, which made train/val losses incomparable
            # when their batch sizes differ (48 vs 5 here — the reported ~10x
            # train/val loss gap).
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == classes.data)
    epoch_loss = running_loss / size
    epoch_acc = running_corrects.item() / size
    return epoch_loss, epoch_acc


def train_model(model, dataloader, size, epochs=1, optimizer=None):
    """Train `model` for `epochs` epochs, validating after each epoch.

    Args:
        model: network to train (switched to train mode each epoch).
        dataloader: yields (inputs, class-index) training batches.
        size: number of training samples (for loss/accuracy averaging).
        epochs: number of full passes over the training data.
        optimizer: optimizer over `model`'s parameters (required).

    Relies on the module-level `criterion`, `device`, `scheduler`,
    `loader_valid` and `dset_sizes`.
    """
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        running_corrects = 0
        for inputs, classes in dataloader:
            inputs = inputs.to(device)
            classes = classes.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, classes)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            _, preds = torch.max(outputs.data, 1)
            # Weight the per-batch mean loss by the batch size so the epoch
            # loss is a true per-sample average (consistent with val_model).
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == classes.data)
        epoch_loss = running_loss / size
        epoch_acc = running_corrects.item() / size
        epoch_Valloss, epoch_Valacc = val_model(model, loader_valid, dset_sizes['val'])
        print('epoch: ', epoch, ' Loss: {:.5f} Acc: {:.5f} ValLoss: {:.5f} ValAcc: {:.5f}'.format(
                     epoch_loss, epoch_acc, epoch_Valloss, epoch_Valacc))
        # Advance the LR schedule once per epoch (x0.1 every 10 epochs).
        scheduler.step()
        
        
# Learning-rate decay: multiply by 0.1 every 10 epochs.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
# Train for 20 epochs.
train_model(model_new,loader_train,size=dset_sizes['train'], epochs=20, 
            optimizer=optimizer)  

模型測試並輸出csv文件

# Run inference on the unlabeled test images and write "<index>,<prediction>"
# rows to csv.csv (0 = cat, 1 = dog, per ImageFolder's alphabetical ordering).
model_new.eval()
test_root = './cat_dog/test/'
img_test = os.listdir(test_root)
# Test files are named "<number>.jpg"; sort numerically so row i is image i.
img_test.sort(key=lambda x: int(x[:-4]))
# newline='' is required by the csv module to avoid blank rows on Windows;
# `with` guarantees the file is closed (the original leaked the handle on
# any exception and never called close() until the very end).
with open('csv.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    # no_grad: gradients are not needed at inference time — saves memory.
    with torch.no_grad():
        for i in range(len(img_test)):
            img = Image.open(test_root + img_test[i])
            img = img.convert('RGB')  # some images may be grayscale/RGBA
            batch = resnet_format(img).unsqueeze(0).to(device)  # add batch dim
            output = model_new(batch)
            _, pred = torch.max(output.data, 1)
            print(i, pred.tolist()[0])
            writer.writerow([i, pred.tolist()[0]])

訓練驗證結果如下:

測試結果如下:

  一開始采用的VGG16進行訓練,凍結FC層之前參數,將優化器由SGD改為Adam,1個epoch下測試結果得分98.1。多個epoch跑下來,效果提升有限,遂采用ResNet。

遇到的問題:

  • Colab數據集訓練時不要放在Google Drive中!Google Drive中數據和服務器計算是分離的,每次讀取數據都需要向Drive進行網絡請求,導致訓練速度被網絡速度拖慢,特別是在傳輸大量小圖片數據時。
  • 為了使結果復現,嘗試給PyTorch設置隨機種子,但還不能保證精確一致,或許還需要設置cudnn、python,numpy。PyTorch的可重復性問題 (如何使實驗結果可復現)
  • ResNet訓練時曾嘗試凍結FC層之前參數,效果不理想。
  • 大部分情況下,Adam效果相較SGD更好,然而在ResNet下,SGD效果比Adam好。
  • 修改網絡結構還可通過繼承的方式
  class Net(nn.Module):
      """Alternative head-swap: wrap a backbone and attach a new classifier."""
      def __init__(self, model):
          super(Net, self).__init__()
          # Keep every child module of `model` except the last (the original fc).
          self.resnet_layer = nn.Sequential(*list(model.children())[:-1])
          self.Linear_layer = nn.Linear(2048, 2) # new 2-way fully connected head
  
      def forward(self, x):
          x = self.resnet_layer(x)
          x = x.view(x.size(0), -1)  # flatten to (batch, features) for the fc
          x = self.Linear_layer(x)
          return x
  
  model_new = Net(model)
  model_new = model_new.to(device)

待解決

  訓練和驗證的loss相差大約十倍,是不是哪里寫錯了。

吐槽

  Google Drive掛載功能維護了一天,Colab限額又鎖了一天,果然羊毛不是那么好薅的 - -。


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM