pytorch實戰 貓狗大戰Kaggle 遷移學習ResNet50模型微調
貓狗大戰數據集
-
這是kaggle上一個非常經典的二分類圖像數據集,訓練集包括25000張貓和狗的圖片及其標簽,測試集則是12500張未標簽圖片,數據下載地址https://www.kaggle.com/c/dogs-vs-cats/data。不過這個網址比較遠古,無法提交自己訓練的答案,可以到新的(
雖然也不新了)比賽鏈接提交https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/overview -
將訓練數據按類別分開整理成如下結構
|-- train |-- cat |-- 1.jpg |-- 2.jpg |-- ... |-- dog |-- 1.jpg |-- 2.jpg |-- ...
數據加載及處理
-
整理好數據后,我們可以直接使用ImageFolder讀取並使用random_split()划分數據集驗證集
all_data = torchvision.datasets.ImageFolder( root=train_root, transform=train_transform ) train_data , vaild_data = torch.utils.data.random_split(all_data, [int(0.8*len(all_data)), len(all_data)-int(0.8*len(all_data))])
-
復雜的也可以繼承datasets類,簡單示例
class MyDataset(Dataset): def __init__(self, root, size=229, ): """ Initialize the data producer """ self._root = root self._size = size self._num_image = len(os.listdir(root)) self._img_name = os.listdir(root) def __len__(self): return self._num_image def __getitem__(self, index): img = Image.open(os.path.join(self._root, self._img_name[index])) # PIL image: H × W × C # torch image: C × H × W img = np.array(img, dtype=np.float32).transpose((2, 0, 1)) return img
-
為防止過擬合,可以對數據進行翻轉,亮度,對比度等數據增廣
train_transform = transforms.Compose([ transforms.Resize(224), transforms.RandomResizedCrop(224,scale=(0.6,1.0),ratio=(0.8,1.0)), transforms.RandomHorizontalFlip(), torchvision.transforms.ColorJitter(brightness=0.5, contrast=0, saturation=0, hue=0), torchvision.transforms.ColorJitter(brightness=0, contrast=0.5, saturation=0, hue=0), transforms.ToTensor(), transforms.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5]) ])
-
加載為pytorch讀取的數據集
train_set = torch.utils.data.DataLoader( train_data, batch_size=BTACH_SIZE, shuffle=True ) test_set = torch.utils.data.DataLoader( vaild_data, batch_size=BTACH_SIZE, shuffle=False )
遷移學習 ResNet50微調
-
此前自己寫過淺層的CNN,驗證集准確率只能達到七十多,深了不會寫估計訓練也很難,於是采用遷移學習的思想,torchvision提供了很多現成的模型和預訓練好的參數:
- Alexnet
- VGG
- ResNet
- SqueezeNet
- DenseNet
- Inception v3
-
這里我們使用殘差網絡 ResNet50 並且替換最後的全連接層輸出二分類(softmax 由 CrossEntropyLoss 在訓練時內部完成,無需顯式添加)
model = torchvision.models.resnet50(pretrained=True) model.fc = nn.Sequential( nn.Linear(2048,2) )
-
這里我用0.01的學習率訓練了5次就能達到九十多的准確率了,實際上還可以對輸出層使用較高的學習率而對其他層使用較低的學習率來達到更好的微調效果
Kaggle提交
- 直接提交看看,貌似一般般,在排行榜里算中等
- kaggle這里的評估是使用logloss
- 我們看看標簽分別是0和1的時候這個loss是怎樣的
- 這時候就可以有一些
奇技淫巧,面向評估函數編程,把預測狗的統一改成0.95,貓的改成0.05,再提交一下
- 僅供娛樂
代碼
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
# Hyperparameters
DEVICE = torch.device('cuda')  # NOTE(review): assumes a CUDA GPU is available — no CPU fallback
LR = 0.001  # SGD learning rate
EPOCH = 50  # number of training epochs
BTACH_SIZE = 32  # batch size (sic: "BTACH" typo kept — the DataLoaders below reference this name)
train_root = './train'  # ImageFolder root: one subdirectory per class (cat/, dog/)
# --- Data loading and augmentation ---
# Augmentation pipeline: resize, random crop, random horizontal flip,
# brightness jitter, contrast jitter, then normalize each channel to
# roughly [-1, 1].
train_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomResizedCrop(224, scale=(0.6, 1.0), ratio=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    torchvision.transforms.ColorJitter(brightness=0.5, contrast=0, saturation=0, hue=0),
    torchvision.transforms.ColorJitter(brightness=0, contrast=0.5, saturation=0, hue=0),
    transforms.ToTensor(),
    transforms.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5]),
])

# Read the class-per-subfolder training directory (labels come from the
# subfolder names).
all_data = torchvision.datasets.ImageFolder(root=train_root, transform=train_transform)

# Random 80/20 split into training and validation subsets.
n_train = int(0.8 * len(all_data))
train_data, vaild_data = torch.utils.data.random_split(all_data, [n_train, len(all_data) - n_train])

# Batched loaders; only the training loader is shuffled.
train_set = torch.utils.data.DataLoader(train_data, batch_size=BTACH_SIZE, shuffle=True)
test_set = torch.utils.data.DataLoader(vaild_data, batch_size=BTACH_SIZE, shuffle=False)
# --- Training and validation ---
# CrossEntropyLoss = log-softmax + negative log-likelihood, so the model
# outputs raw logits (no explicit softmax layer needed).
# (Name "criteration" is a typo for "criterion"; kept because train()
# references it by this name.)
criteration = nn.CrossEntropyLoss()
def train(model, device, dataset, optimizer, epoch):
    """Run one training epoch and print average loss and accuracy.

    Args:
        model: network to train (parameters updated in place).
        device: torch.device the batches are moved to.
        dataset: DataLoader yielding (images, labels) batches.
        optimizer: optimizer stepping on ``model``'s parameters.
        epoch: epoch index, used only in the log line.
    """
    model.train()
    correct = 0
    total = 0          # number of samples seen this epoch
    total_loss = 0.0   # running sum of per-sample loss
    for i, (x, y) in tqdm(enumerate(dataset)):
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        output = model(x)
        # argmax over class logits -> predicted class index
        pred = output.max(1, keepdim=True)[1]
        correct += pred.eq(y.view_as(pred)).sum().item()
        total += y.size(0)
        loss = criteration(output, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * y.size(0)
    # BUG FIX: the original divided `correct` by len(dataset), which is the
    # number of *batches* in a DataLoader, not the number of samples, so the
    # printed accuracy was wrong (could exceed 100%). It also printed only
    # the last batch's loss; report the epoch averages instead.
    print("Epoch {} Loss {:.4f} Accuracy {}/{} ({:.0f}%)".format(
        epoch, total_loss / max(total, 1), correct, total,
        100. * correct / max(total, 1)))
def vaild(model, device, dataset):
    """Evaluate ``model`` on the validation loader; print loss and accuracy.

    No gradients are computed or applied. (Name "vaild" is a typo for
    "valid", kept because the training loop at the bottom of the file
    calls it by this name.)
    """
    model.eval()
    correct = 0
    total = 0          # number of samples evaluated
    total_loss = 0.0   # running sum of per-sample loss
    with torch.no_grad():
        for i, (x, y) in tqdm(enumerate(dataset)):
            x, y = x.to(device), y.to(device)
            output = model(x)
            # BUG FIX: the original did `nn.CrossEntropyLoss(output, y)`,
            # which *constructs a loss module* (treating `output` as the
            # class-weight tensor) instead of computing a loss value.
            loss = F.cross_entropy(output, y)
            total_loss += loss.item() * y.size(0)
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(y.view_as(pred)).sum().item()
            total += y.size(0)
    # BUG FIX: divide by the number of samples, not len(dataset)
    # (= number of batches). Also guard against an empty loader, which
    # previously left `loss` undefined at the print.
    print("Test Loss {:.4f} Accuracy {}/{} ({:.0f}%)".format(
        total_loss / max(total, 1), correct, total,
        100. * correct / max(total, 1)))
# Build ResNet50 with ImageNet-pretrained weights (downloaded on first use)
# and replace its final fully connected layer (2048 -> 1000 classes) with a
# fresh 2048 -> 2 head for cat/dog binary classification.
model = torchvision.models.resnet50(pretrained=True)
model.fc = nn.Sequential(
    nn.Linear(2048,2)
)
model.to(DEVICE)
# Plain SGD over *all* parameters (pretrained backbone + new head share LR).
# NOTE(review): momentum=0.09 is unusually low — possibly a typo for the
# conventional 0.9; confirm before changing, since the reported results
# were obtained with this value.
optimizer = optim.SGD(model.parameters(), lr = LR, momentum = 0.09)
# Train and validate once per epoch.
for epoch in range(1,EPOCH+1):
    train(model,DEVICE,train_set,optimizer,epoch)
    vaild(model,DEVICE,test_set)