本博文內容:
- Caltech101數據集;
- 神經網絡(模型、工具、目錄)
- 編寫代碼
一、Caltech101數據集;
這個數據集包含了101類的圖像,每類大約有40~800張圖像,大部分是50張/類,在2003年由李飛飛(Fei-Fei Li)等人收集,每張圖像的大小大約是300x200。
圖像的類別分布:
按Top40圖片數量從大到小的順序展示:
整體數據集情況:
可以看到圖片的數量非常不均衡;
像這樣類別不均衡的圖片數據,是影響深度學習模型表現的主要原因之一。
二、神經網絡(模型、工具、目錄)
網絡:ResNet34
使用 ImageNet中預訓練好的權重——遷移學習提高深度學習的表現力
對於隱藏層的權重,我們將不進行更新,但是我們會微調ResNet34網絡的頭部來支持我們的網絡;
當進行微調的時候,我們同樣也會加入Dropout層;
如何在類別不均衡的圖片上實現較高的精度
圖片類別不均衡的解決方法:
1)獲取更多的數據
2)使用數據增強;
在數據無法增多的情況下,數據增強效果比較好;數據增強使神經網絡可以看到數據不同類型的變化,大小、角度、顏色等;
但是我們現在不使用上述兩種方法,事實上,我們將采用的是遷移學習和微調神經網絡來實現更高的精度;
工具:
Install PyTorch.
Install pretrainedmodels. ——提供ResNet預訓練模型
pip install pretrainedmodels
Install imutils ——實現圖片的旋轉縮放等;
pip install imutils
目錄
1 ├───input 2 │ ├───101_ObjectCategories 3 │ │ ├───accordion 4 │ │ ├───airplanes 5 │ │ ├───anchor 6 │ │ ├───ant 7 │ │ ├───BACKGROUND_Google 8 │ │ ├───barrel 9 │ │ ├───bass 10 │ │ ├───beaver 11 │ │ ├───binocular 12 │ │ ├───bonsai 13 │ │ ├───brain 14 │ │ ├───brontosaurus 15 ... 16 ├───outputs 17 │ ├───models #最終訓練好的模型結果 18 │ └───plots 19 └───src 20 └───train.py
- 編寫代碼
導入相關的包
# imports
import matplotlib.pyplot as plt
import matplotlib
import joblib
import cv2  # used to read images from disk into the dataset
import os
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time
import random
import pretrainedmodels
from imutils import paths
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from torchvision.transforms import transforms
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

matplotlib.style.use('ggplot')

'''SEED Everything'''
def seed_everything(SEED=42):
    """Seed every RNG (Python, NumPy, PyTorch CPU/CUDA) for reproducible runs.

    Args:
        SEED (int): seed applied to all random number generators.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.benchmark = True  # keep True if all the input have same size.
    # benchmark=True lets cuDNN pick the fastest algorithm, which may be
    # non-deterministic; force deterministic kernels so the seeding above
    # actually yields reproducible results (the function's stated purpose).
    torch.backends.cudnn.deterministic = True

SEED = 42
seed_everything(SEED=SEED)
'''SEED Everything'''
超參數的設置:
定義設備、EPOCH以及batch size
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Training hyperparameters.
epochs = 5
BATCH_SIZE = 16
准備標簽和圖像
# Gather every image path under the dataset root, then load images and
# labels; the label is the image's parent directory name.
image_paths = list(paths.list_images('../input/101_ObjectCategories'))
data = []
labels = []
for image_path in image_paths:
    label = image_path.split(os.path.sep)[-2]
    # BACKGROUND_Google is a clutter class, not a real object category.
    if label == 'BACKGROUND_Google':
        continue
    image = cv2.imread(image_path)
    # cv2.imread returns None for unreadable/corrupt files — skip them
    # instead of crashing later in cvtColor.
    if image is None:
        continue
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    data.append(image)
    labels.append(label)
# Images have varying sizes, so the container must hold Python objects;
# dtype=object keeps modern NumPy from rejecting the ragged input.
data = np.array(data, dtype=object)
labels = np.array(labels)
使用One-hot編碼(sklearn的LabelBinarizer,擬合後得到編碼器對象lb,後文模型會用到lb.classes_)對label進行編碼
定義圖像變換
# define transforms: every image is resized to 224x224 (ResNet input size)
# and normalized with the ImageNet channel statistics.
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]

train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),
])

# Kept as a separate pipeline so augmentation can later be added to the
# training transform only.
val_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),
])
一般只對訓練數據進行數據變換;
所以我們在此分開寫訓練和驗證的數據變換函數;
數據分割,切分為訓練、驗證和測試集
# divide the data into train, validation, and test set (60/20/20):
# first carve out 20% for validation, then 25% of the remaining 80%
# (i.e. 20% of the whole) for testing.
(X, x_val, Y, y_val) = train_test_split(data, labels,
                                        test_size=0.2,
                                        stratify=labels,
                                        random_state=42)
# Stratify this split too, so the heavily imbalanced classes keep the same
# proportions in train and test (the original only stratified the first split).
(x_train, x_test, y_train, y_test) = train_test_split(X, Y,
                                                      test_size=0.25,
                                                      stratify=Y,
                                                      random_state=42)
print(f"x_train examples: {x_train.shape}\nx_test examples: {x_test.shape}\nx_val examples: {x_val.shape}")
輸出:
1 x_train examples: (5205,) 2 x_test examples: (1736,) 3 x_val examples: (1736,)
創建自定義數據集和Loaders
# custom dataset wrapping in-memory images and (optionally) labels
class ImageDataset(Dataset):
    """Serves one transformed image, paired with its label when labels exist."""

    def __init__(self, images, labels=None, transforms=None):
        self.X = images
        self.y = labels
        self.transforms = transforms

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        item = self.X[i][:]
        if self.transforms:
            item = self.transforms(item)
        if self.y is None:
            # Unlabeled mode: return just the image.
            return item
        return (item, self.y[i])

train_data = ImageDataset(x_train, y_train, train_transform)
val_data = ImageDataset(x_val, y_val, val_transform)
test_data = ImageDataset(x_test, y_test, val_transform)
# dataloaders
def _make_loader(dataset, shuffle):
    # Thin factory so all three loaders share one batch size.
    return DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=shuffle)

trainloader = _make_loader(train_data, True)
valloader = _make_loader(val_data, True)    # val shuffled too, as in the original setup
testloader = _make_loader(test_data, False) # fixed order for repeatable evaluation
注意:只對訓練集和驗證集進行shuffle,對測試集不進行shuffle
神經網絡模型搭建;
# the resnet34 model
class ResNet34(nn.Module):
    """ImageNet-pretrained ResNet34 with a new Linear classification head.

    The head is sized from `lb.classes_` — `lb` is the fitted
    LabelBinarizer defined at module level (one-hot label encoder).
    """

    def __init__(self, pretrained):
        super(ResNet34, self).__init__()
        if pretrained is True:
            self.model = pretrainedmodels.__dict__['resnet34'](pretrained='imagenet')
        else:
            self.model = pretrainedmodels.__dict__['resnet34'](pretrained=None)

        # change the classification layer: 512 backbone features -> #classes
        self.l0 = nn.Linear(512, len(lb.classes_))
        # nn.Dropout, not nn.Dropout2d: dropout is applied after the features
        # are flattened to (batch, 512), and Dropout2d expects 4-D input.
        self.dropout = nn.Dropout(0.4)

    def forward(self, x):
        # get the batch size only, ignore (c, h, w)
        batch, _, _, _ = x.shape
        x = self.model.features(x)
        x = F.adaptive_avg_pool2d(x, 1).reshape(batch, -1)
        x = self.dropout(x)
        l0 = self.l0(x)
        return l0

model = ResNet34(pretrained=True).to(device)
優化器和損失函數定義
# optimizer: Adam with a small learning rate, suitable for fine-tuning
# a pretrained backbone
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# loss function: cross-entropy for multi-class classification
criterion = nn.CrossEntropyLoss()
訓練函數:
# training function
def fit(model, dataloader):
    """Run one training epoch over `dataloader`.

    Returns:
        (loss, accuracy): mean per-batch loss and accuracy in percent.
    Uses the module-level `optimizer`, `criterion` and `device`.
    """
    print('Training')
    model.train()
    running_loss = 0.0
    running_correct = 0
    # len(dataloader) is the batch count — no need to reach for the
    # global train_data as the original did.
    for i, data in tqdm(enumerate(dataloader), total=len(dataloader)):
        data, target = data[0].to(device), data[1].to(device)
        # Labels arrive one-hot encoded; decode once to class indices.
        target_idx = torch.max(target, 1)[1]
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, target_idx)
        running_loss += loss.item()
        _, preds = torch.max(outputs.data, 1)
        running_correct += (preds == target_idx).sum().item()
        loss.backward()
        optimizer.step()

    # loss.item() is already the per-sample mean of its batch, so average
    # over batches (the original divided by the dataset size, deflating it).
    loss = running_loss / len(dataloader)
    accuracy = 100. * running_correct / len(dataloader.dataset)

    print(f"Train Loss: {loss:.4f}, Train Acc: {accuracy:.2f}")

    return loss, accuracy
驗證函數:
# validation function
def validate(model, dataloader):
    """Evaluate one epoch over `dataloader` without gradient tracking.

    Returns:
        (loss, accuracy): mean per-batch loss and accuracy in percent.
    Uses the module-level `criterion` and `device`.
    """
    print('Validating')
    model.eval()
    running_loss = 0.0
    running_correct = 0
    with torch.no_grad():
        # len(dataloader) is the batch count — avoids the original's
        # dependence on the global val_data.
        for i, data in tqdm(enumerate(dataloader), total=len(dataloader)):
            data, target = data[0].to(device), data[1].to(device)
            # One-hot targets -> class indices, computed once per batch.
            target_idx = torch.max(target, 1)[1]
            outputs = model(data)
            loss = criterion(outputs, target_idx)

            running_loss += loss.item()
            _, preds = torch.max(outputs.data, 1)
            running_correct += (preds == target_idx).sum().item()

    # Average the per-batch mean losses over batches (the original divided
    # the sum of batch means by the dataset size, deflating the value).
    loss = running_loss / len(dataloader)
    accuracy = 100. * running_correct / len(dataloader.dataset)
    print(f'Val Loss: {loss:.4f}, Val Acc: {accuracy:.2f}')

    return loss, accuracy
測試函數:
# test function: the original snippet had a bare `return` outside any
# function (a SyntaxError) — it must be wrapped in the `test` function
# that the final script block calls as test(model, testloader).
def test(model, dataloader):
    """Count correct predictions over `dataloader`.

    Returns:
        (correct, total): number of correct predictions and samples seen.
    Uses the module-level `device`.
    """
    model.eval()  # disable dropout during evaluation
    correct = 0
    total = 0
    with torch.no_grad():
        for data in dataloader:
            inputs, target = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += target.size(0)
            # Targets are one-hot; argmax recovers the class index.
            correct += (predicted == torch.max(target, 1)[1]).sum().item()
    return correct, total
模型的訓練:
# Train for `epochs` epochs, tracking per-epoch metrics for plotting.
train_loss, train_accuracy = [], []
val_loss, val_accuracy = [], []
print(f"Training on {len(train_data)} examples, validating on {len(val_data)} examples...")
start = time.time()
for epoch in range(epochs):
    print(f"Epoch {epoch+1} of {epochs}")
    train_epoch_loss, train_epoch_accuracy = fit(model, trainloader)
    val_epoch_loss, val_epoch_accuracy = validate(model, valloader)
    train_loss.append(train_epoch_loss)
    train_accuracy.append(train_epoch_accuracy)
    val_loss.append(val_epoch_loss)
    val_accuracy.append(val_epoch_accuracy)
end = time.time()
print((end-start)/60, 'minutes')

# Persist the fine-tuned weights.
torch.save(model.state_dict(), f"../outputs/models/resnet34_epochs{epochs}.pth")

# accuracy plots ('validation' was misspelled 'validataion' in the original legends)
plt.figure(figsize=(10, 7))
plt.plot(train_accuracy, color='green', label='train accuracy')
plt.plot(val_accuracy, color='blue', label='validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.savefig('../outputs/plots/accuracy.png')

# loss plots
plt.figure(figsize=(10, 7))
plt.plot(train_loss, color='orange', label='train loss')
plt.plot(val_loss, color='red', label='validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig('../outputs/plots/loss.png')
結果保存
# save the accuracy and loss lists as pickled files
print('Pickling accuracy and loss lists...')
for name, series in (
    ('train_accuracy', train_accuracy),
    ('train_loss', train_loss),
    ('val_accuracy', val_accuracy),
    ('val_loss', val_loss),
):
    joblib.dump(series, f'../outputs/models/{name}.pkl')
測試網絡模型:
# Evaluate on the held-out test set and report the final accuracy.
correct, total = test(model, testloader)
test_accuracy = 100 * correct / total
print('Accuracy of the network on test images: %0.3f %%' % test_accuracy)
print('train.py finished running')