本次遷移學習訓練的是865種魚的分類,使用的是WildFish數據集:
百度雲盤鏈接:https://pan.baidu.com/s/1_kHg87LghgWT9_mVawGdYQ
提取碼:a9pl
導入一些包:
import torch import torch.nn as nn import torch.optim as optim import torchvision import torchvision.transforms as transforms import numpy as np from torchvision import datasets, models, transforms import os import matplotlib.pyplot as plt import pandas as pd from PIL import Image from torch.utils.data import Dataset
把數據集分成Training和Testing兩個部分:
# Build an 8:2 train/test split from the dataset's two list files.
# The dataset ships its own Training/Testing lists split 5:5, so both lists
# are merged and re-split here.

# Raw string: the value ends with two literal backslashes.
_IMAGE_ROOT = r'E:\data\wildfish\\'


def _read_list(list_file):
    """Load one "<relative path> <label>" list and prefix every path with the image root."""
    frame = pd.read_table(list_file, sep=' ', names=['path', 'label'])
    frame['path'] = _IMAGE_ROOT + frame['path']
    return frame


tb1 = _read_list('./train.txt')
tb2 = _read_list('./val.txt')

# Merge both lists into one table with a fresh 0..n-1 index.
tb = pd.concat([tb1, tb2], sort=True).reset_index(drop=True)
tb = tb[['path', 'label']]

# Rows whose position ends in 9 or 0 form the test set; every other row trains.
last_digit = np.arange(tb.shape[0]) % 10
is_test = (last_digit == 9) | (last_digit == 0)
train_rows = np.flatnonzero(~is_test).tolist()
test_rows = np.flatnonzero(is_test).tolist()
train_data = tb.iloc[train_rows]
test_data = tb.iloc[test_rows]

# Store both splits as space-separated "<path> <label>" files, no header, no index.
train_data.to_csv(r'./train_path.txt', sep=' ', header=None, index=False)
test_data.to_csv(r'./test_path.txt', sep=' ', header=None, index=False)
PyTorch最後可讀取的圖片名稱(以絕對路徑顯示)和類別名稱如下圖所示:
定義一些超參數:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

EPOCH = 10        # number of passes over the training set
pre_epoch = 0     # passes already completed; the training loop starts counting here
BATCH_SIZE = 128  # samples per mini-batch
LR = 1e-4         # learning rate
對數據做預處理
# Preprocessing pipelines for the training and test images.
# Both pipelines finish with Normalize using a per-channel (R, G, B) mean of
# 0.5 and standard deviation of 0.5.
_CHANNEL_MEAN = (0.5, 0.5, 0.5)
_CHANNEL_STD = (0.5, 0.5, 0.5)

# Training: resize, random augmentation, then tensor conversion + normalisation.
_train_steps = [
    transforms.Resize((150, 150)),
    transforms.RandomHorizontalFlip(0.5),   # flip left/right with probability 0.5
    transforms.RandomVerticalFlip(0.5),     # flip top/bottom with probability 0.5
    transforms.RandomRotation(30),
    transforms.RandomCrop(128, padding=4),  # 128x128 crop taken after padding by 4
    # ColorJitter (brightness=0.5 / contrast=0) is left disabled.
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
]
transform_train = transforms.Compose(_train_steps)

# Testing: no augmentation, only a resize straight to the 128x128 input size.
_test_steps = [
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
]
transform_test = transforms.Compose(_test_steps)
將數據放到TrainLoader和TestLoader中
class MyDataset(Dataset):
    """Image dataset backed by a text file of "<image path> <integer label>" lines.

    Args:
        txt_path: path of the list file, read as UTF-8.
        transform: optional callable applied to the RGB PIL image.
        target_transform: optional callable applied to the integer label.
    """

    def __init__(self, txt_path, transform=None, target_transform=None):
        samples = []
        # `with` closes the list file once it has been read.
        with open(txt_path, 'r', encoding='utf-8') as fh:
            for line in fh:
                line = line.strip()
                if not line:
                    continue  # tolerate blank lines (e.g. a trailing newline)
                # The label is the last whitespace-separated field; splitting
                # from the right keeps spaces inside the path intact.
                path, label = line.rsplit(None, 1)
                samples.append((path, int(label)))
        self.imgs = samples
        self.transform = transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``."""
        fn, label = self.imgs[index]
        # convert() returns a new in-memory image, so the file can be closed here.
        with Image.open(fn) as raw:
            img = raw.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return img, label

    def __len__(self):
        """Return the number of samples listed in the file."""
        return len(self.imgs)


train_datasets = MyDataset(r'./train_path.txt', transform=transform_train)
test_datasets = MyDataset(r'./test_path.txt', transform=transform_test)

# num_workers=0 loads data in the main process; the original author found this
# necessary on Windows 10. Other systems can raise it to speed up loading.
trainloader = torch.utils.data.DataLoader(
    train_datasets, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
# Evaluation does not benefit from shuffling; a fixed order keeps the collected
# predictions reproducible between runs.
testloader = torch.utils.data.DataLoader(
    test_datasets, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
查看圖片的代碼(可選),不執行也不會影響後續的訓練
# Preview the first image of three training batches.
from itertools import islice

to_pil_image = transforms.ToPILImage()

# islice stops after three batches without asking the loader for a fourth one.
for image, label in islice(trainloader, 3):
    print(label)  # labels of the whole batch
    # image[0] drops the batch dimension. The training pipeline normalised each
    # channel with mean 0.5 / std 0.5, so undo that to get values back into
    # [0, 1]; otherwise imshow clips everything below zero.
    img = (image[0] * 0.5 + 0.5).clamp(0, 1)
    plt.imshow(to_pil_image(img))
    plt.show()
調用VGG16的預訓練模型
class VGGNet(nn.Module):
    """Pretrained VGG16 backbone followed by a newly defined classifier head.

    Args:
        num_classes: width of the final output layer.
            NOTE(review): the default is 685 while the article's introduction
            mentions 865 fish classes; confirm against the label files.
    """

    def __init__(self, num_classes=685):
        super().__init__()
        backbone = models.vgg16(pretrained=True)  # load the pretrained VGG16 weights
        # Empty out the stock classifier; the head defined below replaces it.
        backbone.classifier = nn.Sequential()
        self.features = backbone
        head = [
            # The original author notes that 512 * 7 * 7 is fixed by VGG16;
            # the hidden width (1024) can be tuned.
            nn.Linear(512 * 7 * 7, 1024),
            nn.ReLU(True),
            nn.Dropout(0.3),
            nn.Linear(1024, 1024),
            nn.ReLU(True),
            nn.Dropout(0.3),
            nn.Linear(1024, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Run the backbone, flatten per sample, then apply the custom head."""
        feats = self.features(x)
        flat = feats.view(feats.size(0), -1)
        return self.classifier(flat)


net = VGGNet().to(device)
調用ResNet18的預訓練模型
class ResNet(nn.Module):
    """Pretrained ResNet18 followed by a newly defined classifier head.

    Args:
        num_classes: width of the final output layer.
            NOTE(review): the default is 685 while the article's introduction
            mentions 865 fish classes; confirm against the label files.
    """

    def __init__(self, num_classes=685):
        super().__init__()
        backbone = models.resnet18(pretrained=True)  # load the pretrained ResNet18 weights
        # NOTE(review): this assigns an attribute named `classifier`; torchvision's
        # ResNet appears to name its final layer `fc`, so the backbone presumably
        # keeps its 1000-way output, which is what the head below consumes.
        # Confirm before changing either side.
        backbone.classifier = nn.Sequential()
        self.features = backbone
        head = [
            nn.Linear(1000, 1000),  # input width must match the backbone output
            nn.ReLU(True),
            nn.Dropout(0.5),
            # A second 1024-wide hidden layer is left disabled.
            nn.Linear(1000, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Run the backbone, flatten per sample, then apply the custom head."""
        feats = self.features(x)
        flat = feats.view(feats.size(0), -1)
        return self.classifier(flat)


net = ResNet().to(device)
調用MobileNet V2的預訓練模型
class MobileNet(nn.Module):
    """Pretrained MobileNetV2 backbone followed by a newly defined classifier head.

    Args:
        num_classes: width of the final output layer.
            NOTE(review): the default is 685 while the article's introduction
            mentions 865 fish classes; confirm against the label files.
    """

    def __init__(self, num_classes=685):
        super().__init__()
        backbone = models.mobilenet_v2(pretrained=True)  # load the pretrained MobileNetV2 weights
        # Empty out the stock classifier; the head defined below replaces it.
        backbone.classifier = nn.Sequential()
        self.features = backbone
        head = [
            # 1280 is presumably the backbone's flattened output width - verify
            # before changing it; the hidden width (1000) can be tuned.
            nn.Linear(1280, 1000),
            nn.ReLU(True),
            nn.Dropout(0.5),
            # A second 1024-wide hidden layer is left disabled.
            nn.Linear(1000, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Run the backbone, flatten per sample, then apply the custom head."""
        feats = self.features(x)
        flat = feats.view(feats.size(0), -1)
        return self.classifier(flat)


net = MobileNet().to(device)
選擇優化器和Loss
# Adam over every parameter of `net`. The step size comes from the LR
# hyper-parameter (0.0001) instead of repeating the literal here, so changing
# LR actually changes training.
optimizer = optim.Adam(net.parameters(), lr=LR)

# Cross-entropy loss on the raw class scores, moved to the training device.
criterion = nn.CrossEntropyLoss()
criterion.to(device=device)
定義兩個函數,一個可以凍住features層,只訓練FC層,另一個把features層解凍,訓練所有參數
from collections.abc import Iterable


def set_freeze_by_names(model, layer_names, freeze=True):
    """Freeze or unfreeze the parameters of selected direct children of ``model``.

    Args:
        model: module whose ``named_children()`` are inspected.
        layer_names: a single child name, or an iterable of child names.
        freeze: when True the selected parameters get ``requires_grad = False``;
            when False they get ``requires_grad = True``.
    """
    # A bare string is itself Iterable; without this check `name in layer_names`
    # becomes a substring test (e.g. 'feat' would match 'features').
    if isinstance(layer_names, str) or not isinstance(layer_names, Iterable):
        layer_names = [layer_names]
    # Materialise once so one-shot iterators can be checked for every child.
    wanted = tuple(layer_names)
    for name, child in model.named_children():
        if name not in wanted:
            continue
        for param in child.parameters():
            param.requires_grad = not freeze


def freeze_by_names(model, layer_names):
    """Disable gradients for the named direct children of ``model``."""
    set_freeze_by_names(model, layer_names, True)


def unfreeze_by_names(model, layer_names):
    """Re-enable gradients for the named direct children of ``model``."""
    set_freeze_by_names(model, layer_names, False)
# Freeze the `features` child so that only the classifier head is trained.
# The trailing comma matters: ('features') is just a string, ('features',) is a tuple.
freeze_by_names(net, ('features',))
# Unfreeze the `features` child so that all parameters are trained.
# The trailing comma matters: ('features') is just a string, ('features',) is a tuple.
unfreeze_by_names(net, ('features',))
定義兩個數組,為了存儲預測的y值和真實的y值
# Two separate lists: one for predicted values, one for ground-truth labels.
y_predict, y_true = [], []
# The original author reports an error unless this module is imported and the
# flag below is switched on.
import PIL.ImageFile as ImageFile

ImageFile.LOAD_TRUNCATED_IMAGES = True
訓練過程
def _topk_hits(outputs, labels, k=5):
    """Count samples whose true label is among the ``k`` highest-scoring classes.

    Args:
        outputs: class scores, one row per sample.
        labels: true class indices, one per sample.
        k: how many top classes count as a hit; capped at the number of classes.

    Returns:
        ``(predicted, hits)`` - the top-k class indices per sample and the
        number of samples counted as correct.
    """
    k = min(k, outputs.size(1))  # topk raises if k exceeds the class count
    _, predicted = outputs.topk(k, 1, True, True)
    hits = torch.eq(predicted, labels.view(-1, 1)).sum().item()
    return predicted, hits


# Train for epochs pre_epoch..EPOCH-1 and evaluate on the test set after each one.
print("Start Training!")
length = len(trainloader)  # batches per epoch; constant, so computed once
for epoch in range(pre_epoch, EPOCH):
    print('\nEpoch: %d' % (epoch + 1))
    net.train()
    sum_loss = 0.0
    correct = 0.0
    total = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        # forward + backward + parameter update
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the running mean loss and running top-5 accuracy after every batch.
        sum_loss += loss.item()
        _, hits = _topk_hits(outputs, labels)
        total += labels.size(0)
        correct += hits
        print('[epoch:%d, iter:%d] Loss: %.03f | Acc: %.3f%% '
              % (epoch + 1, (i + 1 + epoch * length), sum_loss / (i + 1), 100. * correct / total))

    # Evaluate top-5 accuracy on the test set after every epoch.
    print("Waiting Test!")
    net.eval()  # switch once per evaluation pass rather than once per batch
    # Start from empty lists so the stored predictions all come from the same
    # model state (the one after this epoch).
    y_predict.clear()
    y_true.clear()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            # A sample counts as correct when its label is among the 5 best scores.
            predicted, hits = _topk_hits(outputs, labels)
            total += labels.size(0)
            correct += hits
            # Keep the results on the CPU so they do not pin device memory.
            y_predict.append(predicted.cpu())
            y_true.append(labels.cpu())
    acc = 100. * correct / total
    print('測試分類准確率為:%.3f%%' % acc)
print("Training Finished, TotalEPOCH=%d" % EPOCH)
保存模型
# Save the whole model object (not just its state_dict).
# torch.save does not create missing directories, so make sure ./model exists.
os.makedirs('./model', exist_ok=True)
torch.save(net, './model/mobileNet freeze.pth')
加載模型
# Load a previously saved whole-model file. map_location moves the tensors to
# the current device, so a model saved on a GPU also loads on a CPU-only machine.
# SECURITY: torch.load unpickles the file - only load files you trust.
# NOTE(review): PyTorch releases where torch.load defaults to weights_only=True
# will need weights_only=False for a whole-model file - confirm against the
# installed version.
net = torch.load('./model/VGG16-2 freeze.pth', map_location=device)
訓練過程
我是先把特徵層凍住訓練10個epoch,再解凍訓練20個epoch,各個模型在Training上的準確率基本在98%左右,在Testing上的準確率在88%左右(上面的代碼統計的是Top-5準確率)。