torchvision is mainly made up of three modules: models, transforms, and datasets.
transforms is mainly used for data augmentation and preprocessing.
datasets downloads commonly used datasets, such as the MNIST dataset.
models provides standard network architectures and downloads their pretrained weights.
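As a quick illustration of the three modules working together, here is a minimal sketch (the './data' root and the resnet18 architecture are only illustrative choices and are not used in the project below):

import torch
from torchvision import transforms, datasets, models

transform = transforms.Compose([transforms.ToTensor()])      # transforms: preprocessing / augmentation
mnist = datasets.MNIST('./data', train=True, download=True,   # datasets: download a standard dataset
                       transform=transform)
loader = torch.utils.data.DataLoader(mnist, batch_size=64, shuffle=True)
net = models.resnet18(pretrained=True)                        # models: download a pretrained architecture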
Part 1: Preparing the dataset
Step 1: Use transforms to augment the data, then use torch.utils.data.DataLoader() to build batched datasets.
Step 2: Convert a batch back to a displayable form: convert to numpy format, reorder the color channels, add the mean and standard deviation back in, use image.clip(0, 1) to limit the values to [0, 1], and finally show the images.
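Step 2 is simply the inverse of transforms.Normalize applied before plotting. A minimal, self-contained sketch of that inversion (the random tensor below only stands in for one image coming out of the DataLoader):

import numpy as np
import torch

tensor = torch.randn(3, 224, 224)                           # stand-in for one normalized CHW image
image = tensor.cpu().clone().detach().numpy().squeeze()     # leave the graph, drop size-1 dims
image = image.transpose(1, 2, 0)                            # CHW -> HWC for matplotlib
image = image * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])  # undo Normalize
image = image.clip(0, 1)                                    # limit values to [0, 1] for display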
Part 2: Training on the dataset
Step 1: Use initiallize_model() to initialize the network.
Step 2: Train the network, save the best-performing checkpoint to disk, and return the parameters of the best model.
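The "best result" is stored as a plain dictionary checkpoint with the keys 'state_dict', 'best_acc', and 'optimizer', and can be restored later. A minimal sketch of the save/restore round trip (the small linear model and optimizer are illustrative stand-ins for the real network):

import torch
from torch import nn, optim

model = nn.Linear(10, 2)                                   # stand-in for the real network
optimizer = optim.Adam(model.parameters(), lr=1e-2)
best_acc = 0.95                                            # illustrative value

# Save the best epoch: weights, the accuracy reached, and the optimizer state.
state = {
    'state_dict': model.state_dict(),
    'best_acc': best_acc,
    'optimizer': optimizer.state_dict(),
}
torch.save(state, 'checkpoint.pth')

# Restore later, e.g. to resume training or to run inference.
checkpoint = torch.load('checkpoint.pth')
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
best_acc = checkpoint['best_acc']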
Part 3: Testing the model
Step 1: Test a single input image; the image must first be given the same preprocessing as the 'valid' transform.
Step 2: Test one batch from the validation loader and display the results.
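The process_image() helper in the full script below replicates the 'valid' preprocessing by hand. An equivalent, shorter sketch that reuses data_transform['valid'] directly (this assumes the data_transform dict, model_ft, and device defined in the script, and image_path pointing at any test image):

from PIL import Image
import torch

img = Image.open(image_path).convert('RGB')              # load one test image
img_tensor = data_transform['valid'](img).unsqueeze(0)   # Resize + CenterCrop + ToTensor + Normalize, add batch dim
model_ft.eval()
with torch.no_grad():
    output = model_ft(img_tensor.to(device))
    pred = torch.argmax(output, 1)                        # predicted class index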
import os
import numpy as np
import torch
from torch import nn
from torch import optim
from torchvision import transforms, datasets, models
import matplotlib.pyplot as plt
import time
import copy
from PIL import Image

# Part 1: data preparation
# Data loading and preprocessing
data_dir = './flower_data'
train_dir = '/train'
test_dir = '/test'

# Step 1: build the datasets
data_transform = {
    "train": transforms.Compose([
        transforms.Resize(256),
        transforms.RandomRotation(45),
        transforms.CenterCrop(224),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.ColorJitter(brightness=0.2, contrast=0.1, saturation=0.1, hue=0.1),
        transforms.RandomGrayscale(p=0.025),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'valid': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}


def im_convert(tensor):
    """Undo the normalization and channel ordering so the tensor can be shown with plt.imshow."""
    image = tensor.to('cpu').clone().detach()  # clone() so that modifying image does not modify tensor
    image = image.numpy().squeeze()            # drop dimensions of size 1
    image = image.transpose(1, 2, 0)           # CHW -> HWC
    image = image * np.array((0.229, 0.224, 0.225)) + np.array((0.485, 0.456, 0.406))
    image = image.clip(0, 1)
    return image


batch_size = 8
# Note: both splits read from the train folder here, as in the original script
image_dataset = {x: datasets.ImageFolder(os.path.join(data_dir + train_dir), data_transform[x])
                 for x in ['train', 'valid']}
dataloaders = {x: torch.utils.data.DataLoader(image_dataset[x], batch_size=batch_size, shuffle=True)
               for x in ['train', 'valid']}

# Step 2: take one batch from the validation loader and display the images
dataiter = iter(dataloaders['valid'])
inputs, labels = next(dataiter)

fig = plt.figure(figsize=(20, 12))
rows = 2
columns = 4
for idx in range(rows * columns):
    ax = fig.add_subplot(rows, columns, idx + 1, xticks=[], yticks=[])
    plt.imshow(im_convert(inputs[idx]))
plt.show()

# Part 2: model training
# Model selection
model_name = 'resnet'
# Whether to freeze the pretrained weights and only train the newly added layers
feature_extract = True

train_on_gpu = torch.cuda.is_available()
if not train_on_gpu:
    print('CUDA is not available. Training on CPU ...')
else:
    print('CUDA is available! Training on GPU ...')

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model_ft = models.resnet152()
print(model_ft)


def set_parameter_requires_grad(model, feature_extract):
    if feature_extract:
        for param in model.parameters():
            # freeze the existing layers
            param.requires_grad = False


# Step 1: model initialization
def initiallize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    model_ft = None
    input_size = 0
    if model_name == 'resnet':
        """ Resnet 152 """
        model_ft = models.resnet152(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features  # input size of the last fully connected layer
        model_ft.fc = nn.Sequential(nn.Linear(num_ftrs, num_classes),
                                    nn.LogSoftmax(dim=1))
        input_size = 224
    return model_ft, input_size


model_ft, input_size = initiallize_model(model_name, 17, feature_extract, use_pretrained=True)
# Move the model to the GPU
model_ft = model_ft.to(device)
# Checkpoint file for saving the model
filename = 'checkpoint.pth'

param_to_update = model_ft.parameters()
print(param_to_update)

# # Train all layers instead
# for param in model_ft.parameters():
#     param.requires_grad = True

if feature_extract:
    param_to_update = []
    for name, param in model_ft.named_parameters():
        if param.requires_grad == True:  # collect only the parameters that will be trained
            param_to_update.append(param)
            print('\t', name)
else:
    for name, param in model_ft.named_parameters():
        if param.requires_grad == True:
            print('\t', name)

print(model_ft)

# Optimizer setup
optmizer_ft = optim.Adam(param_to_update, lr=1e-2)  # pass in only the parameters to optimize
scheduler = optim.lr_scheduler.StepLR(optmizer_ft, step_size=7, gamma=0.1)  # decay the learning rate every 7 epochs
criterion = nn.NLLLoss()  # expects log-probabilities and target labels


# Step 2: training loop
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False, filename=filename):
    since = time.time()
    best_acc = 0

    # Resume from an existing checkpoint if one is found
    if os.path.exists(filename):
        checkpoint = torch.load(filename)
        best_acc = checkpoint['best_acc']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    model.to(device)

    val_acc_history = []
    train_acc_history = []
    train_losses = []
    valid_losses = []
    LRs = [optimizer.param_groups[0]['lr']]

    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print("Epoch {} / {}".format(epoch, num_epochs - 1))
        print('-' * 10)

        # Training and validation phases
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the gradients
                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4 * loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)
                    pred = torch.argmax(outputs, 1)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Accumulate the loss and the number of correct predictions
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(pred == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            time_elapsed = time.time() - since
            print('Time elapsed {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
            print("{} loss: {:.4f} Acc: {:.4f}".format(phase, epoch_loss, epoch_acc))

            # Save the best model seen so far
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                state = {
                    'state_dict': model.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                }
                torch.save(state, filename)
            if phase == 'valid':
                val_acc_history.append(epoch_acc)
                valid_losses.append(epoch_loss)
                scheduler.step()
            if phase == 'train':
                train_acc_history.append(epoch_acc)
                train_losses.append(epoch_loss)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # After training, keep the weights of the best epoch as the final model
    model.load_state_dict(best_model_wts)
    return model, val_acc_history, train_acc_history, valid_losses, train_losses, LRs


# Part 3: model testing
def predict(model_name, num_classes, feature_extract, image_path):
    # Get a freshly initialized model
    model_ft, input_size = initiallize_model(model_name, num_classes, feature_extract)
    model_ft.to(device)

    # Load the trained weights
    filename = 'checkpoint.pth'
    checkpoint = torch.load(filename)
    best_acc = checkpoint['best_acc']
    model_ft.load_state_dict(checkpoint['state_dict'])

    # Preprocess an input image so it matches what the network expects
    def process_image(image_path):
        # Read the test image
        img = Image.open(image_path)
        if img.size[0] > img.size[1]:
            img.thumbnail((10000, 256))
        else:
            img.thumbnail((256, 10000))
        # Center crop to 224 x 224
        left_margin = (img.width - 224) / 2
        right_margin = left_margin + 224
        bottom_margin = (img.height - 224) / 2
        top_margin = bottom_margin + 224
        img = img.crop((left_margin, bottom_margin, right_margin, top_margin))
        # Normalize exactly like the 'valid' transform
        img = np.array(img) / 255
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])
        img = (img - mean) / std
        img = img.transpose([2, 0, 1])  # HWC -> CHW
        return img

    # Step 1: test a single image
    img = process_image(image_path)
    img_tensor = torch.tensor(img, dtype=torch.float32).unsqueeze(0).to(device)  # add a batch dimension
    outputs = model_ft(img_tensor)

    # Step 2: take one batch of test data and run it through the network
    dataiter = iter(dataloaders['valid'])
    images, labels = next(dataiter)

    model_ft.eval()
    if train_on_gpu:
        outputs = model_ft(images.cuda())
    else:
        outputs = model_ft(images)

    _, preds_tensor = torch.max(outputs, 1)
    preds = np.squeeze(preds_tensor.numpy()) if not train_on_gpu else np.squeeze(preds_tensor.cpu().numpy())

    fig = plt.figure(figsize=(20, 20))
    columns = 4
    rows = 2
    for idx in range(columns * rows):
        ax = fig.add_subplot(rows, columns, idx + 1, xticks=[], yticks=[])
        plt.imshow(im_convert(images[idx]))
        ax.set_title("{}?{}".format(preds[idx], labels.data[idx]),
                     color='green' if preds[idx] == labels.data[idx] else 'red')
    plt.show()


if __name__ == '__main__':
    # train_model(model_ft, dataloaders, criterion, optmizer_ft)
    image_path = r'C:\Users\qq302\Desktop\pytorch學習\第四章卷積神經網絡實戰\flower_data\train\4\image_0242.jpg'
    predict(model_name, 17, feature_extract, image_path)
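train_model() returns the accuracy and loss histories as well as the learning rates, but the script above never visualizes them. A possible follow-up sketch, assuming train_model has been run as in the commented-out line in __main__:

model_ft, val_acc, train_acc, val_loss, train_loss, LRs = train_model(
    model_ft, dataloaders, criterion, optmizer_ft, num_epochs=25)

plt.figure()
plt.plot(train_loss, label='train loss')   # per-epoch training loss
plt.plot(val_loss, label='valid loss')     # per-epoch validation loss
plt.xlabel('epoch')
plt.legend()
plt.show()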