PyTorch教程 | 1 圖片數據建模流程范例


構建數據流程是實踐過程中核心環節。熟悉pipeline的的構建過程,有助於理解不同代碼的結構,也是實現自主創建網絡的第一步。

使用Pytorch實現神經網絡模型的一般流程包括:1,准備數據 2,定義模型 3,訓練模型 4,評估模型 5,使用模型 6,保存模型。

1- 數據加載
在Pytorch中構建圖片數據管道通常有三種方法。
第一種是使用 torchvision中的datasets.ImageFolder來讀取圖片然后用 DataLoader來並行加載。
第二種是通過繼承 torch.utils.data.Dataset 實現用戶自定義讀取邏輯然后用 DataLoader來並行加載。
第三種方法是讀取用戶自定義數據集的通用方法,既可以讀取圖片數據集,也可以讀取文本數據集。
 
2- 定義模型
使用Pytorch通常有三種方式構建模型:使用nn.Sequential按層順序構建模型,繼承nn.Module基類構建自定義模型,繼承nn.Module基類構建模型並輔助應用模型容器(nn.Sequential,nn.ModuleList,nn.ModuleDict)進行封裝。
 
3- 訓練模型
Pytorch通常需要用戶編寫自定義訓練循環,大致分為腳本形式訓練循環,函數形式訓練循環,類形式訓練循環。
 
以下為詳細示例代碼
 
#1 准備數據

import torch
from torch import nn
from torch.utils.data import Dataset, Dataloader
from torchvision import transforms, datasets

#轉換
transforms_train = transforms.Compose([transforms.ToTensor(),
                                       transforms.Scale(40),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.RandomCrop(32),])
transforms_val   = transforms.Compose([transforms.ToTensor()])



#加載
ds_train = datasets.ImageFolder("./data/cifar2/train/",
            transform = transform_train, target_transform= lambda t:torch.tensor([t]).float())
ds_valid = datasets.ImageFolder("./data/cifar2/test/",
            transform = transform_valid,target_transform= lambda t:torch.tensor([t]).float())

print(ds_train.class_to_idx)

dl_train = Dataloader(ds_train, batch_size=50, shuffle=True, num_workers=3)
dl_valid = DataLoader(ds_valid,batch_size = 50,shuffle = True,num_workers=3)


#顯示
%matplotlib inline
%config InlineBackend.figure_format = 'svg'

#查看部分樣本
from matplotlib import pyplot as plt 

plt.figure(figsize=(8,8)) 
for i in range(9):
    img,label = ds_train[i]
    img = img.permute(1,2,0)
    ax=plt.subplot(3,3,i+1)
    ax.imshow(img.numpy())
    ax.set_title("label = %d"%label.item())
    ax.set_xticks([])
    ax.set_yticks([]) 
plt.show()


# Pytorch的圖片默認順序是 Batch,Channel,Width,Height
for x,y in dl_train:
    print(x.shape,y.shape) 
    break

#2- 定義模型
#此處為繼承nn.Module基類
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channel=3, out_channels=32, kernel_size=3)
        self.pool = nn.Maxpool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=32,out_channels=64,kernel_size = 5)
        self.dropout = nn.Dropout2d(p = 0.1)
        self.adaptive_pool = nn.AdaptiveMaxPool2d((1,1))
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(64,32)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(32,1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.conv1(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = self.pool(x)
        x = self.dropout(x)
        x = self.adaptive_pool(x)
        x = self.flatten(x)
        x = self.linear1(x)
        x = self.relu(x)
        x = self.linear2(x)
        y = self.sigmoid(x)
        return y

net = Net()
print(net)

import torchkeras
torchkeras.summary(net, input_shape=(3, 32, 32))

#3 訓練模型
#此處為函數形式訓練循環
import pandas as pd 
from sklearn.metrics import roc_auc_score

model = net
model.optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
model.loss_func = torch.nn.BCELoss()
model.metric_func = lambda y_pred, y_true: roc_auc_score(y_true.data.numpy(),y_pred.data.numpy())
model.metric_name = "auc"


def train_step(model, features, label):
    #訓練模式,dropout層發生作用
    model.train()

    #梯度清零
    model.optimizer.zero_grad()

    #正向傳播求損失
    predictions = model(features)
    loss = model.loss_func(predictions, labels)
    metric = model.metric_func(predictions, labels)

    #反向傳播求梯度
    loss.backward()
    model.optimizer.step()

    return loss.item(), metric.item()

def valid_step(model,features,labels):
    # 預測模式,dropout層不發生作用
    model.eval()
    # 關閉梯度計算
    with torch.no_grad():
        predictions = model(features)
        loss = model.loss_func(predictions, labels)
        metric = model.metric_func(parameters, labels)

    return loss.item(), metric.item()

# 測試train_step效果
features,labels = next(iter(dl_train))
train_step(model,features,labels)


def train_model(model, epochs, dl_train, dl_valid, log_step_freq):
    metric_name = model.metric_name
    dfhistory = pd.DataFrame(columns = ["epoch","loss",metric_name,"val_loss","val_"+metric_name]) 
    print("Start Training...")
    nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print("=========="*8 + "%s"%nowtime)


    for epoch in range(1, epochs+1):
        # 1,訓練循環-------------------------------------------------
        loss_sum = 0.0
        metric_sum = 0.0
        step = 1

        for step, (features, labels) in enumerate(dl_train, 1):
            loss, metric = train_step(model, features, labels)

            # 打印batch級別日志
            loss_sum += loss
            metric_sum += metric
            if step%log_step_freq == 0:   
                print(("[step = %d] loss: %.3f, "+metric_name+": %.3f") %
                      (step, loss_sum/step, metric_sum/step))

        # 2,驗證循環-------------------------------------------------
        val_loss_sum = 0.0
        val_metric_sum = 0.0
        val_step = 1

        for val_step, (features,labels) in enumerate(dl_valid, 1):

            val_loss,val_metric = valid_step(model,features,labels)

            val_loss_sum += val_loss
            val_metric_sum += val_metric

        # 3,記錄日志-------------------------------------------------
        info = (epoch, loss_sum/step, metric_sum/step, 
                val_loss_sum/val_step, val_metric_sum/val_step)
        dfhistory.loc[epoch-1] = info

        # 打印epoch級別日志
        print(("\nEPOCH = %d, loss = %.3f,"+ metric_name + \
              "  = %.3f, val_loss = %.3f, "+"val_"+ metric_name+" = %.3f") 
              %info)
        nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print("\n"+"=========="*8 + "%s"%nowtime)

    print('Finished Training...')
    
    return dfhistory


epochs = 20
dfhistory = train_model(model, epochs, dl_train, dl_valid, log_step_freq=50)


#4 評估模型
#打印history
print(dfhistory)

#查看loss曲線
%matplotlib inline
%config InlineBackend.figure_format = 'svg'

import matplotlib.pyplot as plt

def plot_metric(dfhistory, metric):
    train_metrics = dfhistory[metric]
    val_metrics = dfhistory['val_'+metric]
    epochs = range(1, len(train_metrics) + 1)
    plt.plot(epochs, train_metrics, 'bo--')
    plt.plot(epochs, val_metrics, 'ro-')
    plt.title('Training and validation '+ metric)
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend(["train_"+metric, 'val_'+metric])
    plt.show()

plot_metric(dfhistory,"loss")
plot_metric(dfhistory,"auc")

#5 使用模型
def predict(model, dl):
    model.eval()
    with torch.no_grad():
        result = torch.cat([model.forward(t[0]) for t in dl])
    return(result.data)


y_pred_probs = predict(model, dl_valid)
print(y_pred_probs)

y_pred = torch.where(y_pred_probs>0.5, 
            torch.ones_like(y_pred_probs), torch.zeros_like(y_pred_probs))
print(y_pred)


#6 保存模型
torch.save(model.state_dict(), "./data/model_parameter.pkl")

net_clone = Net()
net_clone.load_state_dict(torch.load("./data/model_parameter.pkl"))

predict(net_clone,dl_valid)

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM