1 Import the packages required for the experiment
```python
import torch
import numpy as np
import random
from IPython import display
from matplotlib import pyplot as plt
from torch import nn
import torch.utils.data as Data
import torch.optim as optim
from torch.nn import init
import torchvision
import torchvision.transforms as transforms
```
2 Download the MNIST dataset
```python
# Download the MNIST handwritten-digit dataset
mnist_train = torchvision.datasets.MNIST(root='../Datasets/MNIST', train=True,
                                         download=True, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.MNIST(root='../Datasets/MNIST', train=False,
                                        download=True, transform=transforms.ToTensor())
```
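As a quick sanity check (not part of the original code), the dataset sizes and one sample's shape can be inspected; `ToTensor` converts each image to a 1×28×28 float tensor with values in [0, 1]:

```python
# Sanity check: MNIST ships 60,000 training and 10,000 test images
print(len(mnist_train), len(mnist_test))    # 60000 10000
feature, label = mnist_train[0]
print(feature.shape, feature.dtype, label)  # torch.Size([1, 28, 28]) torch.float32 5
```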
3 Read the data
```python
# Read the data in minibatches
batch_size = 32
train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size,
                                         shuffle=True, num_workers=0)
test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size,
                                        shuffle=False, num_workers=0)
```
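A minimal sketch (added here for illustration) to confirm the loader's output: each batch should pair a `(32, 1, 28, 28)` image tensor with a `(32,)` label vector:

```python
# Pull one batch to verify shapes before building the model
X, y = next(iter(train_iter))
print(X.shape, y.shape)  # torch.Size([32, 1, 28, 28]) torch.Size([32])
```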
4 Build the model with torch.nn and initialize the parameters
- Using the ReLU activation (a quick smoke test of all three model variants follows the Sigmoid version below)
```python
# Model definition and parameter initialization
class FlattenLayer(torch.nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):
        # Flatten (N, 1, 28, 28) images into (N, 784) vectors
        return x.view(x.shape[0], -1)

num_inputs, num_hiddens, num_outputs = 784, 256, 10

def use_ReLU():
    net = nn.Sequential(
        FlattenLayer(),
        nn.Linear(num_inputs, num_hiddens),
        nn.ReLU(),
        nn.Linear(num_hiddens, num_outputs)
    )
    return net
```
- Using the ELU activation
```python
def use_ELU():
    net = nn.Sequential(
        FlattenLayer(),
        nn.Linear(num_inputs, num_hiddens),
        nn.ELU(),
        nn.Linear(num_hiddens, num_outputs)
    )
    return net
```
- Using the Sigmoid activation
```python
def use_Sigmoid():
    net = nn.Sequential(
        FlattenLayer(),
        nn.Linear(num_inputs, num_hiddens),
        nn.Sigmoid(),
        nn.Linear(num_hiddens, num_outputs)
    )
    return net
```
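The three builders differ only in the activation. A smoke test (the dummy input and names here are illustrative, not from the original) confirms the flatten-then-linear wiring:

```python
# Push a dummy batch through one model: FlattenLayer turns (N, 1, 28, 28)
# into (N, 784) before the first Linear layer
demo_net = use_ReLU()
out = demo_net(torch.randn(2, 1, 28, 28))
print(out.shape)  # torch.Size([2, 10]) -- raw logits, one score per class
```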
Initialize the parameters
```python
def init_params(net, lr):
    # Draw every parameter (weights and biases) from N(0, 0.01^2),
    # then return an SGD optimizer over the network's parameters
    for params in net.parameters():
        init.normal_(params, mean=0, std=0.01)
    return torch.optim.SGD(net.parameters(), lr)
```
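Note that `init_params` applies the normal initialization to biases as well. A common alternative, sketched here as an assumption rather than part of the original experiment, initializes only the `Linear` weights and zeroes the biases:

```python
# Hypothetical alternative (not used below): per-layer initialization
def init_params_alt(net, lr):
    for m in net.modules():
        if isinstance(m, nn.Linear):
            init.normal_(m.weight, mean=0, std=0.01)
            init.zeros_(m.bias)
    return torch.optim.SGD(net.parameters(), lr)
```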
5 Define the cross-entropy loss function
```python
# Number of training epochs and learning rate
num_epochs = 50
lr = 0.01

# Define the cross-entropy loss function
loss_fn = torch.nn.CrossEntropyLoss()
```
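`CrossEntropyLoss` takes raw logits of shape `(N, C)` and integer class labels of shape `(N,)`, applying log-softmax internally; that is why the models above end in a plain `Linear` layer with no softmax. A tiny illustrative check (values chosen arbitrarily):

```python
# One sample, three classes; the target is a class index, not one-hot
logits = torch.tensor([[2.0, 0.5, -1.0]])
target = torch.tensor([0])
print(loss_fn(logits, target))  # equals -log_softmax(logits)[0, 0]
```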
6 Define the test-set loss and accuracy
```python
def evaluate_testset(data_iter, net):
    acc_sum, loss_sum, n = 0.0, 0.0, 0
    with torch.no_grad():  # no gradients needed during evaluation
        for X, y in data_iter:
            y_hat = net(X)
            acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            l = loss_fn(y_hat, y)  # l is the loss on the minibatch X, y
            loss_sum += l.item() * y.shape[0]
            n += y.shape[0]
    return acc_sum / n, loss_sum / n
```
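As a hedged sanity check (not in the original), running this on a freshly built, untrained model should give accuracy near chance, roughly 0.10 for ten classes:

```python
# An untrained model should score about 10% accuracy on MNIST's 10 classes
acc, l = evaluate_testset(test_iter, use_ReLU())
print('test acc %.3f, test loss %.3f' % (acc, l))
```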
7 Define the model training function
```python
# Define the model training function
def train(model, train_loader, test_loader, loss_fn, num_epochs, batch_size,
          params=None, lr=None, optimizer=None):
    train_ls = []
    test_ls = []
    for epoch in range(num_epochs):  # train for num_epochs epochs in total
        train_loss_sum, train_acc_num, total_examples = 0.0, 0.0, 0
        for x, y in train_loader:  # x and y are the minibatch features and labels
            y_pred = model(x)
            loss = loss_fn(y_pred, y)  # compute the loss
            optimizer.zero_grad()      # clear the gradients
            loss.backward()            # backpropagate
            optimizer.step()           # update the parameters
            total_examples += y.shape[0]
            # Weight the batch-mean loss by batch size so the epoch average is per example
            train_loss_sum += loss.item() * y.shape[0]
            train_acc_num += (y_pred.argmax(dim=1) == y).sum().item()
        # Record per-example averages so train and test curves are comparable
        train_ls.append(train_loss_sum / total_examples)
        test_acc, test_loss = evaluate_testset(test_loader, model)
        test_ls.append(test_loss)
        print('epoch %d, train_loss %.6f, test_loss %f, train_acc %.6f, test_acc %.6f'
              % (epoch + 1, train_ls[epoch], test_ls[epoch],
                 train_acc_num / total_examples, test_acc))
    return train_ls, test_ls
```
8 Plot the training- and test-set loss curves
```python
def show_plots(mytrain_loss, mytest_loss):
    x = np.linspace(1, len(mytrain_loss), len(mytrain_loss))
    plt.plot(x, mytrain_loss, label="train_loss", linewidth=1.5)
    plt.plot(x, mytest_loss, label="test_loss", linewidth=1.5)
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.legend()
    plt.show()
```
9 Train the models
Loss analysis with the ReLU activation, with training- and test-set loss curves
```python
net = use_ReLU()
optimizer = init_params(net, lr)
train_loss, test_loss = train(net, train_iter, test_iter, loss_fn, num_epochs,
                              batch_size, optimizer=optimizer)
show_plots(train_loss, test_loss)
```
Loss analysis with the ELU activation, with training- and test-set loss curves
```python
net = use_ELU()
optimizer = init_params(net, lr)
train_loss, test_loss = train(net, train_iter, test_iter, loss_fn, num_epochs,
                              batch_size, optimizer=optimizer)
show_plots(train_loss, test_loss)
```
Loss analysis with the Sigmoid activation, with training- and test-set loss curves
```python
net = use_Sigmoid()
optimizer = init_params(net, lr)
train_loss, test_loss = train(net, train_iter, test_iter, loss_fn, num_epochs,
                              batch_size, optimizer=optimizer)
show_plots(train_loss, test_loss)
```