版本:PyTorch 1.0。代碼是在 Jupyter 中執行的。
導包:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
設置超參:
# Hyperparameters for the MNIST training run.
BATCH_SIZE = 512  # samples per mini-batch; needs roughly 2 GB of GPU memory
EPOCHS = 20  # total number of training epochs (full passes over the training set)
# Train on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
加載數據:
# Preprocessing shared by both splits: convert each image to a float tensor,
# then standardise it with the MNIST mean (0.1307) and standard deviation
# (0.3081). The mean was previously mistyped as 0.1037.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training set: downloaded into the 'data' directory if it is not there yet,
# and reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, download=True, transform=mnist_transform),
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Test set: no download is requested here, so the files must already exist
# under 'data'. Evaluation metrics do not depend on sample order, so the
# loader is not shuffled.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False, transform=mnist_transform),
    batch_size=BATCH_SIZE,
    shuffle=False,
)
構建網絡:方式一
class ConvNet(nn.Module):
    """Small CNN for 1x28x28 images that outputs per-class log-probabilities.

    Layout: conv 5x5 -> ReLU -> 2x2 max-pool -> conv 3x3 -> ReLU -> flatten
    -> linear(2000, 500) -> ReLU -> linear(500, 10) -> log-softmax.
    """

    def __init__(self):
        super().__init__()
        # Shape comments assume an input batch of shape (N, 1, 28, 28).
        self.conv1 = nn.Conv2d(1, 10, 5)   # (N, 1, 28, 28) -> (N, 10, 24, 24)
        self.conv2 = nn.Conv2d(10, 20, 3)  # (N, 10, 12, 12) -> (N, 20, 10, 10)
        self.fc1 = nn.Linear(20 * 10 * 10, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Return log-probabilities of shape (N, 10) for a batch ``x``.

        ``x`` must be shaped (N, 1, 28, 28); other spatial sizes do not match
        the 20 * 10 * 10 input width of ``fc1``.
        """
        batch = x.size(0)
        out = F.relu(self.conv1(x))    # (N, 10, 24, 24)
        out = F.max_pool2d(out, 2, 2)  # (N, 10, 12, 12)
        out = F.relu(self.conv2(out))  # (N, 20, 10, 10)
        out = out.view(batch, -1)      # flatten to (N, 2000)
        out = F.relu(self.fc1(out))    # (N, 500)
        out = self.fc2(out)            # (N, 10)
        # Log-softmax over the class dimension: rows are log-probabilities.
        return F.log_softmax(out, dim=1)
構建網絡:方式二——把更多的內容放在了Sequential里面,覺得網絡會顯得清楚一些
class MyNet(torch.nn.Module):
    """Two-stage CNN for 1x28x28 images that outputs raw class scores (logits).

    Each stage is an ``nn.Sequential`` of conv 5x5 -> ReLU -> 2x2 max-pool,
    followed by one linear layer. No softmax is applied to the output.
    """

    def __init__(self):
        super(MyNet, self).__init__()
        # Shape comments are per sample (channels, height, width) and assume
        # a 1x28x28 input.
        self.conv1 = nn.Sequential(
            # padding = (kernel_size - 1) / 2 keeps the spatial size unchanged
            # at stride 1: (1, 28, 28) -> (16, 28, 28).
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5,
                      stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # -> (16, 14, 14)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),  # (16, 14, 14) -> (32, 14, 14)
            nn.ReLU(),
            nn.MaxPool2d(2),             # -> (32, 7, 7)
        )
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Return logits of shape (N, 10) for a batch ``x`` of shape (N, 1, 28, 28)."""
        features = self.conv2(self.conv1(x))
        # Flatten (N, 32, 7, 7) into (N, 32 * 7 * 7) for the linear layer.
        flat = features.view(features.size(0), -1)
        return self.out(flat)
定義優化器:
# Build the model on the selected device, then the optimiser over its weights.
model = MyNet().to(DEVICE)  # replace MyNet with ConvNet to train the other model
optimizer = optim.Adam(model.parameters())  # Adam with its default settings
定義訓練和測試函數:
# 定義訓練函數 def train(model, device, train_loader, optimizer, epoch): model.train() # 設置為trainning模式 for batch_idx, (data, target) in enumerate(train_loader): data, target = data.to(device), target.to(device) optimizer.zero_grad() # 優化器梯度初始化為零 output = model(data) # 把數據輸入網絡並得到輸出,即進行前向傳播 loss = F.cross_entropy(output, target) # 定義損失函數 loss.backward() # 反向傳播梯度 optimizer.step() # 結束一次前傳+反傳之后,更新參數 if (batch_idx + 1) % 30 == 0: print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( epoch, batch_idx * len(data), len(train_loader.dataset), 100. * batch_idx / len(train_loader), loss.item())) # 定義測試函數 def test(model, device, test_loader): model.eval() # 設置為test模式 test_loss =0 # 初始化測試損失值為0 correct = 0 # 初始化預測正確的數據個數為0 with torch.no_grad(): for data, target in test_loader: data, target = data.to(device), target.to(device) #計算前要把變量變成Variable形式,因為這樣子才有梯度 output = model(data) test_loss += F.nll_loss(output, target, reduction = 'sum') # 將一批的損失相加 pred = output.max(1, keepdim = True)[1] # 找到概率最大的下標 correct += pred.eq(target.view_as(pred)).sum().item() # 對預測正確的數據個數進行累加 test_loss /= len(test_loader.dataset) print("\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%) \n".format( test_loss, correct, len(test_loader.dataset), 100.* correct / len(test_loader.dataset) ))
main函數
# Train for EPOCHS epochs, evaluating on the test set after each one.
for epoch in range(1, EPOCHS + 1):
    train(model, DEVICE, train_loader, optimizer, epoch)
    test(model, DEVICE, test_loader)