首次體驗Pytorch,本文參考於:github and PyTorch 中文網人臉相似度對比
本文主要熟悉Pytorch大致流程,修改了讀取數據部分。沒有采用原作者的ImageFolder
方法: ImageFolder(root, transform=None, target_transform=None, loader=default_loader)。而是采用了一種更自由的方法,利用了Dataset 和 DataLoader 自由實現,更加適合於不同數據的預處理導入工作。
Siamese網絡不用多說,就是兩個共享參數的CNN。每次的輸入是一對圖像+1個label,共3個值。注意label=0或1(又稱正負樣本),表示輸入的兩張圖片match(匹配、同一個人)或no-match(不匹配、非同一人)。 下圖是Siamese基本結構,圖是從其他論文中找來的,輸入看做兩張圖片就好,只不過圖中畫的是兩個光譜段而已。
1. 數據處理
數據采用的是AT&T人臉數據。共40個人,每個人有10張臉。數據下載:AT&T
首先解壓后發現文件夾下共40個文件夾,每個文件夾里有10張pgm圖片。這里生成一個包含圖片路徑的train.txt文件供后續調用:
def convert(train=True, root=None, txt_path=None):
    """Write a 'path label' index file (one line per image) for the AT&T faces.

    Expected layout: <root>/train/s1..s40/1..10.pgm (40 people x 10 faces);
    labels run 0..39.

    Args:
        train: only the training index is generated when True; no-op otherwise.
        root: dataset root directory; defaults to Config.root (backward compat).
        txt_path: where to write the index; defaults to Config.txt_root.
    """
    if not train:
        return
    # NOTE: the original referenced a bare `root`, which is undefined at
    # module scope (NameError) -- the path must come from Config or the caller.
    root = Config.root if root is None else root
    txt_path = Config.txt_root if txt_path is None else txt_path
    data_path = root + '/train/'
    if not os.path.exists(data_path):
        os.makedirs(data_path)
    # `with` guarantees the file is closed even if a write fails.
    with open(txt_path, 'w') as f:
        for i in range(40):           # person ids s1..s40
            for j in range(10):       # 10 faces per person
                img_path = data_path + 's' + str(i + 1) + '/' + str(j + 1) + '.pgm'
                f.write(img_path + ' ' + str(i) + '\n')  # label is 0..39
生成結果:每行前面為每張圖片的完整路徑, 后面數字為類別標簽0~39。train文件夾下為s1~s40共40個子文件夾。
2. 定制個性化數據集
這一步驟主要繼承了類Dataset,然后重寫 __getitem__ 和 __len__ 方法即可:
class MyDataset(Dataset):
    """Siamese pair dataset built from a 'path label' index file (train.txt).

    Each __getitem__ returns (img0, img1, label) where label is 0 for a
    matching pair (same person) and 1 for a non-matching pair.
    """

    def __init__(self, txt, transform=None, target_transform=None,
                 should_invert=False):
        self.transform = transform
        self.target_transform = target_transform
        self.should_invert = should_invert
        self.txt = txt            # index file produced by convert()
        self._length = None       # cached line count (computed lazily)

    def __getitem__(self, index):
        # `index` is deliberately ignored: each access draws a random pair so
        # matched / unmatched samples stay roughly balanced.
        line = linecache.getline(self.txt, random.randint(1, self.__len__()))
        # BUGFIX: str.strip returns a new string; the original discarded it.
        img0_list = line.strip('\n').split()
        should_get_same_class = random.randint(0, 1)  # coin flip: same person?
        if should_get_same_class:
            # Resample until another face of the SAME person turns up.
            while True:
                img1_list = linecache.getline(
                    self.txt, random.randint(1, self.__len__())).strip('\n').split()
                if img0_list[1] == img1_list[1]:
                    break
        else:
            # Any random face; a same-person draw is possible but rare (1/40).
            img1_list = linecache.getline(
                self.txt, random.randint(1, self.__len__())).strip('\n').split()
        img0 = Image.open(img0_list[0])  # list[0] = image path, list[1] = label
        img1 = Image.open(img1_list[0])
        img0 = img0.convert("L")         # convert to grayscale
        img1 = img1.convert("L")
        if self.should_invert:           # optional pixel inversion (0 <-> 1)
            img0 = PIL.ImageOps.invert(img0)
            img1 = PIL.ImageOps.invert(img1)
        if self.transform is not None:
            img0 = self.transform(img0)
            img1 = self.transform(img1)
        # Pair of images plus a float32 tensor label: 0 = match, 1 = no-match.
        return img0, img1, torch.from_numpy(
            np.array([int(img1_list[1] != img0_list[1])], dtype=np.float32))

    def __len__(self):
        # PERF: cache the line count. The original re-read the whole file on
        # every call, and __getitem__ queries the length several times per
        # sample.
        if self._length is None:
            with open(self.txt, 'r') as fh:
                self._length = len(fh.readlines())
        return self._length
3. 制作雙塔CNN
class SiameseNetwork(nn.Module):
    """Twin-branch CNN: one set of weights, applied to both inputs.

    Each 1x100x100 grayscale image is embedded into a 5-D vector; the two
    embeddings are returned for a distance-based (contrastive) loss.
    """

    def __init__(self):
        super(SiameseNetwork, self).__init__()
        # Three identical pad/conv/ReLU/BN/dropout stages; reflection padding
        # of 1 with a 3x3 kernel keeps the spatial size at 100x100.
        stages = []
        for in_ch, out_ch in ((1, 4), (4, 8), (8, 8)):
            stages += [
                nn.ReflectionPad2d(1),
                nn.Conv2d(in_ch, out_ch, kernel_size=3),
                nn.ReLU(inplace=True),
                nn.BatchNorm2d(out_ch),
                nn.Dropout2d(p=.2),
            ]
        self.cnn1 = nn.Sequential(*stages)
        # Project the flattened 8x100x100 feature map down to 5 dimensions.
        self.fc1 = nn.Sequential(
            nn.Linear(8 * 100 * 100, 500),
            nn.ReLU(inplace=True),
            nn.Linear(500, 500),
            nn.ReLU(inplace=True),
            nn.Linear(500, 5),
        )

    def forward_once(self, x):
        """Embed one image batch with the shared branch."""
        feat = self.cnn1(x)
        flat = feat.view(feat.size()[0], -1)
        return self.fc1(flat)

    def forward(self, input1, input2):
        # Both inputs go through the SAME parameters (weight sharing).
        return self.forward_once(input1), self.forward_once(input2)
很簡單,沒說的,注意前向傳播是兩張圖同時輸入進行。
4. 定制對比損失函數
# Custom Contrastive Loss
class ContrastiveLoss(torch.nn.Module):
    """Contrastive loss (Hadsell, Chopra & LeCun 2006).

    L = mean((1 - y) * D^2 + y * max(0, margin - D)^2) where D is the
    euclidean distance between the two embeddings, y = 0 for a matching
    pair and y = 1 for a non-matching pair.
    Based on: http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    """

    def __init__(self, margin=2.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin  # how far apart non-matching pairs are pushed

    def forward(self, output1, output2, label):
        # BUGFIX: keepdim=True keeps the distance shaped (N, 1) so it matches
        # the (N, 1) label tensor. Without it the (N,) * (N, 1) product
        # broadcasts to an (N, N) matrix and the mean is wrong for batch > 1.
        euclidean_distance = F.pairwise_distance(output1, output2, keepdim=True)
        # clamp(..., min=0) implements the hinge max(0, margin - D).
        loss_contrastive = torch.mean(
            (1 - label) * torch.pow(euclidean_distance, 2) +
            label * torch.pow(torch.clamp(self.margin - euclidean_distance,
                                          min=0.0), 2))
        return loss_contrastive
上面的損失函數為自己制作的,公式源於lecun文章:
Loss = \frac{1}{N}\sum_{i=1}^{N}\left[(1-Y_i)\, D_W^2 + Y_i\,\big(\max(0,\; m - D_W)\big)^2\right]

D_W = \lVert G_W(X_1) - G_W(X_2) \rVert_2

m 為容忍度(margin),D_W 為兩張圖片經網絡映射后特征向量的歐氏距離;Y=0 表示匹配對(同一人),Y=1 表示非匹配對(非同一人)。
5. 訓練一波
# Training loop: random face pairs from MyDataset, contrastive loss, Adam.
train_data = MyDataset(txt=Config.txt_root,
                       transform=transforms.Compose(
                           [transforms.Resize((100, 100)),  # resize to 100x100
                            transforms.ToTensor()]),
                       should_invert=False)
train_dataloader = DataLoader(dataset=train_data, shuffle=True, num_workers=2,
                              batch_size=Config.train_batch_size)

net = SiameseNetwork().cuda()      # move the model to the GPU
criterion = ContrastiveLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0005)

counter = []         # iteration numbers for the loss curve
loss_history = []    # sampled loss values
iteration_number = 0

for epoch in range(0, Config.train_number_epochs):
    for i, data in enumerate(train_dataloader, 0):
        img0, img1, label = data
        # Variable is a no-op wrapper on PyTorch >= 0.4; kept for the era of
        # this code.
        img0, img1, label = (Variable(img0).cuda(), Variable(img1).cuda(),
                             Variable(label).cuda())
        output1, output2 = net(img0, img1)
        optimizer.zero_grad()          # clear stale gradients before backward
        loss_contrastive = criterion(output1, output2, label)
        loss_contrastive.backward()
        optimizer.step()
        if i % 10 == 0:
            # BUGFIX: .data[0] raises IndexError on 0-dim loss tensors in
            # modern PyTorch; .item() is the supported scalar accessor.
            print("Epoch:{}, Current loss {}\n".format(
                epoch, loss_contrastive.item()))
            iteration_number += 10
            counter.append(iteration_number)
            loss_history.append(loss_contrastive.item())

show_plot(counter, loss_history)  # plot the loss curve
損失函數結果圖:
batch_size=32, epochs=20, lr=0.001 batch_size=32, epochs=30, lr=0.0005
全部代碼:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 24 10:00:24 2018

Paper: Siamese Neural Networks for One-shot Image Recognition
links: https://www.cnblogs.com/denny402/p/7520063.html

Siamese network on the AT&T face dataset: two weight-sharing CNN branches
trained with a contrastive loss on random matched / unmatched face pairs.
"""
import os
import random
import linecache

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import PIL.ImageOps


class Config():
    # Adjust these paths to your local copy of the AT&T face data.
    root = '/home/lps/Spyder/data_faces/'
    txt_root = '/home/lps/Spyder/data_faces/train.txt'
    train_batch_size = 32
    train_number_epochs = 30


# Helper functions
def imshow(img, text=None, should_save=False):
    """Display a (C, H, W) image tensor, with an optional caption."""
    npimg = img.numpy()
    plt.axis("off")
    if text:
        plt.text(75, 8, text, style='italic', fontweight='bold',
                 bbox={'facecolor': 'white', 'alpha': 0.8, 'pad': 10})
    plt.imshow(np.transpose(npimg, (1, 2, 0)))  # CHW -> HWC for matplotlib
    plt.show()


def show_plot(iteration, loss):
    """Plot sampled loss values against iteration count."""
    plt.plot(iteration, loss)
    plt.show()


def convert(train=True):
    """Write Config.txt_root: one 'image_path label' line per face image.

    Expects <Config.root>/train/s1..s40/1..10.pgm; labels run 0..39.
    """
    if not train:
        return
    # BUGFIX: the original referenced a bare `root`, which is undefined at
    # module scope (NameError); the dataset root lives on Config.
    data_path = Config.root + '/train/'
    if not os.path.exists(data_path):
        os.makedirs(data_path)
    with open(Config.txt_root, 'w') as f:  # `with` guarantees the file closes
        for i in range(40):           # person ids s1..s40
            for j in range(10):       # 10 faces per person
                img_path = data_path + 's' + str(i + 1) + '/' + str(j + 1) + '.pgm'
                f.write(img_path + ' ' + str(i) + '\n')

# convert(True)


# Ready the dataset; a custom Dataset is used instead of ImageFolder.
class MyDataset(Dataset):
    """Siamese pair dataset over the 'path label' index file.

    __getitem__ returns (img0, img1, label): label 0 = same person (match),
    label 1 = different people (no-match).
    """

    def __init__(self, txt, transform=None, target_transform=None,
                 should_invert=False):
        self.transform = transform
        self.target_transform = target_transform
        self.should_invert = should_invert
        self.txt = txt            # index file produced by convert()
        self._length = None       # cached line count (computed lazily)

    def __getitem__(self, index):
        # `index` is deliberately ignored: each access draws a random pair so
        # matched / unmatched samples stay roughly balanced.
        line = linecache.getline(self.txt, random.randint(1, self.__len__()))
        # BUGFIX: str.strip returns a new string; the original discarded it.
        img0_list = line.strip('\n').split()
        should_get_same_class = random.randint(0, 1)
        if should_get_same_class:
            # Resample until another face of the SAME person turns up.
            while True:
                img1_list = linecache.getline(
                    self.txt, random.randint(1, self.__len__())).strip('\n').split()
                if img0_list[1] == img1_list[1]:
                    break
        else:
            # Any random face; a same-person draw is possible but rare (1/40).
            img1_list = linecache.getline(
                self.txt, random.randint(1, self.__len__())).strip('\n').split()
        img0 = Image.open(img0_list[0])  # list[0] = path, list[1] = label
        img1 = Image.open(img1_list[0])
        img0 = img0.convert("L")         # convert to grayscale
        img1 = img1.convert("L")
        if self.should_invert:           # optional pixel inversion (0 <-> 1)
            img0 = PIL.ImageOps.invert(img0)
            img1 = PIL.ImageOps.invert(img1)
        if self.transform is not None:
            img0 = self.transform(img0)
            img1 = self.transform(img1)
        # Pair of images plus a float32 tensor label (0 match / 1 no-match).
        return img0, img1, torch.from_numpy(
            np.array([int(img1_list[1] != img0_list[1])], dtype=np.float32))

    def __len__(self):
        # PERF: cache the line count; the original re-read the file on every
        # call and __getitem__ queries the length several times per sample.
        if self._length is None:
            with open(self.txt, 'r') as fh:
                self._length = len(fh.readlines())
        return self._length


# Visualising some of the data (repaired: the original snippet had a
# duplicate `transform=` keyword and unbalanced parentheses).
"""
train_data = MyDataset(txt=Config.txt_root,
                       transform=transforms.Compose([transforms.Resize((100, 100)),
                                                     transforms.ToTensor()]),
                       should_invert=False)
train_loader = DataLoader(dataset=train_data, batch_size=8, shuffle=True)
it = iter(train_loader)
example_batch = next(it)
concatenated = torch.cat((example_batch[0], example_batch[1]), 0)
imshow(torchvision.utils.make_grid(concatenated))
print(example_batch[2].numpy())
"""


# Neural net definition: two branches sharing one set of CNN weights.
class SiameseNetwork(nn.Module):
    def __init__(self):
        super(SiameseNetwork, self).__init__()
        # Three pad/conv/ReLU/BN/dropout stages; reflection padding of 1 with
        # a 3x3 kernel keeps the spatial size at 100x100.
        self.cnn1 = nn.Sequential(
            nn.ReflectionPad2d(1),
            nn.Conv2d(1, 4, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(4),
            nn.Dropout2d(p=.2),

            nn.ReflectionPad2d(1),
            nn.Conv2d(4, 8, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(8),
            nn.Dropout2d(p=.2),

            nn.ReflectionPad2d(1),
            nn.Conv2d(8, 8, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(8),
            nn.Dropout2d(p=.2),
        )
        # Project the flattened 8x100x100 feature map to a 5-D embedding.
        self.fc1 = nn.Sequential(
            nn.Linear(8 * 100 * 100, 500),
            nn.ReLU(inplace=True),
            nn.Linear(500, 500),
            nn.ReLU(inplace=True),
            nn.Linear(500, 5),
        )

    def forward_once(self, x):
        """Embed one image batch with the shared branch."""
        output = self.cnn1(x)
        output = output.view(output.size()[0], -1)
        output = self.fc1(output)
        return output

    def forward(self, input1, input2):
        # Both inputs pass through the SAME weights (parameter sharing).
        output1 = self.forward_once(input1)
        output2 = self.forward_once(input2)
        return output1, output2


# Custom contrastive loss (Hadsell, Chopra & LeCun 2006).
class ContrastiveLoss(torch.nn.Module):
    """
    L = mean((1 - y) * D^2 + y * max(0, margin - D)^2),
    y = 0 for a matching pair, y = 1 for a non-matching pair.
    Based on: http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    """

    def __init__(self, margin=2.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin  # how far apart non-matching pairs are pushed

    def forward(self, output1, output2, label):
        # BUGFIX: keepdim=True keeps the distance (N, 1) so it matches the
        # (N, 1) label tensor; without it the product broadcasts to (N, N)
        # and the mean is taken over the wrong quantity.
        euclidean_distance = F.pairwise_distance(output1, output2, keepdim=True)
        loss_contrastive = torch.mean(
            (1 - label) * torch.pow(euclidean_distance, 2) +
            label * torch.pow(torch.clamp(self.margin - euclidean_distance,
                                          min=0.0), 2))
        return loss_contrastive


# Training (guarded so importing this module does not start a training run).
if __name__ == '__main__':
    train_data = MyDataset(txt=Config.txt_root,
                           transform=transforms.Compose(
                               [transforms.Resize((100, 100)),
                                transforms.ToTensor()]),
                           should_invert=False)
    train_dataloader = DataLoader(dataset=train_data, shuffle=True,
                                  num_workers=2,
                                  batch_size=Config.train_batch_size)

    net = SiameseNetwork().cuda()      # move the model to the GPU
    criterion = ContrastiveLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0005)

    counter = []         # iteration numbers for the loss curve
    loss_history = []    # sampled loss values
    iteration_number = 0

    for epoch in range(0, Config.train_number_epochs):
        for i, data in enumerate(train_dataloader, 0):
            img0, img1, label = data
            # Variable is a no-op wrapper on PyTorch >= 0.4; kept as-is.
            img0, img1, label = (Variable(img0).cuda(), Variable(img1).cuda(),
                                 Variable(label).cuda())
            output1, output2 = net(img0, img1)
            optimizer.zero_grad()      # clear stale gradients before backward
            loss_contrastive = criterion(output1, output2, label)
            loss_contrastive.backward()
            optimizer.step()
            if i % 10 == 0:
                # BUGFIX: .data[0] raises on 0-dim tensors in modern PyTorch;
                # .item() is the supported scalar accessor.
                print("Epoch:{}, Current loss {}\n".format(
                    epoch, loss_contrastive.item()))
                iteration_number += 10
                counter.append(iteration_number)
                loss_history.append(loss_contrastive.item())

    show_plot(counter, loss_history)   # plot the loss curve
原作者jupyter notebook下載:Siamese Neural Networks for One-shot Image Recognition
更多資料:Some important Pytorch tasks
利用Siamese network 來解決 one-shot learning:https://sorenbouma.github.io/blog/oneshot/ 譯文: 【深度神經網絡 One-shot Learning】孿生網絡少樣本精准分類
A PyTorch Implementation of "Siamese Neural Networks for One-shot Image Recognition"