Combining CNN and RNN
The problem
A few days ago I worked through the derivation and code for RNNs, which raises a question: can we combine a CNN with an RNN? Can the features extracted by a CNN also be treated as a sequence? The answer is yes.
That said, I think feeding raw CNN features straight into an RNN is of limited value. RNNs excel at variable-length sequences, i.e. the seq size is not fixed, whereas the number of feature neurons coming out of an image backbone is usually fixed. Bolting an RNN on top of that honestly does not buy much, unless you design a structure that lets the network handle variable-length input. (One simple idea of mine is to add a side branch that learns a per-neuron weight mask, filter out some neurons according to a rule, and then feed the result into an RNN or LSTM.)
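To make the "features as a sequence" idea concrete, here is a minimal sketch (the shapes and tensor names are hypothetical, chosen only for illustration, not taken from the model below): a feature map of shape (batch, channels, h, w) is reshaped into h*w time steps of channel-dimensional vectors and fed to an nn.RNN.

import torch
import torch.nn as nn

# Hypothetical feature map: batch of 8, 128 channels on a 2x2 spatial grid,
# as if produced by some CNN backbone.
feat = torch.randn(8, 128, 2, 2)        # (batch, channels, h, w)
seq = feat.flatten(2).permute(2, 0, 1)  # (h*w, batch, channels) = (4, 8, 128)

rnn = nn.RNN(input_size=128, hidden_size=256, num_layers=2)
out, hn = rnn(seq)                      # out: (seq_len, batch, hidden) = (4, 8, 256)
print(out.shape)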
How to implement it
import torch
import torch.nn as nn
from torchsummary import summary
from torchvision import datasets,transforms
import torch.optim as optim
from tqdm import tqdm
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Conv2d(16, 64, kernel_size=3, stride=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=2),
            nn.BatchNorm2d(128),
            nn.ReLU(),
        )
        self.rnn = nn.RNN(128, 256, 2)     # input_size, hidden_size, num_layers
        self.h0 = torch.zeros(2, 32, 256)  # (num_layers, batch_size, hidden_size); batch size fixed at 32
        self.predictor = nn.Linear(4 * 256, 10)

    def forward(self, x):
        x = self.feature_extractor(x)  # (-1, 128, 2, 2): 4 spatial positions, each a 128-dim vector
        # reshape to (seq_len=h*w, batch_size, input_size) = (4, -1, 128) for the RNN
        x, ht = self.rnn(x.permute(2, 3, 0, 1).contiguous().view(4, -1, 128), self.h0)
        # x: (seq_len, batch_size, hidden_size) -> concatenate the 4 steps of each sample
        x = self.predictor(x.permute(1, 0, 2).reshape(-1, 4 * 256))
        return x
if __name__ == "__main__":
    model = Model()
    # summary(model, (1, 28, 28), device="cpu")
    loss_fn = nn.CrossEntropyLoss()
    train_dataset = datasets.MNIST(root="./data/", train=True, transform=transforms.ToTensor(), download=True)
    test_dataset = datasets.MNIST(root="./data/", train=False, transform=transforms.ToTensor(), download=True)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=32,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=128,
                                              shuffle=False)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    print(len(train_loader))
    for epoch in range(100):
        epoch_loss = 0.
        for x, target in train_loader:
            # print(x.size())
            y = model(x)
            loss = loss_fn(y, target)
            epoch_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print("epoch : {} and loss is : {}".format(epoch + 1, epoch_loss))
        torch.save(model.state_dict(), "rnn_cnn.pth")
As the code above shows, I have already fixed the number of RNN input neurons, so the input is definitely fixed-length; after training it does converge.
As for variable length, there is really no way to change the seq len within a batch, because whatever length you specify has to accommodate the longest sequence, so true variable-length input is not achievable here. What I can do instead is learn a weight through a side branch and apply it to the original features; this weight is meant to act as a 0-1 gate, which effectively achieves the same thing.
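For reference, if the gate should lie strictly in [0, 1], a sigmoid on the attention branch is the usual choice; the code below instead uses a ReLU, which is unbounded. A minimal sketch of the sigmoid variant (shapes and names are my own illustration, not the model below):

import torch
import torch.nn as nn

feat = torch.randn(8, 128, 2, 2)              # hypothetical CNN features (batch, C, h, w)
attn_conv = nn.Conv2d(128, 1, kernel_size=1)  # 1x1 conv producing one weight per spatial position
gate = torch.sigmoid(attn_conv(feat))         # (8, 1, 2, 2), each value in (0, 1)
gated = feat * gate                           # broadcast over the 128 channels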
import torch
import torch.nn as nn
from torchsummary import summary
from torchvision import datasets,transforms
import torch.optim as optim
import torch.nn.functional as F
from tqdm import tqdm
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=2),
            nn.BatchNorm2d(16),
            nn.ReLU6(),
            nn.Conv2d(16, 64, kernel_size=3, stride=2),
            nn.BatchNorm2d(64),
            nn.ReLU6(),
            nn.Conv2d(64, 128, kernel_size=3, stride=2),
            nn.BatchNorm2d(128),
            nn.ReLU6(),
        )
        self.attn = nn.Conv2d(128, 1, kernel_size=1)  # side branch: one weight per spatial position
        self.rnn = nn.RNN(128, 256, 2)     # input_size, hidden_size, num_layers
        self.h0 = torch.zeros(2, 32, 256)  # (num_layers, batch_size, hidden_size); batch size fixed at 32
        self.predictor = nn.Linear(4 * 256, 10)

    def forward(self, x):
        x = self.feature_extractor(x)  # (-1, 128, 2, 2): 4 spatial positions, each a 128-dim vector
        attn = F.relu(self.attn(x))    # (-1, 1, 2, 2), one weight per position
        x = x * attn                   # gate the features, broadcasting over the 128 channels
        # print(x.size())
        # reshape to (seq_len=h*w, batch_size, input_size) = (4, -1, 128) for the RNN
        x, ht = self.rnn(x.permute(2, 3, 0, 1).contiguous().view(4, -1, 128), self.h0)
        # self.h0 = ht
        # x: (seq_len, batch_size, hidden_size) -> concatenate the 4 steps of each sample
        x = self.predictor(x.permute(1, 0, 2).reshape(-1, 4 * 256))
        return x
if __name__ == "__main__":
    model = Model()
    # summary(model, (1, 28, 28), device="cpu")
    # exit()
    loss_fn = nn.CrossEntropyLoss()
    train_dataset = datasets.MNIST(root="./data/", train=True, transform=transforms.ToTensor(), download=True)
    test_dataset = datasets.MNIST(root="./data/", train=False, transform=transforms.ToTensor(), download=True)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=32,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=128,
                                              shuffle=False)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    print(len(train_loader))
    for epoch in range(100):
        epoch_loss = 0.
        for x, target in train_loader:
            # print(x.size())
            y = model(x)
            loss = loss_fn(y, target)
            epoch_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print("epoch : {} and loss is : {}".format(epoch + 1, epoch_loss))
        torch.save(model.state_dict(), "rnn_cnn.pth")
I trained both models myself, and the latter converges considerably faster than the former.
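As a side note, both scripts build a test_loader but never evaluate on it. A minimal evaluation sketch, assuming the gated Model above and the saved rnn_cnn.pth (because h0 is fixed for a batch size of 32, the test loader here also uses batch_size=32 with drop_last=True):

import torch
from torchvision import datasets, transforms

model = Model()
model.load_state_dict(torch.load("rnn_cnn.pth"))
model.eval()

test_dataset = datasets.MNIST(root="./data/", train=False, transform=transforms.ToTensor(), download=True)
# keep batch_size at 32 to match the fixed h0; drop_last avoids a smaller final batch
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=32, shuffle=False, drop_last=True)

correct, total = 0, 0
with torch.no_grad():
    for x, target in test_loader:
        pred = model(x).argmax(dim=1)
        correct += (pred == target).sum().item()
        total += target.size(0)
print("test accuracy: {:.4f}".format(correct / total))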