1.LeNet
LeNet usually refers to LeNet-5, the first convolutional neural network successfully applied to digit recognition; on the MNIST dataset it reaches about 99.2% accuracy. The LeNet-5 model has 7 layers in total: two convolutional layers, two pooling layers, two fully connected layers, and one output layer.
```python
import torch
import torch.nn as nn
import torch.nn.functional as F

# Square kernel with equal stride in both dimensions
m1 = nn.Conv2d(16, 33, 3, stride=2)
# Non-square kernel with unequal stride and padding
m2 = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
# Non-square kernel with unequal stride, padding and dilation
m3 = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))

input = torch.randn(20, 16, 50, 100)
output = m2(input)
```
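As a quick check of the shapes involved (a small addition, assuming the snippet above has been run), the standard Conv2d output-size formula `out = (in + 2*padding - dilation*(kernel - 1) - 1) // stride + 1` predicts the result of applying `m2` to the `(20, 16, 50, 100)` input:

```python
# Expected output size of m2 applied to a (20, 16, 50, 100) input,
# using out = (in + 2*padding - dilation*(kernel - 1) - 1) // stride + 1:
h_out = (50 + 2 * 4 - 1 * (3 - 1) - 1) // 2 + 1    # = 28
w_out = (100 + 2 * 2 - 1 * (5 - 1) - 1) // 1 + 1   # = 100
print(output.shape)                                 # torch.Size([20, 33, 28, 100])
```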
#### PyTorch implementation of LeNet

```python
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = F.max_pool2d(out, 2)
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2)
        # This line usually appears in the forward function of a model class,
        # right before the classifier is called. The classifier is a simple
        # nn.Linear() layer whose input and output are flat vectors, so
        # x = x.view(x.size(0), -1) flattens the preceding multi-dimensional
        # tensor into one dimension per sample: in x.view(batchsize, -1),
        # batchsize is the number of rows after the reshape, and -1 lets
        # PyTorch infer the number of columns from the original tensor size
        # and the batch size.
        out = out.view(out.size(0), -1)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out
```
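A minimal smoke test, assuming a CIFAR-10-sized 3×32×32 input (which is what the `16 * 5 * 5` flatten size above implies) and that the `LeNet` class defined above is in scope:

```python
import torch

net = LeNet()
x = torch.randn(1, 3, 32, 32)   # 32 -> conv5 -> 28 -> pool -> 14 -> conv5 -> 10 -> pool -> 5
y = net(x)
print(y.shape)                  # torch.Size([1, 10]), one score per class
```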
2.AlexNet
AlexNet has a deeper network structure and stacks convolutional layers; it also adds Dropout and data augmentation, replaces the earlier sigmoid activation with ReLU, and is trained on multiple GPUs.
AlexNet has 8 layers in total: the first 5 are convolutional layers and the last 3 are fully connected layers.
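The model definition below only covers the network itself. As a rough illustration of the data-augmentation side mentioned above, a typical torchvision preprocessing pipeline might look like the following sketch; the crop size and normalization statistics are standard ImageNet values, not something specified in this article.

```python
import torchvision.transforms as transforms

# Illustrative augmentation pipeline; the exact transforms and statistics are assumptions.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),   # random crop and resize to the network input size
    transforms.RandomHorizontalFlip(),   # random left-right flip
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
```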
#### PyTorch implementation of AlexNet

```python
class AlexNet(nn.Module):
    def __init__(self, num_classes):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x
```
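A quick shape check, assuming the canonical 224×224 input and that the `AlexNet` class above is in scope: with these strides and paddings the feature extractor ends at a 6×6×256 map, which is why the first fully connected layer expects `256 * 6 * 6` inputs.

```python
import torch

net = AlexNet(num_classes=1000)
x = torch.randn(1, 3, 224, 224)
feats = net.features(x)
print(feats.shape)        # torch.Size([1, 256, 6, 6])
y = net(x)
print(y.shape)            # torch.Size([1, 1000])
```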
3.VGGNet
VGGNet replaces the larger convolution kernels of AlexNet with stacks of 3×3 kernels; the model is built by stacking convolutional layers and pooling layers.
#### Implementation of VGGNet

```python
cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)
```
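A brief sanity check, assuming the `VGG` class and `cfg` dict above are in scope: with a 32×32 input, the five `'M'` pooling stages reduce the spatial size 32 → 16 → 8 → 4 → 2 → 1, so the flattened feature vector has 512 elements, matching the `nn.Linear(512, 10)` classifier (this configuration targets CIFAR-10-sized images).

```python
import torch

net = VGG('VGG16')
x = torch.randn(1, 3, 32, 32)
y = net(x)
print(y.shape)   # torch.Size([1, 10])
```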
4.GoogLeNet
```python
'''GoogLeNet with PyTorch.'''
import torch
import torch.nn as nn
import torch.nn.functional as F


# Convolution + BatchNorm + ReLU building block
class BasicConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, **kwargs):
        super(BasicConv2d, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return F.relu(x)


# Inception module
class Inception(nn.Module):
    def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
        super(Inception, self).__init__()
        # 1x1 conv branch
        self.b1 = BasicConv2d(in_planes, n1x1, kernel_size=1)

        # 1x1 conv -> 3x3 conv branch
        self.b2_1x1_a = BasicConv2d(in_planes, n3x3red, kernel_size=1)
        self.b2_3x3_b = BasicConv2d(n3x3red, n3x3, kernel_size=3, padding=1)

        # 1x1 conv -> 3x3 conv -> 3x3 conv branch
        self.b3_1x1_a = BasicConv2d(in_planes, n5x5red, kernel_size=1)
        self.b3_3x3_b = BasicConv2d(n5x5red, n5x5, kernel_size=3, padding=1)
        self.b3_3x3_c = BasicConv2d(n5x5, n5x5, kernel_size=3, padding=1)

        # 3x3 pool -> 1x1 conv branch
        self.b4_pool = nn.MaxPool2d(3, stride=1, padding=1)
        self.b4_1x1 = BasicConv2d(in_planes, pool_planes, kernel_size=1)

    def forward(self, x):
        y1 = self.b1(x)
        y2 = self.b2_3x3_b(self.b2_1x1_a(x))
        y3 = self.b3_3x3_c(self.b3_3x3_b(self.b3_1x1_a(x)))
        y4 = self.b4_1x1(self.b4_pool(x))
        # Each y has shape [batch_size, out_channels, H_out, W_out];
        # concatenate the feature maps from the different branches along the channel dimension.
        return torch.cat([y1, y2, y3, y4], 1)


class GoogLeNet(nn.Module):
    def __init__(self):
        super(GoogLeNet, self).__init__()
        self.pre_layers = BasicConv2d(3, 192, kernel_size=3, padding=1)

        self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
        self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)

        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)

        self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
        self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
        self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
        self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
        self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)

        self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
        self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)

        self.avgpool = nn.AvgPool2d(8, stride=1)
        self.linear = nn.Linear(1024, 10)

    def forward(self, x):
        out = self.pre_layers(x)
        out = self.a3(out)
        out = self.b3(out)
        out = self.maxpool(out)
        out = self.a4(out)
        out = self.b4(out)
        out = self.c4(out)
        out = self.d4(out)
        out = self.e4(out)
        out = self.maxpool(out)
        out = self.a5(out)
        out = self.b5(out)
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def test():
    net = GoogLeNet()
    x = torch.randn(1, 3, 32, 32)
    y = net(x)
    print(y.size())

test()
```
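One detail worth making explicit: because the four branches are concatenated along the channel dimension, the output channel count of an Inception block is `n1x1 + n3x3 + n5x5 + pool_planes`, and that sum must match the `in_planes` of the next block (for example, `a3` produces 64 + 128 + 32 + 32 = 256 channels, exactly `b3`'s input). A small check, assuming the `Inception` class above is in scope:

```python
import torch

inc = Inception(192, 64, 96, 128, 16, 32, 32)   # same parameters as self.a3
x = torch.randn(1, 192, 32, 32)
y = inc(x)
print(y.shape)                                   # torch.Size([1, 256, 32, 32])
assert y.shape[1] == 64 + 128 + 32 + 32          # channels of the concatenated branches
```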