# DenseNet paper: https://arxiv.org/abs/1608.06993
# DenseNet introduction: https://blog.csdn.net/zchang81/article/details/76155291
# The code below is the DenseNet source from torchvision.models. To sharpen my own
# model-building skills, I walk through and annotate the source here:
import re
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo
from collections import OrderedDict

__all__ = ['DenseNet', 'densenet121', 'densenet169', 'densenet201', 'densenet161']

# Pretrained weights: passing pretrained=True to densenet121, densenet169, etc.
# downloads the corresponding checkpoint from these URLs.
model_urls = {
    'densenet121': 'https://download.pytorch.org/models/densenet121-a639ec97.pth',
    'densenet169': 'https://download.pytorch.org/models/densenet169-b2777c0a.pth',
    'densenet201': 'https://download.pytorch.org/models/densenet201-c1103571.pth',
    'densenet161': 'https://download.pytorch.org/models/densenet161-8d451a50.pth',
}


def densenet121(pretrained=False, **kwargs):
    r"""Densenet-121 model from
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    # This is where the model is actually built, via the DenseNet class;
    # jump straight to that class below.
    model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 24, 16),
                     **kwargs)
    if pretrained:
        # '.'s are no longer allowed in module names, but previous _DenseLayer
        # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.
        # They are also in the checkpoints in model_urls. This pattern is used
        # to find such keys.
        pattern = re.compile(
            r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
        state_dict = model_zoo.load_url(model_urls['densenet121'])
        for key in list(state_dict.keys()):
            res = pattern.match(key)
            if res:
                new_key = res.group(1) + res.group(2)
                state_dict[new_key] = state_dict[key]
                del state_dict[key]
        model.load_state_dict(state_dict)
    return model

# densenet169, densenet201 and densenet161 are omitted here; they follow the
# same structure as densenet121 above.
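As a quick sanity check of the factory function, the snippet below (my own, not part of the torchvision source; it assumes the DenseNet class defined further down is available) builds an untrained DenseNet-121 and pushes a dummy ImageNet-sized batch through it. With pretrained=True the weights from model_urls would be downloaded instead.

# My own smoke test, not from the torchvision source.
net = densenet121(pretrained=False)
net.eval()  # use BN running statistics and disable dropout
x = torch.randn(1, 3, 224, 224)  # dummy ImageNet-sized input
with torch.no_grad():
    logits = net(x)
print(logits.shape)  # torch.Size([1, 1000]); raw scores, apply softmax for probabilities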
class DenseNet(nn.Module):
    # The main DenseNet class; note that it subclasses nn.Module.
    r"""Densenet-BC model class, based on
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        growth_rate (int) - how many filters to add each layer (`k` in paper)
            # the number of channels each layer inside a dense block emits, `k` in the paper
        block_config (list of 4 ints) - how many layers in each pooling block
            # layers per dense block; len(block_config) is the number of blocks
        num_init_features (int) - the number of filters to learn in the first convolution layer
            # output channels of the stem convolution
        bn_size (int) - multiplicative factor for number of bottle neck layers
            (i.e. bn_size * k features in the bottleneck layer)
            # channel count between the two convolutions of a dense layer: bn_size * growth_rate
        drop_rate (float) - dropout rate after each dense layer
            # dropout probability, a regularization method
        num_classes (int) - number of classification classes
            # number of output classes; the head is a bare Linear layer, so the loss must
            # supply softmax / cross-entropy, i.e. something that turns scores into probabilities
    """

    def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
                 num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000):
        super(DenseNet, self).__init__()

        # First convolution (stem): the image does not enter a dense block directly;
        # a large kernel with a large stride first compresses the spatial size.
        # Note the use of nn.Sequential with an OrderedDict to give each layer a name,
        # and the ordering conv -> bn -> relu -> pool. After this stem the feature map
        # is 1/4 of the input resolution, which greatly reduces the amount of data.
        self.features = nn.Sequential(OrderedDict([
            ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)),
            ('norm0', nn.BatchNorm2d(num_init_features)),
            ('relu0', nn.ReLU(inplace=True)),
            ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
        ]))  # BatchNorm2d is used throughout; GroupNorm appeared recently, could it be swapped in?

        # Each denseblock: build one block per entry of block_config.
        num_features = num_init_features
        for i, num_layers in enumerate(block_config):
            # Build a dense block with the configured number of layers.
            block = _DenseBlock(num_layers=num_layers, num_input_features=num_features,
                                bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate)
            self.features.add_module('denseblock%d' % (i + 1), block)  # append to nn.Sequential
            # Channels leaving a dense block: because of the dense connectivity, this is the
            # block's input channels plus the features produced by every layer inside it.
            num_features = num_features + num_layers * growth_rate
            if i != len(block_config) - 1:  # no transition after the last block
                # A transition layer compresses the feature channels to half.
                trans = _Transition(num_input_features=num_features,
                                    num_output_features=num_features // 2)
                self.features.add_module('transition%d' % (i + 1), trans)
                num_features = num_features // 2

        # Final batch norm
        self.features.add_module('norm5', nn.BatchNorm2d(num_features))

        # Linear layer
        self.classifier = nn.Linear(num_features, num_classes)

        # Official init from torch repo.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal(m.weight.data)  # deprecated in newer PyTorch: nn.init.kaiming_normal_
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()

    def forward(self, x):
        features = self.features(x)
        out = F.relu(features, inplace=True)
        # 7x7 global average pooling (assumes 224x224 inputs), then flatten for the classifier.
        out = F.avg_pool2d(out, kernel_size=7, stride=1).view(features.size(0), -1)
        out = self.classifier(out)
        return out
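To make the channel bookkeeping in __init__ concrete, here is a small standalone sketch of my own that mirrors the loop above and prints the feature count after every block and transition for the DenseNet-121 defaults:

# My own worked example of the num_features arithmetic
# (growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64).
num_features = 64
for i, num_layers in enumerate((6, 12, 24, 16)):
    num_features += num_layers * 32   # every layer adds growth_rate channels
    print('denseblock%d -> %d' % (i + 1, num_features))
    if i != 3:                        # no transition after the last block
        num_features //= 2            # each transition halves the channels
        print('transition%d -> %d' % (i + 1, num_features))
# denseblock1 -> 256, transition1 -> 128, denseblock2 -> 512, transition2 -> 256,
# denseblock3 -> 1024, transition3 -> 512, denseblock4 -> 1024,
# which is why the classifier ends up as Linear(1024, num_classes).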
class _DenseLayer(nn.Sequential):
    # A dense layer, again an nn.Sequential subclass; a pattern worth studying.
    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):
        super(_DenseLayer, self).__init__()
        # A dense layer consists mainly of two convolutions, and their channel
        # counts are worth paying attention to. (The trailing commas after the
        # add_module calls in the original source are meaningless, they do not
        # form tuples and only add ambiguity, so they are dropped here.)
        self.add_module('norm1', nn.BatchNorm2d(num_input_features))
        self.add_module('relu1', nn.ReLU(inplace=True))
        self.add_module('conv1', nn.Conv2d(num_input_features, bn_size * growth_rate,
                                           kernel_size=1, stride=1, bias=False))
        self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate))
        self.add_module('relu2', nn.ReLU(inplace=True))
        # Note that the output channel count here is growth_rate.
        self.add_module('conv2', nn.Conv2d(bn_size * growth_rate, growth_rate,
                                           kernel_size=3, stride=1, padding=1, bias=False))
        self.drop_rate = drop_rate

    def forward(self, x):
        # The key point of the forward pass: cat the layer's output with its input.
        new_features = super(_DenseLayer, self).forward(x)
        if self.drop_rate > 0:
            # dropout for extra regularization
            new_features = F.dropout(new_features, p=self.drop_rate, training=self.training)
        # cat along the channel dimension to form the dense connection
        return torch.cat([x, new_features], 1)


class _DenseBlock(nn.Sequential):
    # An nn.Sequential subclass that chains the layers of one block together.
    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate):
        super(_DenseBlock, self).__init__()
        for i in range(num_layers):
            # The input channels of layer i are the block's input channels plus the
            # channels produced by all preceding layers in the block.
            layer = _DenseLayer(num_input_features + i * growth_rate, growth_rate, bn_size, drop_rate)
            self.add_module('denselayer%d' % (i + 1), layer)
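To watch the dense connectivity in isolation, this usage sketch of mine (assuming the two classes above are defined) pushes a feature map through a single _DenseLayer and a whole _DenseBlock and checks the channel growth:

# My own usage example, not from the torchvision source.
layer = _DenseLayer(num_input_features=64, growth_rate=32, bn_size=4, drop_rate=0)
x = torch.randn(2, 64, 56, 56)
print(layer(x).shape)  # torch.Size([2, 96, 56, 56]): 64 input channels cat'ed with 32 new ones

block = _DenseBlock(num_layers=6, num_input_features=64, bn_size=4, growth_rate=32, drop_rate=0)
print(block(x).shape)  # torch.Size([2, 256, 56, 56]): 64 + 6 * 32, matching denseblock1 above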
class _Transition(nn.Sequential):
    # Another nn.Sequential subclass; this one is simple and a good template for
    # building your own modules later.
    def __init__(self, num_input_features, num_output_features):
        super(_Transition, self).__init__()
        self.add_module('norm', nn.BatchNorm2d(num_input_features))
        self.add_module('relu', nn.ReLU(inplace=True))
        self.add_module('conv', nn.Conv2d(num_input_features, num_output_features,
                                          kernel_size=1, stride=1, bias=False))
        self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))
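And a matching sanity check of my own for _Transition: the 1x1 convolution halves the channel count while the average pooling halves the spatial resolution:

# My own usage example, not from the torchvision source.
trans = _Transition(num_input_features=256, num_output_features=128)
y = torch.randn(2, 256, 56, 56)
print(trans(y).shape)  # torch.Size([2, 128, 28, 28])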
That is roughly it. DenseNet, one of the strongest classification architectures of the past year, offers plenty to learn from,
and it can serve as a good reference when building your own networks.