For the theoretical background of CNNs, see: Convolutional Neural Networks
For building neural networks quickly with TensorFlow 2.0, see: tf.keras
This post covers: 1. the main modules used to build a convolutional neural network: convolution, batch normalization, activation, pooling, and fully connected layers;
2. building the classic convolutional networks: LeNet, AlexNet, VGGNet, InceptionNet, and ResNet.
1 Main Modules of a Convolutional Neural Network
1.1 Convolution
```python
tf.keras.layers.Conv2D(
    filters=number_of_kernels,     # number of convolution kernels
    kernel_size=kernel_size,       # an integer for a square kernel, or (kernel height h, kernel width w)
    strides=stride,                # an integer if both directions are equal, or (vertical stride h, horizontal stride w); default 1
    padding="same" or "valid",     # "same" pads with zeros, "valid" (default) does not
    activation="relu", "sigmoid", "tanh", "softmax", etc.,  # omit here if a BN layer follows
    input_shape=(height, width, channels)  # dimensions of the input feature map; may be omitted
)
```
e.g.:
```python
model = tf.keras.models.Sequential([
    Conv2D(6, 5, padding='valid', activation='sigmoid'),        # positional arguments
    MaxPool2D(2, 2),
    Conv2D(6, (5, 5), padding='valid', activation='sigmoid'),   # kernel size written as a tuple
    MaxPool2D(2, (2, 2)),
    Conv2D(filters=6, kernel_size=(5, 5), padding='valid', activation='sigmoid'),  # fully keyword arguments
    MaxPool2D(pool_size=(2, 2), strides=2),
    Flatten(),
    Dense(10, activation='softmax')
])
```
The three Conv2D/MaxPool2D pairs above are equivalent; they only differ in how the arguments are written.
1.2 Batch Normalization (BN)

In addition to standardizing each batch to zero mean and unit variance, batch normalization introduces two trainable parameters, γ (a scale factor) and β (an offset factor), which adjust the strength of the normalization:
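For reference, the standard batch-normalization transform first standardizes each channel over the mini-batch and then applies the two trainable parameters:

$$\hat{x}_i = \frac{x_i - \mu_{\text{batch}}}{\sqrt{\sigma_{\text{batch}}^{2} + \varepsilon}}, \qquad y_i = \gamma\,\hat{x}_i + \beta$$

where $\mu_{\text{batch}}$ and $\sigma_{\text{batch}}^{2}$ are the mean and variance of the current batch and $\varepsilon$ is a small constant added for numerical stability.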
The BN layer is placed after the convolution layer and before the activation layer, e.g.:
```python
model = tf.keras.models.Sequential([
    Conv2D(filters=6, kernel_size=(5, 5), padding='same'),   # convolution layer
    BatchNormalization(),                                     # BN layer
    Activation('relu'),                                       # activation layer
    MaxPool2D(pool_size=(2, 2), strides=2, padding='same'),  # pooling layer
    Dropout(0.2),                                             # dropout layer
])
```
1.3 Pooling
```python
tf.keras.layers.MaxPool2D(
    pool_size=pool_size,        # an integer for a square window, or (window height h, window width w)
    strides=stride,             # an integer, or (vertical stride h, horizontal stride w); defaults to pool_size
    padding='valid' or 'same'   # 'same' pads with zeros, 'valid' (default) does not
)
tf.keras.layers.AveragePooling2D(
    pool_size=pool_size,        # an integer for a square window, or (window height h, window width w)
    strides=stride,             # an integer, or (vertical stride h, horizontal stride w); defaults to pool_size
    padding='valid' or 'same'   # 'same' pads with zeros, 'valid' (default) does not
)
```
e.g.:
```python
model = tf.keras.models.Sequential([
    Conv2D(filters=6, kernel_size=(5, 5), padding='same'),   # convolution layer
    BatchNormalization(),                                     # BN layer
    Activation('relu'),                                       # activation layer
    MaxPool2D(pool_size=(2, 2), strides=2, padding='same'),  # pooling layer
    Dropout(0.2),                                             # dropout layer
])
```
1.4 Dropout
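The Dropout layer takes the fraction of input units to drop during training; the dropped units are automatically restored at inference time. A minimal signature sketch:

```python
tf.keras.layers.Dropout(rate)  # rate: fraction of the input units dropped during training, e.g. 0.2
```

e.g.: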
```python
model = tf.keras.models.Sequential([
    Conv2D(filters=6, kernel_size=(5, 5), padding='same'),   # convolution layer
    BatchNormalization(),                                     # BN layer
    Activation('relu'),                                       # activation layer
    MaxPool2D(pool_size=(2, 2), strides=2, padding='same'),  # pooling layer
    Dropout(0.2),                                             # dropout layer
])
```
1.5 Convolutional Neural Network
A convolutional neural network extracts spatial features with convolution kernels and then feeds them into a fully connected classifier; its main modules can be summarized as C-B-A-P-D (Convolution, BN, Activation, Pooling, Dropout):
```python
model = tf.keras.models.Sequential([
    Conv2D(filters=6, kernel_size=(5, 5), padding='same'),   # convolution layer      (C)
    BatchNormalization(),                                     # BN layer               (B)
    Activation('relu'),                                       # activation layer       (A)
    MaxPool2D(pool_size=(2, 2), strides=2, padding='same'),  # pooling layer          (P)
    Dropout(0.2),                                             # dropout layer          (D)
])
```
1.6 Example: Building a Convolutional Neural Network
The example network consists of a single C-B-A-P-D block followed by Flatten and two Dense layers:
- C (kernels: 6, size: 5×5, stride: 1, padding: same)
- B (yes)
- A (relu)
- P (max, size: 2×2, stride: 2, padding: same)
- D (0.2)
- Flatten
- Dense (neurons: 128, activation: relu, Dropout: 0.2)
- Dense (neurons: 10, activation: softmax)
Dataset: Cifar10
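For comparison, the same C-B-A-P-D stack can also be written with the Sequential API; the sketch below is equivalent to the subclassed Baseline model used in the full training script that follows:

```python
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(filters=6, kernel_size=(5, 5), padding='same'),   # C
    tf.keras.layers.BatchNormalization(),                                     # B
    tf.keras.layers.Activation('relu'),                                       # A
    tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2, padding='same'),  # P
    tf.keras.layers.Dropout(0.2),                                             # D
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax')
])
```

The subclassing API (tf.keras.Model) used below is more flexible and is the style reused for the classic networks in section 2.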
```python
import tensorflow as tf
import os
import numpy as np
from matplotlib import pyplot as plt

np.set_printoptions(threshold=np.inf)

# load and normalize the Cifar10 dataset
cifar10 = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0


class Baseline(tf.keras.Model):
    def __init__(self):
        super(Baseline, self).__init__()
        self.c1 = tf.keras.layers.Conv2D(filters=6, kernel_size=(5, 5), padding='same')
        self.b1 = tf.keras.layers.BatchNormalization()
        self.a1 = tf.keras.layers.Activation('relu')
        self.p1 = tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2, padding='same')
        self.d1 = tf.keras.layers.Dropout(0.2)
        self.flatten = tf.keras.layers.Flatten()
        self.f1 = tf.keras.layers.Dense(128, activation='relu')
        self.d2 = tf.keras.layers.Dropout(0.2)
        self.f2 = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, inputs):
        x = self.c1(inputs)
        x = self.b1(x)
        x = self.a1(x)
        x = self.p1(x)
        x = self.d1(x)
        x = self.flatten(x)
        x = self.f1(x)
        x = self.d2(x)
        y = self.f2(x)
        return y


model = Baseline()
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=[tf.keras.metrics.sparse_categorical_accuracy])

history = model.fit(x_train, y_train, batch_size=32, epochs=5,
                    validation_data=(x_test, y_test), validation_freq=1)
model.summary()

# plot the training curves
acc = history.history['sparse_categorical_accuracy']
val_acc = history.history['val_sparse_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(loss, label='Training loss')
plt.plot(val_loss, label='Validation loss')
plt.title('Training and Validation loss')
plt.legend()
plt.show()
```
2 Classic Convolutional Networks
2.1 LeNet

```python
class LeNet5(tf.keras.Model):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.c1 = tf.keras.layers.Conv2D(filters=6, kernel_size=(5, 5), activation='sigmoid')
        self.p1 = tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2)
        self.c2 = tf.keras.layers.Conv2D(filters=16, kernel_size=(5, 5), activation='sigmoid')
        self.p2 = tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2)
        self.flatten = tf.keras.layers.Flatten()
        self.f1 = tf.keras.layers.Dense(120, activation='sigmoid')
        self.f2 = tf.keras.layers.Dense(84, activation='sigmoid')
        self.f3 = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, inputs):
        x = self.c1(inputs)
        x = self.p1(x)
        x = self.c2(x)
        x = self.p2(x)
        x = self.flatten(x)
        x = self.f1(x)
        x = self.f2(x)
        y = self.f3(x)
        return y
```
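A minimal usage sketch, assuming the same Cifar10 loading, normalization, and compile settings as the Baseline script in section 1.6 (the training call for this model is an assumption, not shown above):

```python
model = LeNet5()
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])
model.fit(x_train, y_train, batch_size=32, epochs=5,
          validation_data=(x_test, y_test), validation_freq=1)
```

The same pattern applies to the other classic networks in this section: instantiate the class, compile, and fit.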
2.2 AlexNet

```python
class AlexNet8(tf.keras.Model):
    def __init__(self):
        super(AlexNet8, self).__init__()
        self.c1 = tf.keras.layers.Conv2D(filters=96, kernel_size=(3, 3))
        self.b1 = tf.keras.layers.BatchNormalization()
        self.a1 = tf.keras.layers.Activation('relu')
        self.p1 = tf.keras.layers.MaxPool2D(pool_size=(3, 3), strides=2)
        self.c2 = tf.keras.layers.Conv2D(filters=256, kernel_size=(3, 3))
        self.b2 = tf.keras.layers.BatchNormalization()
        self.a2 = tf.keras.layers.Activation('relu')
        self.p2 = tf.keras.layers.MaxPool2D(pool_size=(3, 3), strides=2)
        self.c3 = tf.keras.layers.Conv2D(filters=384, kernel_size=(3, 3), padding='same', activation='relu')
        self.c4 = tf.keras.layers.Conv2D(filters=384, kernel_size=(3, 3), padding='same', activation='relu')
        self.c5 = tf.keras.layers.Conv2D(filters=256, kernel_size=(3, 3), padding='same', activation='relu')
        self.p3 = tf.keras.layers.MaxPool2D(pool_size=(3, 3), strides=2)
        self.flatten = tf.keras.layers.Flatten()
        self.f1 = tf.keras.layers.Dense(2048, activation='relu')
        self.d1 = tf.keras.layers.Dropout(0.5)
        self.f2 = tf.keras.layers.Dense(2048, activation='relu')
        self.d2 = tf.keras.layers.Dropout(0.5)
        self.f3 = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, inputs):
        x = self.c1(inputs)
        x = self.b1(x)
        x = self.a1(x)
        x = self.p1(x)
        x = self.c2(x)
        x = self.b2(x)
        x = self.a2(x)
        x = self.p2(x)
        x = self.c3(x)
        x = self.c4(x)
        x = self.c5(x)
        x = self.p3(x)
        x = self.flatten(x)
        x = self.f1(x)
        x = self.d1(x)
        x = self.f2(x)
        x = self.d2(x)
        y = self.f3(x)
        return y
```
2.3 VGGNet
VGGNet was introduced in 2014 as the runner-up of that year's ImageNet competition, reducing the Top-5 error rate to 7.3%.


```python
class VGG16(tf.keras.Model):
    def __init__(self):
        super(VGG16, self).__init__()
        # block 1: two 64-channel convolutions
        self.c1 = tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), padding='same')
        self.b1 = tf.keras.layers.BatchNormalization()
        self.a1 = tf.keras.layers.Activation('relu')
        self.c2 = tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), padding='same')
        self.b2 = tf.keras.layers.BatchNormalization()
        self.a2 = tf.keras.layers.Activation('relu')
        self.p1 = tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2, padding='same')
        self.d1 = tf.keras.layers.Dropout(0.2)
        # block 2: two 128-channel convolutions
        self.c3 = tf.keras.layers.Conv2D(filters=128, kernel_size=(3, 3), padding='same')
        self.b3 = tf.keras.layers.BatchNormalization()
        self.a3 = tf.keras.layers.Activation('relu')
        self.c4 = tf.keras.layers.Conv2D(filters=128, kernel_size=(3, 3), padding='same')
        self.b4 = tf.keras.layers.BatchNormalization()
        self.a4 = tf.keras.layers.Activation('relu')
        self.p2 = tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2, padding='same')
        self.d2 = tf.keras.layers.Dropout(0.2)
        # block 3: three 256-channel convolutions
        self.c5 = tf.keras.layers.Conv2D(filters=256, kernel_size=(3, 3), padding='same')
        self.b5 = tf.keras.layers.BatchNormalization()
        self.a5 = tf.keras.layers.Activation('relu')
        self.c6 = tf.keras.layers.Conv2D(filters=256, kernel_size=(3, 3), padding='same')
        self.b6 = tf.keras.layers.BatchNormalization()
        self.a6 = tf.keras.layers.Activation('relu')
        self.c7 = tf.keras.layers.Conv2D(filters=256, kernel_size=(3, 3), padding='same')
        self.b7 = tf.keras.layers.BatchNormalization()
        self.a7 = tf.keras.layers.Activation('relu')
        self.p3 = tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2, padding='same')
        self.d3 = tf.keras.layers.Dropout(0.2)
        # block 4: three 512-channel convolutions
        self.c8 = tf.keras.layers.Conv2D(filters=512, kernel_size=(3, 3), padding='same')
        self.b8 = tf.keras.layers.BatchNormalization()
        self.a8 = tf.keras.layers.Activation('relu')
        self.c9 = tf.keras.layers.Conv2D(filters=512, kernel_size=(3, 3), padding='same')
        self.b9 = tf.keras.layers.BatchNormalization()
        self.a9 = tf.keras.layers.Activation('relu')
        self.c10 = tf.keras.layers.Conv2D(filters=512, kernel_size=(3, 3), padding='same')
        self.b10 = tf.keras.layers.BatchNormalization()
        self.a10 = tf.keras.layers.Activation('relu')
        self.p4 = tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2, padding='same')
        self.d4 = tf.keras.layers.Dropout(0.2)
        # block 5: three 512-channel convolutions
        self.c11 = tf.keras.layers.Conv2D(filters=512, kernel_size=(3, 3), padding='same')
        self.b11 = tf.keras.layers.BatchNormalization()
        self.a11 = tf.keras.layers.Activation('relu')
        self.c12 = tf.keras.layers.Conv2D(filters=512, kernel_size=(3, 3), padding='same')
        self.b12 = tf.keras.layers.BatchNormalization()
        self.a12 = tf.keras.layers.Activation('relu')
        self.c13 = tf.keras.layers.Conv2D(filters=512, kernel_size=(3, 3), padding='same')
        self.b13 = tf.keras.layers.BatchNormalization()
        self.a13 = tf.keras.layers.Activation('relu')
        self.p5 = tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2, padding='same')
        self.d5 = tf.keras.layers.Dropout(0.2)
        # classifier head (the dense-block dropouts use distinct names d6/d7)
        self.flatten = tf.keras.layers.Flatten()
        self.f1 = tf.keras.layers.Dense(512, activation='relu')
        self.d6 = tf.keras.layers.Dropout(0.2)
        self.f2 = tf.keras.layers.Dense(512, activation='relu')
        self.d7 = tf.keras.layers.Dropout(0.2)
        self.f3 = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, inputs):
        # forward pass: chain the layers in the order defined above
        x = self.c1(inputs)
        x = self.b1(x)
        x = self.a1(x)
        x = self.c2(x)
        x = self.b2(x)
        x = self.a2(x)
        x = self.p1(x)
        x = self.d1(x)
        x = self.c3(x)
        x = self.b3(x)
        x = self.a3(x)
        x = self.c4(x)
        x = self.b4(x)
        x = self.a4(x)
        x = self.p2(x)
        x = self.d2(x)
        x = self.c5(x)
        x = self.b5(x)
        x = self.a5(x)
        x = self.c6(x)
        x = self.b6(x)
        x = self.a6(x)
        x = self.c7(x)
        x = self.b7(x)
        x = self.a7(x)
        x = self.p3(x)
        x = self.d3(x)
        x = self.c8(x)
        x = self.b8(x)
        x = self.a8(x)
        x = self.c9(x)
        x = self.b9(x)
        x = self.a9(x)
        x = self.c10(x)
        x = self.b10(x)
        x = self.a10(x)
        x = self.p4(x)
        x = self.d4(x)
        x = self.c11(x)
        x = self.b11(x)
        x = self.a11(x)
        x = self.c12(x)
        x = self.b12(x)
        x = self.a12(x)
        x = self.c13(x)
        x = self.b13(x)
        x = self.a13(x)
        x = self.p5(x)
        x = self.d5(x)
        x = self.flatten(x)
        x = self.f1(x)
        x = self.d6(x)
        x = self.f2(x)
        x = self.d7(x)
        y = self.f3(x)
        return y
```
2.4 InceptionNet

In the basic InceptionNet unit, the convolutional branches all follow the same typical C-B-A structure (convolution → BN → activation), ReLU is used as the activation throughout, and the unit also contains a max-pooling branch.
```python
class ConvBNRelu(tf.keras.Model):
    def __init__(self, ch, kernelsz=3, strides=1, padding='same'):
        super(ConvBNRelu, self).__init__()
        self.model = tf.keras.models.Sequential([
            tf.keras.layers.Conv2D(ch, kernelsz, strides=strides, padding=padding),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu')
        ])

    def call(self, inputs):
        x = self.model(inputs)
        return x
```
```python
class InceptionBlk(tf.keras.Model):
    def __init__(self, ch, strides=1):
        super(InceptionBlk, self).__init__()
        self.ch = ch
        self.strides = strides
        # branch 1: 1x1 convolution
        self.c1 = ConvBNRelu(ch, kernelsz=1, strides=strides)
        # branch 2: 1x1 convolution followed by 3x3 convolution
        self.c2_1 = ConvBNRelu(ch, kernelsz=1, strides=strides)
        self.c2_2 = ConvBNRelu(ch, kernelsz=3, strides=1)  # the branch stride is already applied by c2_1
        # branch 3: 1x1 convolution followed by 5x5 convolution
        self.c3_1 = ConvBNRelu(ch, kernelsz=1, strides=strides)
        self.c3_2 = ConvBNRelu(ch, kernelsz=5, strides=1)  # the branch stride is already applied by c3_1
        # branch 4: 3x3 max pooling followed by 1x1 convolution
        self.p4_1 = tf.keras.layers.MaxPool2D(3, strides=1, padding='same')
        self.c4_2 = ConvBNRelu(ch, kernelsz=1, strides=strides)

    def call(self, inputs):
        x1 = self.c1(inputs)
        x2_1 = self.c2_1(inputs)
        x2_2 = self.c2_2(x2_1)
        x3_1 = self.c3_1(inputs)
        x3_2 = self.c3_2(x3_1)
        x4_1 = self.p4_1(inputs)
        x4_2 = self.c4_2(x4_1)
        # concatenate the four branches along the channel axis
        x = tf.concat([x1, x2_2, x3_2, x4_2], axis=3)
        return x
```

```python
class Inception10(tf.keras.Model):
    def __init__(self, num_blocks, num_classes, init_ch=16, **kwargs):
        super(Inception10, self).__init__(**kwargs)
        self.in_channels = init_ch
        self.out_channels = init_ch
        self.num_blocks = num_blocks
        self.init_ch = init_ch
        self.c1 = ConvBNRelu(init_ch)
        self.blocks = tf.keras.Sequential()
        for block_id in range(num_blocks):
            for layer_id in range(2):
                if layer_id == 0:
                    # the first Inception block in each group halves the feature-map size
                    block = InceptionBlk(self.out_channels, strides=2)
                else:
                    block = InceptionBlk(self.out_channels, strides=1)
                self.blocks.add(block)
            self.out_channels *= 2   # the next group uses twice as many channels
        self.p1 = tf.keras.layers.GlobalAveragePooling2D()
        self.f1 = tf.keras.layers.Dense(num_classes, activation='softmax')

    def call(self, inputs):
        x = self.c1(inputs)
        x = self.blocks(x)
        x = self.p1(x)
        y = self.f1(x)
        return y
```
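As a usage sketch (the configuration values here are an assumption, not taken from this article), a small Inception network for Cifar10 could be built from two groups of Inception blocks with 10 output classes:

```python
# assumed configuration: 2 groups of 2 Inception blocks each, 10 classes (Cifar10)
model = Inception10(num_blocks=2, num_classes=10)
```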
2.5 ResNet
The core of ResNet is the residual structure (see the CNN theory article for details). Residual connections were introduced mainly to address the vanishing-gradient problem that arises as networks grow ever deeper, and the four classic CNN architectures above already show that the trend has been toward increasing depth. A deep network integrates low-, mid-, and high-level features together with the classifier in an end-to-end stack of layers, so stacking more layers (more depth) enriches the feature hierarchy and can yield better results.
Simply stacking more layers, however, leads to vanishing (or exploding) gradients, which fundamentally prevents the network from converging. Normalized initialization and intermediate normalization layers largely solve this problem, allowing networks tens of layers deep to start converging under stochastic gradient descent (SGD) with backpropagation.
But once even deeper networks can converge, a degradation problem appears: as depth increases, accuracy first saturates (which is unsurprising) and then degrades rapidly. Notably, this degradation is not caused by overfitting; for a network that is already suitably deep, adding more layers actually increases the training error. This is exactly the problem ResNet solves.
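For reference, the residual unit learns a residual mapping $\mathcal{F}(x)$ rather than the desired mapping directly, and the two are combined through an identity shortcut:

$$y = \mathcal{F}(x, \{W_i\}) + x$$

In the two-layer block implemented below, $\mathcal{F}$ consists of two 3×3 convolutions (each followed by BN), and the addition with $x$ happens before the final ReLU.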
```python
class ResnetBlock(tf.keras.Model):
    def __init__(self, filters, strides=1, residual_path=False):
        super(ResnetBlock, self).__init__()
        self.filters = filters
        self.strides = strides
        self.residual_path = residual_path
        self.c1 = tf.keras.layers.Conv2D(filters, (3, 3), strides=strides, padding='same', use_bias=False)
        self.b1 = tf.keras.layers.BatchNormalization()
        self.a1 = tf.keras.layers.Activation('relu')
        self.c2 = tf.keras.layers.Conv2D(filters, (3, 3), strides=1, padding='same', use_bias=False)
        self.b2 = tf.keras.layers.BatchNormalization()
        # when the feature-map shape changes, a 1x1 convolution adapts the shortcut branch
        if residual_path:
            self.down_c1 = tf.keras.layers.Conv2D(filters, (1, 1), strides=strides, padding='same', use_bias=False)
            self.down_b1 = tf.keras.layers.BatchNormalization()
        self.a2 = tf.keras.layers.Activation('relu')

    def call(self, inputs):
        residual = inputs
        x = self.c1(inputs)
        x = self.b1(x)
        x = self.a1(x)
        x = self.c2(x)
        y = self.b2(x)
        if self.residual_path:
            residual = self.down_c1(inputs)
            residual = self.down_b1(residual)
        out = self.a2(y + residual)
        return out


class ResNet18(tf.keras.Model):
    def __init__(self, block_list, initial_filters=64):
        super(ResNet18, self).__init__()
        self.num_block = len(block_list)
        self.block_list = block_list
        self.out_filters = initial_filters
        self.c1 = tf.keras.layers.Conv2D(self.out_filters, (3, 3), strides=1, padding='same',
                                         use_bias=False, kernel_initializer='he_normal')
        self.b1 = tf.keras.layers.BatchNormalization()
        self.a1 = tf.keras.layers.Activation('relu')
        self.blocks = tf.keras.models.Sequential()
        for block_id in range(len(block_list)):           # which ResNet block
            for layer_id in range(block_list[block_id]):  # which residual unit inside the block
                if block_id != 0 and layer_id == 0:
                    # the first unit of every block after the first downsamples and adapts the shortcut
                    block = ResnetBlock(self.out_filters, strides=2, residual_path=True)
                else:
                    block = ResnetBlock(self.out_filters, residual_path=False)
                self.blocks.add(block)
            self.out_filters *= 2                         # the next block uses twice as many filters
        self.p1 = tf.keras.layers.GlobalAveragePooling2D()
        self.f1 = tf.keras.layers.Dense(10)               # outputs logits; pair with from_logits=True in the loss

    def call(self, inputs):
        x = self.c1(inputs)
        x = self.b1(x)
        x = self.a1(x)
        x = self.blocks(x)
        x = self.p1(x)
        y = self.f1(x)
        return y


model = ResNet18([2, 2, 2, 2])
```
Besides the two-layer residual unit used here, there is also a three-layer (bottleneck) variant. Two-layer units are typically used in shallower ResNets, while three-layer units are used in deeper ones to reduce the number of parameters and the amount of computation.
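A minimal sketch of such a three-layer (bottleneck) unit, written in the same style as ResnetBlock above; the 1×1 → 3×3 → 1×1 layout with a 4× channel expansion follows the common bottleneck design and is an assumption rather than code from this article:

```python
class BottleneckBlock(tf.keras.Model):
    """Hypothetical three-layer residual unit: 1x1 reduce -> 3x3 -> 1x1 expand."""
    def __init__(self, filters, strides=1, residual_path=False):
        super(BottleneckBlock, self).__init__()
        self.residual_path = residual_path
        self.c1 = tf.keras.layers.Conv2D(filters, (1, 1), strides=1, padding='same', use_bias=False)
        self.b1 = tf.keras.layers.BatchNormalization()
        self.c2 = tf.keras.layers.Conv2D(filters, (3, 3), strides=strides, padding='same', use_bias=False)
        self.b2 = tf.keras.layers.BatchNormalization()
        self.c3 = tf.keras.layers.Conv2D(filters * 4, (1, 1), strides=1, padding='same', use_bias=False)
        self.b3 = tf.keras.layers.BatchNormalization()
        # residual_path=True is required whenever the output shape differs from the input shape
        if residual_path:
            self.down_c1 = tf.keras.layers.Conv2D(filters * 4, (1, 1), strides=strides, padding='same', use_bias=False)
            self.down_b1 = tf.keras.layers.BatchNormalization()
        self.relu = tf.keras.layers.Activation('relu')

    def call(self, inputs):
        residual = inputs
        x = self.relu(self.b1(self.c1(inputs)))
        x = self.relu(self.b2(self.c2(x)))
        x = self.b3(self.c3(x))
        if self.residual_path:
            residual = self.down_b1(self.down_c1(inputs))
        return self.relu(x + residual)
```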