1. LeNet-5
LeNet-5 has a simple structure, but it already contains the basic building blocks of a convolutional neural network: 5*5 convolutions and average pooling. It makes a good first exercise. Its structure diagram is shown below:
Code:
import tensorflow as tf
import matplotlib.pyplot as plt
import os

# gpus = tf.config.list_physical_devices('GPU')
# tf.config.experimental.set_visible_devices(devices=gpus[2:4], device_type='GPU')
# os.environ["CUDA_VISIBLE_DEVICES"] = "-0"

# Load the dataset: 28*28-pixel, 1-channel MNIST images with 10 label classes
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()
train_images = tf.reshape(train_images, (train_images.shape[0], train_images.shape[1], train_images.shape[2], 1))
print(train_images.shape)
test_images = tf.reshape(test_images, (test_images.shape[0], test_images.shape[1], test_images.shape[2], 1))

# Normalize the images to [0, 1] to speed up training
# (the labels are class indices 0-9 and must not be divided by 255)
train_images = train_images / 255
test_images = test_images / 255

# Build the model
net_input = tf.keras.Input(shape=(28, 28, 1))
l1_con = tf.keras.layers.Conv2D(6, (5, 5), padding='valid', activation='sigmoid')(net_input)  # 6 filters of 5*5, sigmoid activation
l1_pool = tf.keras.layers.AveragePooling2D((2, 2), (2, 2))(l1_con)  # 2*2 average pooling, stride 2
l2_con = tf.keras.layers.Conv2D(16, (5, 5), padding='valid', activation='sigmoid')(l1_pool)  # 16 filters of 5*5, sigmoid activation
l2_pool = tf.keras.layers.AveragePooling2D((2, 2), (2, 2))(l2_con)  # 2*2 average pooling, stride 2
flat = tf.keras.layers.Flatten()(l2_pool)
l3_dense = tf.keras.layers.Dense(120, activation='sigmoid')(flat)  # fully connected layer
l4_dense = tf.keras.layers.Dense(84, activation='sigmoid')(l3_dense)  # fully connected layer
net_output = tf.keras.layers.Dense(10, activation='softmax')(l4_dense)  # output layer

# Create the model
model = tf.keras.Model(inputs=net_input, outputs=net_output)

# Inspect the model structure
model.summary()

# Compile the model
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
              loss="sparse_categorical_crossentropy",
              metrics=['acc'])

# Train the model
history = model.fit(train_images, train_labels, batch_size=50, epochs=5, validation_split=0.1, verbose=1)

# Evaluate the model
model.evaluate(test_images, test_labels)
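The fit call returns a History object. As a small follow-up sketch (same script context; the 'acc' / 'val_acc' keys are an assumption that the recorded names match the metric string passed to compile), the training and validation accuracy can be plotted:

# Plot training vs. validation accuracy from the History returned by model.fit
plt.plot(history.history['acc'], label='train acc')
plt.plot(history.history['val_acc'], label='val acc')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()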
2. AlexNet
Compared with LeNet-5, AlexNet introduces several notable changes:
- AlexNet is much deeper than LeNet-5, with five convolutional layers and three fully connected layers, so it can learn richer feature representations.
- AlexNet uses the ReLU activation function instead of sigmoid.
- AlexNet uses LRN (Local Response Normalization) layers, although these are rarely used today because they bring little benefit.
- AlexNet uses dropout layers to reduce overfitting.
- AlexNet applies data augmentation to the training set (see the sketch after this list).
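As a rough, hypothetical sketch of that last point (the augment helper and the pad/crop sizes are assumptions, not part of the implementation below), random crops and horizontal flips can be applied per image with tf.image inside a tf.data pipeline:

import tensorflow as tf

# Hypothetical augmentation step: pad, random-crop back to the original size,
# then randomly flip left/right, as commonly done for CIFAR-10-style images
def augment(x, y):
    x = tf.image.resize_with_crop_or_pad(x, 36, 36)  # pad 32*32 -> 36*36
    x = tf.image.random_crop(x, size=[32, 32, 3])    # random 32*32 crop
    x = tf.image.random_flip_left_right(x)           # random horizontal flip
    return x, y

# Applied to an unbatched pipeline, e.g.:
# train_db = train_db.map(augment).shuffle(50000).batch(128)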
The structure diagram is as follows:
Implementation code:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np

# Load the dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Zero-mean normalization
def normalize(X_train, X_test):
    X_train = X_train / 255.
    X_test = X_test / 255.
    mean = np.mean(X_train, axis=(0, 1, 2, 3))  # mean
    std = np.std(X_train, axis=(0, 1, 2, 3))    # standard deviation
    print('mean:', mean, 'std:', std)
    X_train = (X_train - mean) / (std + 1e-7)
    X_test = (X_test - mean) / (std + 1e-7)
    return X_train, X_test

# Preprocessing
def preprocess(x, y):
    x = tf.image.resize(x, (227, 227))  # enlarge the 32*32 images to 227*227
    x = tf.cast(x, tf.float32)
    y = tf.cast(y, tf.int32)
    y = tf.squeeze(y, axis=1)  # squeeze the (batch, 1) labels to shape (batch,)
    y = tf.one_hot(y, depth=10)
    return x, y

# Zero-mean normalization
x_train, x_test = normalize(x_train, x_test)

# Build the tf.data pipelines
train_db = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_db = train_db.shuffle(50000).batch(128).map(preprocess)  # 128 training samples per batch
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.shuffle(10000).batch(128).map(preprocess)  # 128 test samples per batch

# Convolution layer that supports an explicit, custom amount of padding
class ConvWithPadding(tf.keras.layers.Layer):
    def __init__(self, kernel, filters, strides, padding):
        super().__init__()
        self.kernel = kernel      # number of output channels
        self.filters = filters    # spatial kernel size
        self.strides = strides
        self.padding = padding

    def build(self, input_shape):
        # register the kernel as a trainable weight
        self.w = self.add_weight(shape=[self.filters, self.filters, input_shape[-1], self.kernel],
                                 initializer='random_normal', trainable=True)

    def call(self, inputs):
        return tf.nn.conv2d(inputs, filters=self.w, strides=self.strides, padding=self.padding)

batch = 32
alex_net = keras.Sequential([
    # Convolution block 1
    keras.layers.Conv2D(96, 11, 4),  # input 227*227@3; 96 kernels of 11*11@3, stride 4, no padding -> 55*55@96
    keras.layers.ReLU(),  # ReLU activation
    keras.layers.MaxPooling2D((3, 3), 2),  # overlapping max pooling, 3*3, stride 2 -> 27*27@96
    keras.layers.BatchNormalization(),

    # Convolution block 2
    # ConvWithPadding(kernel=256, filters=5, strides=1, padding=[[0, 0], [2, 2], [2, 2], [0, 0]]),
    keras.layers.Conv2D(256, 5, 1, padding='same'),  # input 27*27@96; 256 kernels of 5*5@96, stride 1, padding 2 -> 27*27@256 (same spatial size)
    keras.layers.ReLU(),
    keras.layers.MaxPooling2D((3, 3), 2),  # overlapping max pooling, 3*3, stride 2 -> 13*13@256
    keras.layers.BatchNormalization(),

    # Convolution block 3
    keras.layers.Conv2D(384, 3, 1, padding='same'),  # input 13*13@256; 384 kernels of 3*3@256, stride 1, padding 1 -> 13*13@384
    keras.layers.ReLU(),

    # Convolution block 4
    keras.layers.Conv2D(384, 3, 1, padding='same'),  # input 13*13@384; 384 kernels of 3*3@384, stride 1, padding 1 -> 13*13@384
    keras.layers.ReLU(),

    # Convolution block 5
    keras.layers.Conv2D(256, 3, 1, padding='same'),  # input 13*13@384; 256 kernels of 3*3@384, stride 1, padding 1 -> 13*13@256
    keras.layers.ReLU(),
    keras.layers.MaxPooling2D((3, 3), 2),  # overlapping max pooling, 3*3, stride 2 -> 6*6@256

    # Fully connected layer 1
    keras.layers.Flatten(),  # flatten the 6*6@256 feature map into 9216 values
    keras.layers.Dense(4096),  # 9216*4096 fully connected
    keras.layers.ReLU(),
    keras.layers.Dropout(0.25),  # drop 25% of the neurons

    # Fully connected layer 2
    keras.layers.Dense(4096),  # 4096*4096 fully connected
    keras.layers.ReLU(),
    keras.layers.Dropout(0.25),  # drop 25% of the neurons

    # Fully connected layer 3
    keras.layers.Dense(10, activation='softmax')  # 4096*10 fully connected, softmax over 10 classes
])

alex_net.build(input_shape=[batch, 227, 227, 3])
alex_net.summary()

# Compile the network
# (the output layer already applies softmax, so the loss must not use from_logits=True)
alex_net.compile(optimizer=keras.optimizers.Adam(0.00001),
                 loss=keras.losses.CategoricalCrossentropy(),
                 metrics=['accuracy'])

# Train
history = alex_net.fit(train_db, epochs=10)

# Loss curve
plt.plot(history.history['loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')

# Test
alex_net.evaluate(test_db)
plt.show()
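The feature-map sizes quoted in the comments above follow the usual formula for an unpadded convolution or pooling, output = floor((input - kernel) / stride) + 1. A quick sketch to double-check them (the out_size helper is only for illustration):

# Output size of a 'valid' convolution / pooling layer
def out_size(n, k, s):
    return (n - k) // s + 1

print(out_size(227, 11, 4))  # conv1 -> 55
print(out_size(55, 3, 2))    # pool1 -> 27
print(out_size(27, 3, 2))    # pool2 -> 13
print(out_size(13, 3, 2))    # pool3 -> 6, so Flatten sees 6*6*256 = 9216 values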
3. VGGNet-16
VGGNet-16 builds a 16-to-19-layer-deep convolutional network by repeatedly stacking 3*3 convolutions and 2*2 max pooling layers. It replaces one 5*5 convolution with two 3*3 convolutions, and one 7*7 convolution with three 3*3 convolutions; the receptive field stays the same while the number of parameters drops considerably. The model is still not very deep, saturating at 19 layers, and the effect of convolution width on network performance was not explored. It also has a very large number of parameters, over 130 million. The VGGNet-16 structure is as follows:
Code implementation:
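As a quick check of that parameter claim, here is a back-of-the-envelope calculation (ignoring biases) for a layer with C input and C output channels; C = 256 is only an example value:

# Weight counts (ignoring biases) for convolutions with C input and C output channels
C = 256
one_5x5 = 5 * 5 * C * C            # a single 5*5 convolution
two_3x3 = 2 * (3 * 3 * C * C)      # two stacked 3*3 convolutions (same 5*5 receptive field)
one_7x7 = 7 * 7 * C * C            # a single 7*7 convolution
three_3x3 = 3 * (3 * 3 * C * C)    # three stacked 3*3 convolutions (same 7*7 receptive field)
print(one_5x5, two_3x3)    # 1638400 1179648  -> about 28% fewer weights
print(one_7x7, three_3x3)  # 3211264 1769472  -> about 45% fewer weights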
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os

# gpus = tf.config.list_physical_devices('GPU')
# tf.config.experimental.set_visible_devices(devices=gpus[2:8], device_type='GPU')
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Load the dataset: 32*32-pixel, 3-channel CIFAR-10 images with 10 label classes
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()

# Zero-mean normalization
def normalize(X_train, X_test):
    X_train = X_train / 255.
    X_test = X_test / 255.
    mean = np.mean(X_train, axis=(0, 1, 2, 3))  # mean
    std = np.std(X_train, axis=(0, 1, 2, 3))    # standard deviation
    X_train = (X_train - mean) / (std + 1e-7)
    X_test = (X_test - mean) / (std + 1e-7)
    return X_train, X_test

# Preprocessing
def preprocess(x, y):
    x = tf.cast(x, tf.float32)
    y = tf.cast(y, tf.int32)
    y = tf.squeeze(y, axis=1)  # squeeze the (batch, 1) labels to shape (batch,)
    y = tf.one_hot(y, depth=10)
    return x, y

# Standardize the data
train_images, test_images = normalize(train_images, test_images)

# Build the tf.data pipelines
train_db = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
train_db = train_db.shuffle(50000).batch(100).map(preprocess)  # 100 training samples per batch
test_db = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
test_db = test_db.shuffle(10000).batch(100).map(preprocess)  # 100 test samples per batch

# Build the model
VGGNet_input = tf.keras.Input((32, 32, 3))  # model input

c1 = tf.keras.layers.Conv2D(64, (3, 3), padding='same')(VGGNet_input)  # layer 1
b1 = tf.keras.layers.BatchNormalization()(c1)
a1 = tf.keras.layers.Activation('relu')(b1)

c2 = tf.keras.layers.Conv2D(64, (3, 3), padding='same')(a1)  # layer 2
b2 = tf.keras.layers.BatchNormalization()(c2)
a2 = tf.keras.layers.Activation('relu')(b2)
p2 = tf.keras.layers.MaxPooling2D((2, 2), (2, 2))(a2)
d2 = tf.keras.layers.Dropout(0.2)(p2)

c3 = tf.keras.layers.Conv2D(128, (3, 3), padding='same')(d2)  # layer 3
b3 = tf.keras.layers.BatchNormalization()(c3)
a3 = tf.keras.layers.Activation('relu')(b3)

c4 = tf.keras.layers.Conv2D(128, (3, 3), padding='same')(a3)  # layer 4
b4 = tf.keras.layers.BatchNormalization()(c4)
a4 = tf.keras.layers.Activation('relu')(b4)
p4 = tf.keras.layers.MaxPooling2D((2, 2), (2, 2))(a4)
d4 = tf.keras.layers.Dropout(0.2)(p4)

c5 = tf.keras.layers.Conv2D(256, (3, 3), padding='same')(d4)  # layer 5
b5 = tf.keras.layers.BatchNormalization()(c5)
a5 = tf.keras.layers.Activation('relu')(b5)

c6 = tf.keras.layers.Conv2D(256, (3, 3), padding='same')(a5)  # layer 6
b6 = tf.keras.layers.BatchNormalization()(c6)
a6 = tf.keras.layers.Activation('relu')(b6)

c7 = tf.keras.layers.Conv2D(256, (3, 3), padding='same')(a6)  # layer 7
b7 = tf.keras.layers.BatchNormalization()(c7)
a7 = tf.keras.layers.Activation('relu')(b7)
p7 = tf.keras.layers.MaxPooling2D((2, 2), (2, 2))(a7)
d7 = tf.keras.layers.Dropout(0.2)(p7)

c8 = tf.keras.layers.Conv2D(512, (3, 3), padding='same')(d7)  # layer 8
b8 = tf.keras.layers.BatchNormalization()(c8)
a8 = tf.keras.layers.Activation('relu')(b8)

c9 = tf.keras.layers.Conv2D(512, (3, 3), padding='same')(a8)  # layer 9
b9 = tf.keras.layers.BatchNormalization()(c9)
a9 = tf.keras.layers.Activation('relu')(b9)

c10 = tf.keras.layers.Conv2D(512, (3, 3), padding='same')(a9)  # layer 10
b10 = tf.keras.layers.BatchNormalization()(c10)
a10 = tf.keras.layers.Activation('relu')(b10)
p10 = tf.keras.layers.MaxPooling2D((2, 2), (2, 2))(a10)
d10 = tf.keras.layers.Dropout(0.2)(p10)

c11 = tf.keras.layers.Conv2D(512, (3, 3), padding='same')(d10)  # layer 11
b11 = tf.keras.layers.BatchNormalization()(c11)
a11 = tf.keras.layers.Activation('relu')(b11)

c12 = tf.keras.layers.Conv2D(512, (3, 3), padding='same')(a11)  # layer 12
b12 = tf.keras.layers.BatchNormalization()(c12)
a12 = tf.keras.layers.Activation('relu')(b12)

c13 = tf.keras.layers.Conv2D(512, (3, 3), padding='same')(a12)  # layer 13
b13 = tf.keras.layers.BatchNormalization()(c13)
a13 = tf.keras.layers.Activation('relu')(b13)
p13 = tf.keras.layers.MaxPooling2D((2, 2), (2, 2))(a13)
d13 = tf.keras.layers.Dropout(0.2)(p13)

f14 = tf.keras.layers.Flatten()(d13)  # layer 14
den14 = tf.keras.layers.Dense(512, 'relu')(f14)
d14 = tf.keras.layers.Dropout(0.2)(den14)

den15 = tf.keras.layers.Dense(512, 'relu')(d14)  # layer 15
d15 = tf.keras.layers.Dropout(0.2)(den15)

VGGNet_output = tf.keras.layers.Dense(10, 'softmax')(d15)  # layer 16

# Build the model
VGGNet = tf.keras.Model(inputs=VGGNet_input, outputs=VGGNet_output)

# Inspect the model structure
VGGNet.summary()

# Compile the model
# (the output layer already applies softmax, so the loss must not use from_logits=True)
VGGNet.compile(
    optimizer=tf.keras.optimizers.Adam(0.00001),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy']
)

# Train
VGGNet.fit(train_db)
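The script above stops after training. A minimal follow-up sketch (same script context; the save path is only an example name) that evaluates on the test_db pipeline prepared earlier and saves the trained weights:

# Evaluate on the held-out test pipeline and save the trained model
test_loss, test_acc = VGGNet.evaluate(test_db)
print('test loss:', test_loss, 'test accuracy:', test_acc)
VGGNet.save('vggnet16_cifar10.h5')  # hypothetical output path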