卷積神經網絡入門：LeNet5（手寫體數字識別）詳解

本文轉載自查看原文 2018-10-16 10:19 2832 深度學習

第一張圖包括 8 層 LeNet5 卷積神經網絡的結構圖，以及其中最複雜的一層（S2 到 C3）的結構處理示意圖。
在這里插入圖片描述
第二張圖及第三張圖是用 TensorFlow 重寫的 LeNet5 網絡及其註釋。

這是原始的LeNet5網絡：

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import time

# Placeholders for the input images (each flattened to 784 values) and the
# one-hot class labels (10 classes).
x = tf.placeholder('float', [None, 784])
y_ = tf.placeholder('float', [None, 10])
# Reshape every flat image into a 28x28 single-channel image; -1 lets the
# batch dimension be inferred.
x_image = tf.reshape(x, [-1, 28, 28, 1])

# First convolutional layer: 5x5 kernels, 1 input channel, 6 filters.
# Stride 1 with 'SAME' padding keeps the 28x28 spatial size.
filter1 = tf.Variable(tf.truncated_normal([5, 5, 1, 6]))
bias1 = tf.Variable(tf.truncated_normal([6]))
conv1 = tf.nn.conv2d(x_image, filter1, strides=[1, 1, 1, 1], padding='SAME')
h_conv1 = tf.nn.sigmoid(conv1 + bias1)

# 2x2 max pooling with stride 2: 28x28 -> 14x14.
maxPool2 = tf.nn.max_pool(h_conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Second convolutional layer: 5x5 kernels, 6 input channels, 16 filters.
filter2 = tf.Variable(tf.truncated_normal([5, 5, 6, 16]))
bias2 = tf.Variable(tf.truncated_normal([16]))
conv2 = tf.nn.conv2d(maxPool2, filter2, strides=[1, 1, 1, 1], padding='SAME')
h_conv2 = tf.nn.sigmoid(conv2 + bias2)

# 2x2 max pooling with stride 2: 14x14 -> 7x7.
maxPool3 = tf.nn.max_pool(h_conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Third convolutional layer: 5x5 kernels, 16 input channels, 120 filters.
# No pooling follows, so the output stays 7x7 with 120 channels.
filter3 = tf.Variable(tf.truncated_normal([5, 5, 16, 120]))
bias3 = tf.Variable(tf.truncated_normal([120]))
conv3 = tf.nn.conv2d(maxPool3, filter3, strides=[1, 1, 1, 1], padding='SAME')
h_conv3 = tf.nn.sigmoid(conv3 + bias3)

# Fully connected layer: 7 * 7 * 120 inputs -> 80 hidden units.
W_fc1 = tf.Variable(tf.truncated_normal([7 * 7 * 120, 80]))
b_fc1 = tf.Variable(tf.truncated_normal([80]))
# Flatten the output of the third convolution for the matrix multiply.
h_pool2_flat = tf.reshape(h_conv3, [-1, 7 * 7 * 120])
h_fc1 = tf.nn.sigmoid(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Output layer: 80 hidden units -> 10 class scores, softmax for probabilities.
W_fc2 = tf.Variable(tf.truncated_normal([80, 10]))
b_fc2 = tf.Variable(tf.truncated_normal([10]))
logits = tf.matmul(h_fc1, W_fc2) + b_fc2
y_conv = tf.nn.softmax(logits)
# Cross-entropy summed over the batch. Computed from the logits with the
# fused op rather than -sum(y_ * log(softmax)), because log() of a softmax
# output that has underflowed to 0 yields NaN and poisons training.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
# Plain gradient descent with learning rate 0.001.
train_step = tf.train.GradientDescentOptimizer(0.001).minimize(cross_entropy)

# Accuracy: fraction of samples whose predicted class matches the label.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# InteractiveSession installs itself as the default session, which is what
# lets accuracy.eval() and train_step.run() below work without passing it.
sess = tf.InteractiveSession()
try:
    # Initialise all variables (replaces the deprecated
    # tf.initialize_all_variables()).
    sess.run(tf.global_variables_initializer())

    # Load the MNIST data set with one-hot labels.
    mnist_data_set = input_data.read_data_sets('MNIST_data', one_hot=True)

    # Training loop.
    start_time = time.time()
    for i in range(20000):
        # Fetch the next mini-batch of 200 training samples.
        batch_xs, batch_ys = mnist_data_set.train.next_batch(200)

        # Every 2nd step, report the accuracy on the current training batch
        # (evaluated before this step's parameter update is applied).
        if i % 2 == 0:
            train_accuracy = accuracy.eval(feed_dict={x: batch_xs, y_: batch_ys})
            print("step %d, training accuracy %g" % (i, train_accuracy))
            # Report the time elapsed since the previous report.
            end_time = time.time()
            print('time: ', (end_time - start_time))
            start_time = end_time
        # Run one optimisation step on the batch.
        train_step.run(feed_dict={x: batch_xs, y_: batch_ys})
finally:
    # Always release the session, even if training raised.
    sess.close()

下面是改進後的 LeNet5 網絡：

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import time
import matplotlib.pyplot as plt


def weight_variable(shape):
    """Return a trainable weight variable of the given shape.

    The initial values are drawn from a truncated normal distribution with
    a standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


def bias_variable(shape):
    """Return a trainable bias variable of the given shape, filled with 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))


def conv2d(x, W):
    """Convolve the input features x with the kernel W.

    The kernel moves with a stride of 1 in every dimension, and 'SAME'
    padding is used so the edges are padded rather than cropped.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')


def max_pool_2x2(x):
    """Apply max pooling to x with a 2x2 window (ksize) and a stride of 2."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')


# Placeholders for the input images (each flattened to 784 values) and the
# one-hot class labels (10 classes).
x = tf.placeholder('float32', [None, 784])
y_ = tf.placeholder('float32', [None, 10])
# Reshape every flat image into a 28x28 single-channel image; -1 lets the
# batch dimension be inferred.
x_image = tf.reshape(x, [-1, 28, 28, 1])

# First convolution block: 5x5 kernels, 1 input channel, 32 filters,
# followed by ReLU and a 2x2 max pool.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Second convolution block: 5x5 kernels, 32 input channels, 64 filters,
# followed by ReLU and a 2x2 max pool.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# First fully connected layer: 7 * 7 * 64 inputs -> 1024 units.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
# Flatten the pooled feature maps for the matrix multiply.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Second fully connected layer: 1024 -> 128 units.
W_fc2 = weight_variable([1024, 128])
b_fc2 = bias_variable([128])
h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)

# Output layer: 128 -> 10 class scores, softmax for probabilities.
W_fc3 = weight_variable([128, 10])
b_fc3 = bias_variable([10])
logits = tf.matmul(h_fc2, W_fc3) + b_fc3
y_conv = tf.nn.softmax(logits)
# Cross-entropy summed over the batch. Computed from the logits with the
# fused op rather than -sum(y_ * log(softmax)), because log() of a softmax
# output that has underflowed to 0 yields NaN and poisons training.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
# Plain gradient descent with learning rate 1e-5.
# NOTE(review): the previous comment here said Adam, but the code has always
# used GradientDescentOptimizer - confirm which optimizer was intended.
train_step = tf.train.GradientDescentOptimizer(1e-5).minimize(cross_entropy)

# Accuracy: fraction of samples whose predicted class matches the label.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float32"))

# Training-batch accuracies recorded during training, plotted at the end.
c = []

# InteractiveSession installs itself as the default session, which is what
# lets accuracy.eval() and train_step.run() below work without passing it.
sess = tf.InteractiveSession()
try:
    # Initialise all variables (replaces the deprecated
    # tf.initialize_all_variables()).
    sess.run(tf.global_variables_initializer())

    # Load the MNIST data set with one-hot labels.
    mnist_data_set = input_data.read_data_sets('MNIST_data', one_hot=True)

    # Training loop.
    start_time = time.time()
    for i in range(1000):
        # Fetch the next mini-batch of 200 training samples.
        batch_xs, batch_ys = mnist_data_set.train.next_batch(200)

        # Every 2nd step, record and report the accuracy on the current
        # training batch (evaluated before this step's update is applied).
        if i % 2 == 0:
            train_accuracy = accuracy.eval(feed_dict={x: batch_xs, y_: batch_ys})
            c.append(train_accuracy)
            print("step %d, training accuracy %g" % (i, train_accuracy))
            # Report the time elapsed since the previous report.
            end_time = time.time()
            print('time: ', (end_time - start_time))
            start_time = end_time
        # Run one optimisation step on the batch.
        train_step.run(feed_dict={x: batch_xs, y_: batch_ys})
finally:
    # Always release the session, even if training raised.
    sess.close()

# Plot the recorded training-batch accuracy curve.
plt.plot(c)
plt.tight_layout()
# Without show() the figure is never displayed when run as a plain script.
plt.show()

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱 yoyou2525@163.com 刪除。

猜您在找：人工智能結課作業-BP神經網絡/卷積神經網絡手寫體識別 ｜ Pytorch1.0入門實戰一：LeNet神經網絡實現 MNIST手寫數字識別 ｜ PyTorch實戰：經典模型LeNet5實現手寫體識別 ｜ 手寫數字圖片識別-卷積神經網絡 ｜ pytorch實現MNIST手寫體識別（全連接神經網絡） ｜ 使用TensorFlow的卷積神經網絡識別手寫數字（1）-預處理篇 ｜ Pytorch卷積神經網絡識別手寫數字集 ｜ 基於卷積神經網絡的手寫數字識別分類(Tensorflow) ｜ 手寫數字識別-卷積神經網絡cnn（06-2） ｜ 基於Tensorflow-gpu的mnist手寫數字識別(卷積神經網絡CNN)