I'm writing this post because I found that, although there are plenty of deep learning resources online, building a complete program on your own is not as easy as it looks, especially restoring and calling a saved model; I fell into many pitfalls along the way. Fortunately I finished the design and the thesis in the end, and I'm posting it here to share with everyone.
Basic tools used: Anaconda, the PyQt5 library, the PIL (Image) library, and TensorFlow (GPU version).
PS: Space is limited here, so please refer to my thesis for the neural network background used in this project.
The complete code is available in the [GitHub repository].
- First, the GUI. I found some half-finished examples online and, with some rework of my own, got something usable, developed with Python's PyQt5 library.
A look at the finished product first; the code is posted below.
I use four .py files here: DigitalMnistNum.py, MainWindowC.py, UI_MainWindow.py, and run.py.
- DigitalMnistNum.py defines the handlers for the clear, save, recog, and result events. On initialisation it configures the canvas image to be saved as a BMP file ("pic.bmp") scaled to 28*28 pixels.
```python
# handwritten digit panel class
from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtGui import QColor


class DigitalMnistNum(QtWidgets.QWidget):
    def __init__(self, parent=None):
        super(DigitalMnistNum, self).__init__(parent)
        self.pen = QtGui.QPen()
        self.pen.setStyle(QtCore.Qt.SolidLine)
        self.pen.setWidth(12)  # pen thickness
        self.pen.setColor(QtCore.Qt.white)  # white strokes
        # image size is 28*28 pixels
        self.bitmapSize = QtCore.QSize(28, 28)
        self.resetBitmap()

    def resetBitmap(self):
        self.pix = QtGui.QBitmap(self.size())
        self.pix.fill(QtCore.Qt.black)  # black background

    # clear button
    def clearBitmap(self):
        self.resetBitmap()
        self.update()

    # recognition hook (left empty; recognition happens in MainWindowC)
    def recongBitmap(self):
        pass

    # save the image format and data
    def saveBitmap(self):
        fileName = str("pic.bmp")
        tmp = self.pix.scaled(
            self.bitmapSize, QtCore.Qt.KeepAspectRatio)  # scale down and save
        QtCore.qDebug(str(tmp.size()))
        tmp.save(fileName)

    def setBitmapSize(self, size):
        self.bitmapSize = QtCore.QSize(size[0], size[1])
```
- The four mouse-related events (press, move, paint, release) are based on this blog post: https://www.cnblogs.com/PyLearn/p/7689170.html

```python
# (continuation of the DigitalMnistNum class)
# the following functions record the mouse events for handwriting
# mouse press event
def mousePressEvent(self, event):
    if event.button() == QtCore.Qt.LeftButton:
        self.startPos = event.pos()
        painter = QtGui.QPainter()
        painter.begin(self.pix)
        painter.setPen(self.pen)
        painter.drawPoint(self.startPos)
        painter.end()
        self.update()

# mouse move event: draws a line from the last position
def mouseMoveEvent(self, event):
    painter = QtGui.QPainter()
    painter.begin(self.pix)
    painter.setPen(self.pen)
    painter.drawLine(self.startPos, event.pos())
    painter.end()
    self.startPos = event.pos()
    self.update()

# paint event: renders the bitmap onto the widget
def paintEvent(self, event):
    if self.size() != self.pix.size():
        QtCore.qDebug(str(self.size()) + "," +
                      str(self.pix.size()) + "," + str(event.type()))
        self.resetBitmap()
    painter = QtGui.QPainter(self)
    painter.drawPixmap(QtCore.QPoint(0, 0), self.pix)

# mouse release event
def mouseReleaseEvent(self, event):
    self.update()
```
- MainWindowC.py
MainWindowC handles the clear, save, and recog button events. Clearing and saving the canvas are straightforward.
```python
from PyQt5 import QtCore, QtWidgets
from UI_MainWindow import Ui_MainWindow


class MainWindow(QtWidgets.QMainWindow):
    def __init__(self, parent=None):
        super(MainWindow, self).__init__(parent)
        self.ui = Ui_MainWindow()
        self.ui.setupUi(self)

    def clearBtn(self):
        QtCore.qDebug(str("clearBtn"))
        self.ui.widget.clearBitmap()

    def saveBtn(self):
        QtCore.qDebug(str("saveBtn"))
        self.ui.widget.saveBitmap()

    def setLabelText(self, text):
        self.ui.result.setText(text)

    def setBitmapSize(self, size):
        self.ui.widget.setBitmapSize(size)
```
The recognition event is the key part. The saved handwritten digit image must first be preprocessed: open it, read its pixel data into a list with the PIL Image module, then convert the raw values into the same format as the MNIST dataset (grayscale in [0, 1], white strokes on a black background).
```python
# (continuation of the MainWindow class)
# prediction; requires: from PIL import Image and from numpy import array
def recongBtn(self):
    QtCore.qDebug(str("recongBtn"))
    self.ui.widget.recongBitmap()
    # path of the saved handwritten image
    file_name = R"C:\Users\tensorflow\tensorflow-mnist-tutorial\TestProject\pic.bmp"
    img = Image.open(file_name).convert('L')  # open as 8-bit grayscale
    cvtValue = list(img.getdata())
    # normalise the pixel values: 1.0 is pure white, 0.0 is pure black
    # resCvtValue = [(255 - x) * 1.0 / 255.0 for x in cvtValue]
    resCvtValue = [x / 255.0 for x in cvtValue]
    newShape = array(resCvtValue).reshape(28, 28, 1)
```
Next comes TensorFlow's mechanism for restoring model parameters: import_meta_graph() and restore() are used together to bring the parameters back, assuming the model has already been trained and saved beforehand.
```python
# load the saved parameters
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    new_saver = tf.train.import_meta_graph(
        R'C:\Users\tensorflow\tensorflow-mnist-tutorial\TestProject\mdlib\md2\init-1000.meta')
    new_saver.restore(sess, tf.train.latest_checkpoint(
        R'C:\Users\tensorflow\tensorflow-mnist-tutorial\TestProject\mdlib\md2'))
    print("model restore done\n")
    graph = tf.get_default_graph()
```
There are three code blocks below (two of them commented out) because the program uses three neural network models; each model's accuracy is tested separately, and each expects a different input format. Variables are declared to receive the parameter values restored from the checkpoint.
model 1 is a single-layer neural network, model 2 a five-layer fully connected network, and model 3 a convolutional neural network.

```python
'''
# model 1
W = graph.get_tensor_by_name("W:0")
b = graph.get_tensor_by_name("b:0")
XX = tf.reshape(newShape, [-1, 784])
Y = tf.nn.softmax(tf.matmul(tf.cast(XX, tf.float32), W) + b)

feed_dict = {XX: [resCvtValue]}
'''

# model 2
# X = tf.placeholder(tf.float32, [None, 28, 28, 1])
L = 200
M = 100
N = 60
O = 30
XX = tf.reshape(newShape, [-1, 784])
W1 = graph.get_tensor_by_name("W1:0")
B1 = graph.get_tensor_by_name("B1:0")

W2 = graph.get_tensor_by_name("W2:0")
B2 = graph.get_tensor_by_name("B2:0")

W3 = graph.get_tensor_by_name("W3:0")
B3 = graph.get_tensor_by_name("B3:0")

W4 = graph.get_tensor_by_name("W4:0")
B4 = graph.get_tensor_by_name("B4:0")

W5 = graph.get_tensor_by_name("W5:0")
B5 = graph.get_tensor_by_name("B5:0")

Y1 = tf.nn.sigmoid(tf.matmul(tf.cast(XX, tf.float32), W1) + B1)
Y2 = tf.nn.sigmoid(tf.matmul(Y1, W2) + B2)
Y3 = tf.nn.sigmoid(tf.matmul(Y2, W3) + B3)
Y4 = tf.nn.sigmoid(tf.matmul(Y3, W4) + B4)
Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)
feed_dict = {XX: [resCvtValue]}

'''
# model 3
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
K = 4  # first convolutional layer output depth
L = 8  # second convolutional layer output depth
M = 12  # third convolutional layer
N = 200  # fully connected layer

W1 = graph.get_tensor_by_name("W1:0")
B1 = graph.get_tensor_by_name("B1:0")
stride = 1  # output is 28x28
Y1 = tf.nn.relu(tf.nn.conv2d(
    X, W1, strides=[1, stride, stride, 1], padding='SAME') + B1)

W2 = graph.get_tensor_by_name("W2:0")
B2 = graph.get_tensor_by_name("B2:0")
stride = 2  # output is 14x14
Y2 = tf.nn.relu(tf.nn.conv2d(Y1, W2, strides=[
    1, stride, stride, 1], padding='SAME') + B2)

W3 = graph.get_tensor_by_name("W3:0")
B3 = graph.get_tensor_by_name("B3:0")

stride = 2  # output is 7x7
Y3 = tf.nn.relu(tf.nn.conv2d(Y2, W3, strides=[
    1, stride, stride, 1], padding='SAME') + B3)

# reshape the output from the third convolution for the fully connected layer
YY = tf.reshape(Y3, shape=[-1, 7 * 7 * M])

W4 = graph.get_tensor_by_name("W4:0")
B4 = graph.get_tensor_by_name("B4:0")
Y4 = tf.nn.relu(tf.matmul(YY, W4) + B4)

W5 = graph.get_tensor_by_name("W5:0")
B5 = graph.get_tensor_by_name("B5:0")

Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)

feed_dict = {X: [newShape]}
'''
```
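With the graph rebuilt, the actual prediction is a single `sess.run` on `Y`. My code above stops short of showing this step, so here is a minimal sketch of how it could look, assuming it runs inside `recongBtn` (so `self` is available) and uses the `feed_dict` built above:

```python
# minimal sketch, not shown in the code above: evaluate the restored model
result = sess.run(Y, feed_dict=feed_dict)  # shape [1, 10] of softmax scores
digit = int(result.argmax())               # index of the highest probability
self.setLabelText(str(digit))              # show the prediction in the GUI label
```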
- UI_MainWindow.py
The UI module mainly sets up the layout: window size, canvas size, button sizes, and so on.

```python
from PyQt5 import QtCore, QtGui, QtWidgets
# DigitalMnistNum is the handwriting panel widget
from DigitalMnistNum import DigitalMnistNum


class Ui_MainWindow(object):
    def setupUi(self, MainWindow):
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(320, 200)  # main window size
        sizePolicy = QtWidgets.QSizePolicy(
            QtWidgets.QSizePolicy.Fixed, QtWidgets.QSizePolicy.Fixed)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(
            MainWindow.sizePolicy().hasHeightForWidth())
        MainWindow.setSizePolicy(sizePolicy)
        self.centralWidget = QtWidgets.QWidget(MainWindow)
        self.centralWidget.setObjectName("centralWidget")
        self.widget = DigitalMnistNum(self.centralWidget)
        self.widget.setGeometry(QtCore.QRect(30, 20, 140, 140))  # 140*140 canvas
        self.widget.setObjectName("widget")
        # right-hand side layout
        self.verticalLayoutWidget = QtWidgets.QWidget(self.centralWidget)
        self.verticalLayoutWidget.setGeometry(QtCore.QRect(190, 20, 105, 140))
        self.verticalLayoutWidget.setObjectName("verticalLayoutWidget")
        self.verticalLayout = QtWidgets.QVBoxLayout(self.verticalLayoutWidget)
        self.verticalLayout.setContentsMargins(20, 20, 20, 20)
        self.verticalLayout.setSpacing(6)
        self.verticalLayout.setObjectName("verticalLayout")

        self.clearBtn = QtWidgets.QPushButton(self.verticalLayoutWidget)
        self.clearBtn.setObjectName("clearBtn")
        self.verticalLayout.addWidget(self.clearBtn)

        self.saveBtn = QtWidgets.QPushButton(self.verticalLayoutWidget)
        self.saveBtn.setObjectName("saveBtn")
        self.verticalLayout.addWidget(self.saveBtn)

        self.recongBtn = QtWidgets.QPushButton(self.verticalLayoutWidget)
        self.recongBtn.setObjectName("recongBtn")
        self.verticalLayout.addWidget(self.recongBtn)
        self.result = QtWidgets.QLabel(self.verticalLayoutWidget)

        font = QtGui.QFont()
        font.setFamily("Arial")
        font.setPointSize(12)
        font.setBold(True)
        font.setWeight(70)
        # result display area
        self.result.setFont(font)
        self.result.setObjectName("res")
        self.verticalLayout.addWidget(self.result)
        self.verticalLayout.setStretch(0, 1)
        self.verticalLayout.setStretch(1, 1)
        self.verticalLayout.setStretch(2, 1)
        self.verticalLayout.setStretch(3, 2)
        MainWindow.setCentralWidget(self.centralWidget)

        self.retranslateUi(MainWindow)
        self.clearBtn.clicked.connect(MainWindow.clearBtn)
        self.saveBtn.clicked.connect(MainWindow.saveBtn)
        self.recongBtn.clicked.connect(MainWindow.recongBtn)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
        self.clearBtn.setText(_translate("MainWindow", "clear"))
        self.saveBtn.setText(_translate("MainWindow", "save"))
        self.recongBtn.setText(_translate("MainWindow", "recog"))
        self.result.setText(_translate("MainWindow", "res"))
```
- run.py
run.py ties the files above together and runs the application.
```python
import sys
from PyQt5 import QtWidgets, QtGui
from MainWindowC import MainWindow

if __name__ == '__main__':
    app = QtWidgets.QApplication(sys.argv)
    win = MainWindow()
    win.show()
    sys.exit(app.exec_())
```
2. Deep learning network models
I won't go over the neural network theory here; the thesis covers it in detail.
- Single-layer neural network (straight to the code):

```python
import tensorflow as tf
import tensorflowvisu
from tensorflow.examples.tutorials.mnist import input_data as mnist_data
print("Tensorflow version " + tf.__version__)
tf.set_random_seed(0)

# neural network with 1 layer of 10 softmax neurons
#
# · · · · · · · · · ·    (input data, flattened pixels)    X [batch, 784]   # 784 = 28 * 28
# \x/x\x/x\x/x\x/x\x/ -- fully connected layer (softmax)   W [784, 10]  b[10]
#   · · · · · · · ·                                        Y [batch, 10]

# The model is:
#
# Y = softmax( X * W + b)
#   X: matrix for 100 grayscale images of 28x28 pixels, flattened (there are 100 images in a mini-batch)
#   W: weight matrix with 784 lines and 10 columns
#   b: bias vector with 10 dimensions
#   +: add with broadcasting: adds the vector to each line of the matrix (numpy)
#   softmax(matrix) applies softmax on each line
#   softmax(line) applies an exp to each value then divides by the norm of the resulting line
#   Y: output matrix with 100 lines and 10 columns

# Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels)
mnist = mnist_data.read_data_sets("data", one_hot=True, reshape=False, validation_size=0)

# input X: 28x28 grayscale images, the first dimension (None) will index the images in the mini-batch
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
# correct answers will go here
Y_ = tf.placeholder(tf.float32, [None, 10])
# weights W[784, 10]   784=28*28
W = tf.Variable(tf.zeros([784, 10]), name="W")
# biases b[10]
b = tf.Variable(tf.zeros([10]), name="b")

# flatten the images into a single line of pixels
# -1 in the shape definition means "the only possible dimension that will preserve the number of elements"
XX = tf.reshape(X, [-1, 784])

# The model
Y = tf.nn.softmax(tf.matmul(XX, W) + b)

# loss function: cross-entropy = - sum( Y_i * log(Yi) )
#   Y: the computed output vector
#   Y_: the desired output vector

# cross-entropy
# log takes the log of each element, * multiplies the tensors element by element
# reduce_mean will add all the components in the tensor
# so here we end up with the total cross-entropy for all images in the batch
cross_entropy = -tf.reduce_mean(Y_ * tf.log(Y)) * 1000.0  # normalized for batches of 100 images,
# *10 because "mean" included an unwanted division by 10

# accuracy of the trained model, between 0 (worst) and 1 (best)
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# training, learning rate = 0.005
train_step = tf.train.GradientDescentOptimizer(0.005).minimize(cross_entropy)

# matplotlib visualisation
allweights = tf.reshape(W, [-1])
allbiases = tf.reshape(b, [-1])
I = tensorflowvisu.tf_format_mnist_images(X, Y, Y_)  # assembles 10x10 images by default
It = tensorflowvisu.tf_format_mnist_images(X, Y, Y_, 1000, lines=25)  # 1000 images on 25 lines
datavis = tensorflowvisu.MnistDataVis()

# init
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)


# You can call this function in a loop to train the model, 100 images at a time
def training_step(i, update_test_data, update_train_data):

    # training on batches of 100 images with 100 labels
    batch_X, batch_Y = mnist.train.next_batch(100)

    # compute training values for visualisation
    if update_train_data:
        a, c, im, w, b = sess.run([accuracy, cross_entropy, I, allweights, allbiases],
                                  feed_dict={X: batch_X, Y_: batch_Y})
        datavis.append_training_curves_data(i, a, c)
        datavis.append_data_histograms(i, w, b)
        datavis.update_image1(im)
        print(str(i) + ": accuracy:" + str(a) + " loss: " + str(c))

    # compute test values for visualisation
    if update_test_data:
        a, c, im = sess.run([accuracy, cross_entropy, It], feed_dict={X: mnist.test.images, Y_: mnist.test.labels})
        datavis.append_test_curves_data(i, a, c)
        datavis.update_image2(im)
        print(str(i) + ": ********* epoch " + str(i*100//mnist.train.images.shape[0]+1) + " ********* test accuracy:" + str(a) + " test loss: " + str(c))

    # the backpropagation training step
    sess.run(train_step, feed_dict={X: batch_X, Y_: batch_Y})


datavis.animate(training_step, iterations=2000+1, train_data_update_freq=10, test_data_update_freq=50, more_tests_at_start=True)

# to save the animation as a movie, add save_movie=True as an argument to datavis.animate
# to disable the visualisation use the following line instead of the datavis.animate line
# for i in range(2000): training_step(i, i % 50 == 0, i % 10 == 0)

print("max test accuracy: " + str(datavis.get_max_test_accuracy()))

# final max test accuracy = 0.9268 (10K iterations). Accuracy should peak above 0.92 in the first 2000 iterations.

# save the trained variables to disk
# (note: do NOT run sess.run(init) again here, or the trained weights are wiped before saving)
saver = tf.train.Saver()
md_path = R"C:\Users\yaoya\AppData\Local\conda\conda\envs\tensorflow\tensorflow-mnist-tutorial\TestProject\mdlib\md1\init"
save_path = saver.save(sess, md_path, global_step=1000)
print("Model saved in file: %s" % save_path)
```
The training output:
The visualised training process:
As you can see, the accuracy ends up around 92%, pretty bad :(.
Recognition results when calling the model on my own handwriting:
- Five-layer fully connected neural network

```python
import tensorflow as tf
import tensorflowvisu
from tensorflow.examples.tutorials.mnist import input_data as mnist_data
print("Tensorflow version " + tf.__version__)
tf.set_random_seed(0)

# neural network with 5 layers
#
# · · · · · · · · · ·    (input data, flattened pixels)      X [batch, 784]   # 784 = 28*28
# \x/x\x/x\x/x\x/x\x/ -- fully connected layer (sigmoid)     W1 [784, 200]  B1[200]
#  · · · · · · · · ·                                         Y1 [batch, 200]
#   \x/x\x/x\x/x\x/   -- fully connected layer (sigmoid)     W2 [200, 100]  B2[100]
#    · · · · · · ·                                           Y2 [batch, 100]
#     \x/x\x/x\x/     -- fully connected layer (sigmoid)     W3 [100, 60]   B3[60]
#      · · · · ·                                             Y3 [batch, 60]
#       \x/x\x/       -- fully connected layer (sigmoid)     W4 [60, 30]    B4[30]
#        · · ·                                               Y4 [batch, 30]
#         \x/         -- fully connected layer (softmax)     W5 [30, 10]    B5[10]
#          ·                                                 Y5 [batch, 10]

# Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels)
mnist = mnist_data.read_data_sets(
    "data", one_hot=True, reshape=False, validation_size=0)

# input X: 28x28 grayscale images, the first dimension (None) will index the images in the mini-batch
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
# correct answers will go here
Y_ = tf.placeholder(tf.float32, [None, 10])

# five layers and their number of neurons (the last layer has 10 softmax neurons)
L = 200
M = 100
N = 60
O = 30
# Weights initialised with small random values between -0.2 and +0.2
# When using RELUs, make sure biases are initialised with small *positive* values for example 0.1 = tf.ones([K])/10
W1 = tf.Variable(tf.truncated_normal(
    [784, L], stddev=0.1), name="W1")  # 784 = 28 * 28
B1 = tf.Variable(tf.zeros([L]), name="B1")
W2 = tf.Variable(tf.truncated_normal([L, M], stddev=0.1), name="W2")
B2 = tf.Variable(tf.zeros([M]), name="B2")
W3 = tf.Variable(tf.truncated_normal([M, N], stddev=0.1), name="W3")
B3 = tf.Variable(tf.zeros([N]), name="B3")
W4 = tf.Variable(tf.truncated_normal([N, O], stddev=0.1), name="W4")
B4 = tf.Variable(tf.zeros([O]), name="B4")
W5 = tf.Variable(tf.truncated_normal([O, 10], stddev=0.1), name="W5")
B5 = tf.Variable(tf.zeros([10]), name="B5")

# The model
XX = tf.reshape(X, [-1, 784])
Y1 = tf.nn.sigmoid(tf.matmul(XX, W1) + B1)
Y2 = tf.nn.sigmoid(tf.matmul(Y1, W2) + B2)
Y3 = tf.nn.sigmoid(tf.matmul(Y2, W3) + B3)
Y4 = tf.nn.sigmoid(tf.matmul(Y3, W4) + B4)
# Ylogits is kept separate from Y so that softmax_cross_entropy_with_logits
# can be used to compute the cross-entropy in a numerically safe way
Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)

# cross-entropy loss function (= -sum(Y_i * log(Yi)) ), normalised for batches of 100 images
# TensorFlow provides the softmax_cross_entropy_with_logits function to avoid numerical stability
# problems with log(0) which is NaN
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
    logits=Ylogits, labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy) * 100

# accuracy of the trained model, between 0 (worst) and 1 (best)
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# print("correct_prediction: %s", correct_prediction)

# matplotlib visualisation
allweights = tf.concat([tf.reshape(W1, [-1]), tf.reshape(W2, [-1]),
                        tf.reshape(W3, [-1]), tf.reshape(W4, [-1]), tf.reshape(W5, [-1])], 0)
allbiases = tf.concat([tf.reshape(B1, [-1]), tf.reshape(B2, [-1]),
                       tf.reshape(B3, [-1]), tf.reshape(B4, [-1]), tf.reshape(B5, [-1])], 0)
I = tensorflowvisu.tf_format_mnist_images(X, Y, Y_)
It = tensorflowvisu.tf_format_mnist_images(X, Y, Y_, 1000, lines=25)
datavis = tensorflowvisu.MnistDataVis()

# training step, learning rate = 0.003
learning_rate = 0.003
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

# init
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)


# You can call this function in a loop to train the model, 100 images at a time
def training_step(i, update_test_data, update_train_data):

    # training on batches of 100 images with 100 labels
    batch_X, batch_Y = mnist.train.next_batch(100)

    # compute training values for visualisation
    if update_train_data:
        a, c, im, w, b = sess.run([accuracy, cross_entropy, I, allweights, allbiases], {
            X: batch_X, Y_: batch_Y})
        print(str(i) + ": accuracy:" + str(a) + " loss: " +
              str(c) + " (lr:" + str(learning_rate) + ")")
        datavis.append_training_curves_data(i, a, c)
        datavis.update_image1(im)
        datavis.append_data_histograms(i, w, b)

    # compute test values for visualisation
    if update_test_data:
        a, c, im = sess.run([accuracy, cross_entropy, It], {
            X: mnist.test.images, Y_: mnist.test.labels})
        print(str(i) + ": ********* epoch " + str(i * 100 //
              mnist.train.images.shape[0] + 1) + " ********* test accuracy:" + str(a) + " test loss: " + str(c))
        datavis.append_test_curves_data(i, a, c)
        datavis.update_image2(im)

    # the backpropagation training step
    sess.run(train_step, {X: batch_X, Y_: batch_Y})


'''
datavis.animate(training_step, iterations=10000 + 1, train_data_update_freq=20,
                test_data_update_freq=100, more_tests_at_start=True)
'''

# to save the animation as a movie, add save_movie=True as an argument to datavis.animate
# to disable the visualisation use the following line instead of the datavis.animate line
for i in range(10000 + 1):
    training_step(i, i % 100 == 0, i % 20 == 0)

print("max test accuracy: " + str(datavis.get_max_test_accuracy()))
```
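One thing to note: MainWindowC.py restores the five-layer model from mdlib\md2 (init-1000.meta), but the training script above ends without a save step. A saver analogous to the one in the single-layer script is needed; here is a minimal sketch, assuming the checkpoint path that the restore code expects:

```python
# assumed save step, mirroring the single-layer script; the path matches
# the checkpoint directory that MainWindowC.py restores from
saver = tf.train.Saver()
md_path = R"C:\Users\tensorflow\tensorflow-mnist-tutorial\TestProject\mdlib\md2\init"
save_path = saver.save(sess, md_path, global_step=1000)
print("Model saved in file: %s" % save_path)
```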
The visualised training process:
The five-layer fully connected network reaches about 97% accuracy.
Calling the model to recognise my own handwritten digits:
- Convolutional neural network

```python
import tensorflow as tf
import tensorflowvisu
import math
from tensorflow.examples.tutorials.mnist import input_data as mnist_data
print("Tensorflow version " + tf.__version__)
tf.set_random_seed(0)

# Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels)
mnist = mnist_data.read_data_sets(
    "data", one_hot=True, reshape=False, validation_size=0)

# neural network structure for this sample:
#
# · · · · · · · · · ·    (input data, 1-deep)            X [batch, 28, 28, 1]
# @ @ @ @ @ @ @ @ @ @ -- conv. layer 5x5x1=>4 stride 1   W1 [5, 5, 1, 4]   B1 [4]
# ∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶                                    Y1 [batch, 28, 28, 4]
#   @ @ @ @ @ @ @ @   -- conv. layer 5x5x4=>8 stride 2   W2 [5, 5, 4, 8]   B2 [8]
#   ∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶                                      Y2 [batch, 14, 14, 8]
#     @ @ @ @ @ @     -- conv. layer 4x4x8=>12 stride 2  W3 [4, 4, 8, 12]  B3 [12]
#     ∶∶∶∶∶∶∶∶∶∶∶                                        Y3 [batch, 7, 7, 12] => reshaped to YY [batch, 7*7*12]
#      \x/x\x\x/      -- fully connected layer (relu)    W4 [7*7*12, 200]  B4 [200]
#       · · · ·                                          Y4 [batch, 200]
#       \x/x\x/       -- fully connected layer (softmax) W5 [200, 10]      B5 [10]
#        · · ·                                           Y [batch, 10]

# input X: 28x28 grayscale images, the first dimension (None) will index the images in the mini-batch
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
# correct answers will go here
Y_ = tf.placeholder(tf.float32, [None, 10])
# variable learning rate
lr = tf.placeholder(tf.float32)

# three convolutional layers with their channel counts, and a
# fully connected layer (the last layer has 10 softmax neurons)
K = 4  # first convolutional layer output depth
L = 8  # second convolutional layer output depth
M = 12  # third convolutional layer
N = 200  # fully connected layer

# 5x5 patch, 1 input channel, K output channels
W1 = tf.Variable(tf.truncated_normal([5, 5, 1, K], stddev=0.1), name="W1")
B1 = tf.Variable(tf.ones([K]) / 10, name="B1")
W2 = tf.Variable(tf.truncated_normal([5, 5, K, L], stddev=0.1), name="W2")
B2 = tf.Variable(tf.ones([L]) / 10, name="B2")
W3 = tf.Variable(tf.truncated_normal([4, 4, L, M], stddev=0.1), name="W3")
B3 = tf.Variable(tf.ones([M]) / 10, name="B3")

W4 = tf.Variable(tf.truncated_normal([7 * 7 * M, N], stddev=0.1), name="W4")
B4 = tf.Variable(tf.ones([N]) / 10, name="B4")
W5 = tf.Variable(tf.truncated_normal([N, 10], stddev=0.1), name="W5")
B5 = tf.Variable(tf.ones([10]) / 10, name="B5")

# The model
stride = 1  # output is 28x28
Y1 = tf.nn.relu(tf.nn.conv2d(
    X, W1, strides=[1, stride, stride, 1], padding='SAME') + B1)
stride = 2  # output is 14x14
Y2 = tf.nn.relu(tf.nn.conv2d(Y1, W2, strides=[
    1, stride, stride, 1], padding='SAME') + B2)
stride = 2  # output is 7x7
Y3 = tf.nn.relu(tf.nn.conv2d(Y2, W3, strides=[
    1, stride, stride, 1], padding='SAME') + B3)

# reshape the output from the third convolution for the fully connected layer
YY = tf.reshape(Y3, shape=[-1, 7 * 7 * M])

Y4 = tf.nn.relu(tf.matmul(YY, W4) + B4)
Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)

# cross-entropy loss function (= -sum(Y_i * log(Yi)) ), normalised for batches of 100 images
# TensorFlow provides the softmax_cross_entropy_with_logits function to avoid numerical stability
# problems with log(0) which is NaN
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
    logits=Ylogits, labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy) * 100

# accuracy of the trained model, between 0 (worst) and 1 (best)
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# matplotlib visualisation
allweights = tf.concat([tf.reshape(W1, [-1]), tf.reshape(W2, [-1]),
                        tf.reshape(W3, [-1]), tf.reshape(W4, [-1]), tf.reshape(W5, [-1])], 0)
allbiases = tf.concat([tf.reshape(B1, [-1]), tf.reshape(B2, [-1]),
                       tf.reshape(B3, [-1]), tf.reshape(B4, [-1]), tf.reshape(B5, [-1])], 0)
I = tensorflowvisu.tf_format_mnist_images(X, Y, Y_)
It = tensorflowvisu.tf_format_mnist_images(X, Y, Y_, 1000, lines=25)
datavis = tensorflowvisu.MnistDataVis()

# training step, the learning rate is a placeholder
train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)

# init
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)


# You can call this function in a loop to train the model, 100 images at a time
def training_step(i, update_test_data, update_train_data):

    # training on batches of 100 images with 100 labels
    batch_X, batch_Y = mnist.train.next_batch(100)

    # learning rate decay
    max_learning_rate = 0.003
    min_learning_rate = 0.0001
    decay_speed = 2000.0
    learning_rate = min_learning_rate + \
        (max_learning_rate - min_learning_rate) * math.exp(-i / decay_speed)

    # compute training values for visualisation
    if update_train_data:
        a, c, im, w, b = sess.run([accuracy, cross_entropy, I, allweights, allbiases], {
            X: batch_X, Y_: batch_Y})
        print(str(i) + ": accuracy:" + str(a) + " loss: " +
              str(c) + " (lr:" + str(learning_rate) + ")")
        datavis.append_training_curves_data(i, a, c)
        datavis.update_image1(im)
        datavis.append_data_histograms(i, w, b)

    # compute test values for visualisation
    if update_test_data:
        a, c, im = sess.run([accuracy, cross_entropy, It], {
            X: mnist.test.images, Y_: mnist.test.labels})
        print(str(i) + ": ********* epoch " + str(i * 100 //
              mnist.train.images.shape[0] + 1) + " ********* test accuracy:" + str(a) + " test loss: " + str(c))
        datavis.append_test_curves_data(i, a, c)
        datavis.update_image2(im)

    # the backpropagation training step
    sess.run(train_step, {X: batch_X, Y_: batch_Y, lr: learning_rate})


# datavis.animate(training_step, 10001, train_data_update_freq=10, test_data_update_freq=100)

# to save the animation as a movie, add save_movie=True as an argument to datavis.animate
# to disable the visualisation use the following line instead of the datavis.animate line
for i in range(10000 + 1):
    training_step(i, i % 100 == 0, i % 20 == 0)

print("max test accuracy: " + str(datavis.get_max_test_accuracy()))
```
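The convolutional script likewise ends without a save step. To restore it from the GUI in the same way as the other two models, it would need its own checkpoint directory; a minimal sketch, where the md3 directory name is my assumption, following the md1/md2 naming pattern:

```python
# assumed save step; the md3 directory name is hypothetical,
# chosen to follow the md1/md2 naming of the other two models
saver = tf.train.Saver()
md_path = R"C:\Users\tensorflow\tensorflow-mnist-tutorial\TestProject\mdlib\md3\init"
save_path = saver.save(sess, md_path, global_step=1000)
print("Model saved in file: %s" % save_path)
```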
Now see what a network with just three convolutional layers can do:
Recognising my own handwritten digits:
With this, the graduation project is finally finished and the thesis defence is done. I really learned a lot over these two months and ran into plenty of trouble; I'm posting this in the hope that it helps others stuck in a similar situation. Good luck to us all.
Reference: TensorFlow and deep learning, without a PhD