I'm writing this post because, although there are plenty of deep learning resources online, building a complete program on your own, and in particular restoring a saved model and calling it for inference, is not as easy as it looks; I hit quite a few pitfalls along the way. Fortunately I finished both the project and the thesis, so I'm sharing the result here.
Basic tools used: Anaconda, the PyQt5 library, the PIL Image library, and TensorFlow (GPU build).
P.S. For reasons of space, the neural network background is covered in my thesis rather than repeated here.
The complete code is available in the [GitHub repository].
- First, the GUI. I started from some half-finished examples found online and adapted them until they were usable; the interface is built with Python's PyQt5 library.
Here is a look at the finished product first; the code follows below.

The project uses four Python files: DigitalMnistNum.py, MainWindowC.py, UI_MainWindow.py, and run.py.
- DigitalMnistNum.py defines the clear, save, recog, and result operations. The constructor sets up the pen and canvas, and the saved canvas image is scaled down to 28*28 pixels (the code below writes it out as pic.bmp).
```python
# handwritten digit canvas widget
from PyQt5 import QtCore, QtGui, QtWidgets


class DigitalMnistNum(QtWidgets.QWidget):
    def __init__(self, parent=None):
        super(DigitalMnistNum, self).__init__(parent)
        self.pen = QtGui.QPen()
        self.pen.setStyle(QtCore.Qt.SolidLine)
        self.pen.setWidth(12)               # pen width
        self.pen.setColor(QtCore.Qt.white)  # draw strokes in white
        # saved image size: 28*28 pixels
        self.bitmapSize = QtCore.QSize(28, 28)
        self.resetBitmap()

    def resetBitmap(self):
        self.pix = QtGui.QBitmap(self.size())
        self.pix.fill(QtCore.Qt.black)  # black background

    # "clear" button
    def clearBitmap(self):
        self.resetBitmap()
        self.update()

    def recongBitmap(self):
        pass

    # save the canvas, scaled down to bitmapSize
    def saveBitmap(self):
        fileName = str("pic.bmp")
        tmp = self.pix.scaled(
            self.bitmapSize, QtCore.Qt.KeepAspectRatio)
        QtCore.qDebug(str(tmp.size()))
        tmp.save(fileName)

    def setBitmapSize(self, size):
        self.bitmapSize = QtCore.QSize(size[0], size[1])
```
- The four mouse/paint handlers (press, move, repaint, release) are adapted from this post: https://www.cnblogs.com/PyLearn/p/7689170.html
```python
    # the following handlers record the handwritten strokes

    # mouse press: start a new stroke
    def mousePressEvent(self, event):
        if event.button() == QtCore.Qt.LeftButton:
            self.startPos = event.pos()
            painter = QtGui.QPainter()
            painter.begin(self.pix)
            painter.setPen(self.pen)
            painter.drawPoint(self.startPos)
            painter.end()
            self.update()

    # mouse move: draw a line segment from the previous position
    def mouseMoveEvent(self, event):
        painter = QtGui.QPainter()
        painter.begin(self.pix)
        painter.setPen(self.pen)
        painter.drawLine(self.startPos, event.pos())
        painter.end()
        self.startPos = event.pos()
        self.update()

    # repaint: blit the off-screen bitmap onto the widget
    def paintEvent(self, event):
        if self.size() != self.pix.size():
            QtCore.qDebug(str(self.size()) + "," +
                          str(self.pix.size()) + "," + str(event.type()))
            self.resetBitmap()
        painter = QtGui.QPainter(self)
        painter.drawPixmap(QtCore.QPoint(0, 0), self.pix)

    # mouse release: trigger a final repaint
    def mouseReleaseEvent(self, event):
        self.update()
```
- MainWindowC.py
MainWindowC wires up the clear, save, and recog button events. Clearing and saving the canvas are straightforward.
```python
from PyQt5 import QtCore, QtWidgets
# imports used by the recognition handler below
from PIL import Image
from numpy import array
import tensorflow as tf
from UI_MainWindow import Ui_MainWindow


class MainWindow(QtWidgets.QMainWindow):
    def __init__(self, parent=None):
        super(MainWindow, self).__init__(parent)
        self.ui = Ui_MainWindow()
        self.ui.setupUi(self)

    def clearBtn(self):
        QtCore.qDebug(str("clearBtn"))
        self.ui.widget.clearBitmap()

    def saveBtn(self):
        QtCore.qDebug(str("saveBtn"))
        self.ui.widget.saveBitmap()

    def setLabelText(self, text):
        self.ui.result.setText(text)

    def setBitmapSize(self, size):
        self.ui.widget.setBitmapSize(size)
```
The recognition handler is the interesting part. The saved handwritten digit image has to be preprocessed first: open it, read the pixel values into a list with the PIL Image module, and convert the raw values into the same format the MNIST training data uses (floats in [0, 1], white strokes on a black background). Because the canvas already draws white on black, dividing by 255 is enough here; the commented-out line would additionally invert a black-on-white image.
```python
    # prediction: preprocess the saved image
    def recongBtn(self):
        QtCore.qDebug(str("recongBtn"))
        self.ui.widget.recongBitmap()
        # path of the saved canvas image
        file_name = R"C:\Users\tensorflow\tensorflow-mnist-tutorial\TestProject\pic.bmp"
        img = Image.open(file_name).convert('L')
        cvtValue = list(img.getdata())
        # scale pixel values to [0, 1]: 1 is pure white, 0 is pure black
        # resCvtValue = [(255 - x) * 1.0 / 255.0 for x in cvtValue]  # use this to invert black-on-white images
        resCvtValue = [x / 255.0 for x in cvtValue]
        newShape = array(resCvtValue).reshape(28, 28, 1)
```
Next comes the TensorFlow part: restoring the trained parameters. import_meta_graph() and restore() are used together to bring back the saved model. This assumes the model has already been trained and checkpointed to disk.
```python
        # load the saved parameters
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            new_saver = tf.train.import_meta_graph(
                R'C:\Users\tensorflow\tensorflow-mnist-tutorial\TestProject\mdlib\md2\init-1000.meta')
            new_saver.restore(sess, tf.train.latest_checkpoint(
                R'C:\Users\tensorflow\tensorflow-mnist-tutorial\TestProject\mdlib\md2'))
            print("model restore done\n")
            graph = tf.get_default_graph()
```
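If get_tensor_by_name() later fails, it usually means the variable was saved under a different name than you expect. A small debugging sketch that is not part of the original code, but which I found helpful, is to list the operations in the restored graph so you can see the exact names:

```python
# debugging sketch: print the operation names in the restored graph;
# the names passed to get_tensor_by_name() must match these, plus ":0"
for op in graph.get_operations():
    print(op.name)
```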
The next listing contains three blocks (two of them commented out) because the program uses three different network models; each model's accuracy has to be tested separately, and each expects a different input shape. The variables below receive the parameter values restored from the checkpoint.
model 1 is the single-layer network, model 2 the five-layer fully connected network, and model 3 the convolutional network.
```python
'''
# model 1: single-layer softmax network
W = graph.get_tensor_by_name("W:0")
b = graph.get_tensor_by_name("b:0")
XX = tf.reshape(newShape, [-1, 784])
Y = tf.nn.softmax(tf.matmul(tf.cast(XX, tf.float32), W) + b)

feed_dict = {XX: [resCvtValue]}
'''

# model 2: five-layer fully connected network
# X = tf.placeholder(tf.float32, [None, 28, 28, 1])
# layer sizes, for reference
L = 200
M = 100
N = 60
O = 30
XX = tf.reshape(newShape, [-1, 784])
W1 = graph.get_tensor_by_name("W1:0")
B1 = graph.get_tensor_by_name("B1:0")

W2 = graph.get_tensor_by_name("W2:0")
B2 = graph.get_tensor_by_name("B2:0")

W3 = graph.get_tensor_by_name("W3:0")
B3 = graph.get_tensor_by_name("B3:0")

W4 = graph.get_tensor_by_name("W4:0")
B4 = graph.get_tensor_by_name("B4:0")

W5 = graph.get_tensor_by_name("W5:0")
B5 = graph.get_tensor_by_name("B5:0")

Y1 = tf.nn.sigmoid(tf.matmul(tf.cast(XX, tf.float32), W1) + B1)
Y2 = tf.nn.sigmoid(tf.matmul(Y1, W2) + B2)
Y3 = tf.nn.sigmoid(tf.matmul(Y2, W3) + B3)
Y4 = tf.nn.sigmoid(tf.matmul(Y3, W4) + B4)
Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)
feed_dict = {XX: [resCvtValue]}

'''
# model 3: convolutional network
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
K = 4    # first convolutional layer output depth
L = 8    # second convolutional layer output depth
M = 12   # third convolutional layer
N = 200  # fully connected layer

W1 = graph.get_tensor_by_name("W1:0")
B1 = graph.get_tensor_by_name("B1:0")
stride = 1  # output is 28x28
Y1 = tf.nn.relu(tf.nn.conv2d(
    X, W1, strides=[1, stride, stride, 1], padding='SAME') + B1)

W2 = graph.get_tensor_by_name("W2:0")
B2 = graph.get_tensor_by_name("B2:0")
stride = 2  # output is 14x14
Y2 = tf.nn.relu(tf.nn.conv2d(Y1, W2, strides=[
    1, stride, stride, 1], padding='SAME') + B2)

W3 = graph.get_tensor_by_name("W3:0")
B3 = graph.get_tensor_by_name("B3:0")
stride = 2  # output is 7x7
Y3 = tf.nn.relu(tf.nn.conv2d(Y2, W3, strides=[
    1, stride, stride, 1], padding='SAME') + B3)

# reshape the output from the third convolution for the fully connected layer
YY = tf.reshape(Y3, shape=[-1, 7 * 7 * M])

W4 = graph.get_tensor_by_name("W4:0")
B4 = graph.get_tensor_by_name("B4:0")
Y4 = tf.nn.relu(tf.matmul(YY, W4) + B4)

W5 = graph.get_tensor_by_name("W5:0")
B5 = graph.get_tensor_by_name("B5:0")

Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)

feed_dict = {X: [newShape]}
'''
```
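None of the three blocks above actually runs the network yet. The final step, which the snippets leave out, is to evaluate Y inside the same `with tf.Session()` block and show the most likely digit on the result label. A minimal sketch, assuming numpy is imported as `np`:

```python
# run the restored model on the preprocessed image and
# display the predicted digit on the result label
res = sess.run(Y, feed_dict=feed_dict)
pred = int(np.argmax(res))
print("prediction:", pred)
self.setLabelText(str(pred))
```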
- UI_MainWindow.py
The UI module handles layout: window size, canvas size, button sizes, and the result label.
```python
from PyQt5 import QtCore, QtGui, QtWidgets
# DigitalMnistNum is the digit canvas widget
from DigitalMnistNum import DigitalMnistNum


class Ui_MainWindow(object):
    def setupUi(self, MainWindow):
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(320, 200)  # main window size
        sizePolicy = QtWidgets.QSizePolicy(
            QtWidgets.QSizePolicy.Fixed, QtWidgets.QSizePolicy.Fixed)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(
            MainWindow.sizePolicy().hasHeightForWidth())
        MainWindow.setSizePolicy(sizePolicy)
        self.centralWidget = QtWidgets.QWidget(MainWindow)
        self.centralWidget.setObjectName("centralWidget")
        self.widget = DigitalMnistNum(self.centralWidget)
        self.widget.setGeometry(QtCore.QRect(30, 20, 140, 140))  # 140*140 canvas
        self.widget.setObjectName("widget")
        # right-hand side layout
        self.verticalLayoutWidget = QtWidgets.QWidget(self.centralWidget)
        self.verticalLayoutWidget.setGeometry(QtCore.QRect(190, 20, 105, 140))
        self.verticalLayoutWidget.setObjectName("verticalLayoutWidget")
        self.verticalLayout = QtWidgets.QVBoxLayout(self.verticalLayoutWidget)
        self.verticalLayout.setContentsMargins(20, 20, 20, 20)
        self.verticalLayout.setSpacing(6)
        self.verticalLayout.setObjectName("verticalLayout")

        self.clearBtn = QtWidgets.QPushButton(self.verticalLayoutWidget)
        self.clearBtn.setObjectName("clearBtn")
        self.verticalLayout.addWidget(self.clearBtn)

        self.saveBtn = QtWidgets.QPushButton(self.verticalLayoutWidget)
        self.saveBtn.setObjectName("saveBtn")
        self.verticalLayout.addWidget(self.saveBtn)

        self.recongBtn = QtWidgets.QPushButton(self.verticalLayoutWidget)
        self.recongBtn.setObjectName("recongBtn")
        self.verticalLayout.addWidget(self.recongBtn)
        self.result = QtWidgets.QLabel(self.verticalLayoutWidget)

        font = QtGui.QFont()
        font.setFamily("Arial")
        font.setPointSize(12)
        font.setBold(True)
        font.setWeight(70)
        # result display area
        self.result.setFont(font)
        self.result.setObjectName("res")
        self.verticalLayout.addWidget(self.result)
        self.verticalLayout.setStretch(0, 1)
        self.verticalLayout.setStretch(1, 1)
        self.verticalLayout.setStretch(2, 1)
        self.verticalLayout.setStretch(3, 2)
        MainWindow.setCentralWidget(self.centralWidget)

        self.retranslateUi(MainWindow)
        self.clearBtn.clicked.connect(MainWindow.clearBtn)
        self.saveBtn.clicked.connect(MainWindow.saveBtn)
        self.recongBtn.clicked.connect(MainWindow.recongBtn)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
        self.clearBtn.setText(_translate("MainWindow", "clear"))
        self.saveBtn.setText(_translate("MainWindow", "save"))
        self.recongBtn.setText(_translate("MainWindow", "recog"))
        self.result.setText(_translate("MainWindow", "res"))
```
- run.py
run.py ties the files above together and launches the application.
```python
import sys
from PyQt5 import QtWidgets
from MainWindowC import MainWindow

if __name__ == '__main__':
    app = QtWidgets.QApplication(sys.argv)
    win = MainWindow()
    win.show()
    sys.exit(app.exec_())
```
2. Deep learning network models
The network architectures themselves are covered in detail in the thesis, so I'll go straight to the code.
- Single-layer neural network
```python
import tensorflow as tf
import tensorflowvisu
from tensorflow.examples.tutorials.mnist import input_data as mnist_data
print("Tensorflow version " + tf.__version__)
tf.set_random_seed(0)

# neural network with 1 layer of 10 softmax neurons
#
# · · · · · · · · · ·    (input data, flattened pixels)      X [batch, 784]  # 784 = 28 * 28
# \x/x\x/x\x/x\x/x\x/ -- fully connected layer (softmax)     W [784, 10]  b[10]
#   · · · · · · · ·                                          Y [batch, 10]

# The model is:
#
# Y = softmax( X * W + b)
# X: matrix for 100 grayscale images of 28x28 pixels, flattened (there are 100 images in a mini-batch)
# W: weight matrix with 784 lines and 10 columns
# b: bias vector with 10 dimensions
# +: add with broadcasting: adds the vector to each line of the matrix (numpy)
# softmax(matrix) applies softmax on each line
# softmax(line) applies an exp to each value then divides by the norm of the resulting line
# Y: output matrix with 100 lines and 10 columns

# Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels)
mnist = mnist_data.read_data_sets("data", one_hot=True, reshape=False, validation_size=0)

# input X: 28x28 grayscale images, the first dimension (None) will index the images in the mini-batch
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
# correct answers will go here
Y_ = tf.placeholder(tf.float32, [None, 10])
# weights W[784, 10]   784=28*28
W = tf.Variable(tf.zeros([784, 10]), name="W")
# biases b[10]
b = tf.Variable(tf.zeros([10]), name="b")

# flatten the images into a single line of pixels
# -1 in the shape definition means "the only possible dimension that will preserve the number of elements"
XX = tf.reshape(X, [-1, 784])

# The model
Y = tf.nn.softmax(tf.matmul(XX, W) + b)

# loss function: cross-entropy = - sum( Y_i * log(Yi) )
# Y: the computed output vector
# Y_: the desired output vector

# cross-entropy
# log takes the log of each element, * multiplies the tensors element by element
# reduce_mean will add all the components in the tensor
# so here we end up with the total cross-entropy for all images in the batch
cross_entropy = -tf.reduce_mean(Y_ * tf.log(Y)) * 1000.0  # normalized for batches of 100 images,
# *10 because "mean" included an unwanted division by 10

# accuracy of the trained model, between 0 (worst) and 1 (best)
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# training, learning rate = 0.005
train_step = tf.train.GradientDescentOptimizer(0.005).minimize(cross_entropy)

# matplotlib visualisation
allweights = tf.reshape(W, [-1])
allbiases = tf.reshape(b, [-1])
I = tensorflowvisu.tf_format_mnist_images(X, Y, Y_)  # assembles 10x10 images by default
It = tensorflowvisu.tf_format_mnist_images(X, Y, Y_, 1000, lines=25)  # 1000 images on 25 lines
datavis = tensorflowvisu.MnistDataVis()

# init
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)


# You can call this function in a loop to train the model, 100 images at a time
def training_step(i, update_test_data, update_train_data):

    # training on batches of 100 images with 100 labels
    batch_X, batch_Y = mnist.train.next_batch(100)

    # compute training values for visualisation
    if update_train_data:
        a, c, im, w, b = sess.run([accuracy, cross_entropy, I, allweights, allbiases],
                                  feed_dict={X: batch_X, Y_: batch_Y})
        datavis.append_training_curves_data(i, a, c)
        datavis.append_data_histograms(i, w, b)
        datavis.update_image1(im)
        print(str(i) + ": accuracy:" + str(a) + " loss: " + str(c))

    # compute test values for visualisation
    if update_test_data:
        a, c, im = sess.run([accuracy, cross_entropy, It], feed_dict={X: mnist.test.images, Y_: mnist.test.labels})
        datavis.append_test_curves_data(i, a, c)
        datavis.update_image2(im)
        print(str(i) + ": ********* epoch " + str(i*100//mnist.train.images.shape[0]+1) + " ********* test accuracy:" + str(a) + " test loss: " + str(c))

    # the backpropagation training step
    sess.run(train_step, feed_dict={X: batch_X, Y_: batch_Y})


datavis.animate(training_step, iterations=2000+1, train_data_update_freq=10, test_data_update_freq=50, more_tests_at_start=True)

# to save the animation as a movie, add save_movie=True as an argument to datavis.animate
# to disable the visualisation use the following line instead of the datavis.animate line
# for i in range(2000): training_step(i, i % 50 == 0, i % 10 == 0)

print("max test accuracy: " + str(datavis.get_max_test_accuracy()))

# final max test accuracy = 0.9268 (10K iterations). Accuracy should peak above 0.92 in the first 2000 iterations.

# save the trained variables to disk so the GUI can restore them later
# (note: do NOT re-run init here, or the trained weights would be reset to zero before saving)
saver = tf.train.Saver()
md_path = R"C:\Users\yaoya\AppData\Local\conda\conda\envs\tensorflow\tensorflow-mnist-tutorial\TestProject\mdlib\md1\init"
save_path = saver.save(sess, md_path, global_step=1000)
print("Model saved in file: %s" % save_path)
```
The training log:

Graphical visualisation of the training process:

The accuracy ends up around 92%, pretty bad :(.
Recognition result when calling the model on my own handwriting:

- Five-layer fully connected neural network
```python
import tensorflow as tf
import tensorflowvisu
from tensorflow.examples.tutorials.mnist import input_data as mnist_data
print("Tensorflow version " + tf.__version__)
tf.set_random_seed(0)

# neural network with 5 layers
#
# · · · · · · · · · ·    (input data, flattened pixels)     X [batch, 784]  # 784 = 28*28
# \x/x\x/x\x/x\x/x\x/ -- fully connected layer (sigmoid)    W1 [784, 200]   B1[200]
#  · · · · · · · · ·                                        Y1 [batch, 200]
#   \x/x\x/x\x/x\x/   -- fully connected layer (sigmoid)    W2 [200, 100]   B2[100]
#    · · · · · · ·                                          Y2 [batch, 100]
#     \x/x\x/x\x/     -- fully connected layer (sigmoid)    W3 [100, 60]    B3[60]
#      · · · · ·                                            Y3 [batch, 60]
#       \x/x\x/       -- fully connected layer (sigmoid)    W4 [60, 30]     B4[30]
#        · · ·                                              Y4 [batch, 30]
#         \x/         -- fully connected layer (softmax)    W5 [30, 10]     B5[10]
#          ·                                                Y5 [batch, 10]

# Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels)
mnist = mnist_data.read_data_sets(
    "data", one_hot=True, reshape=False, validation_size=0)

# input X: 28x28 grayscale images, the first dimension (None) will index the images in the mini-batch
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
# correct answers will go here
Y_ = tf.placeholder(tf.float32, [None, 10])

# five layers and their number of neurons (the last layer has 10 softmax neurons)
L = 200
M = 100
N = 60
O = 30
# Weights initialised with small random values between -0.2 and +0.2
# When using RELUs, make sure biases are initialised with small *positive* values for example 0.1 = tf.ones([K])/10
W1 = tf.Variable(tf.truncated_normal(
    [784, L], stddev=0.1), name="W1")  # 784 = 28 * 28
B1 = tf.Variable(tf.zeros([L]), name="B1")
W2 = tf.Variable(tf.truncated_normal([L, M], stddev=0.1), name="W2")
B2 = tf.Variable(tf.zeros([M]), name="B2")
W3 = tf.Variable(tf.truncated_normal([M, N], stddev=0.1), name="W3")
B3 = tf.Variable(tf.zeros([N]), name="B3")
W4 = tf.Variable(tf.truncated_normal([N, O], stddev=0.1), name="W4")
B4 = tf.Variable(tf.zeros([O]), name="B4")
W5 = tf.Variable(tf.truncated_normal([O, 10], stddev=0.1), name="W5")
B5 = tf.Variable(tf.zeros([10]), name="B5")

# The model
XX = tf.reshape(X, [-1, 784])
Y1 = tf.nn.sigmoid(tf.matmul(XX, W1) + B1)
Y2 = tf.nn.sigmoid(tf.matmul(Y1, W2) + B2)
Y3 = tf.nn.sigmoid(tf.matmul(Y2, W3) + B3)
Y4 = tf.nn.sigmoid(tf.matmul(Y3, W4) + B4)
# Ylogits is kept separate from Y so that
# softmax_cross_entropy_with_logits can compute
# the cross-entropy in a numerically safe way
Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)

# cross-entropy loss function (= -sum(Y_i * log(Yi)) ), normalised for batches of 100 images
# TensorFlow provides the softmax_cross_entropy_with_logits function to avoid numerical stability
# problems with log(0) which is NaN
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
    logits=Ylogits, labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy) * 100

# accuracy of the trained model, between 0 (worst) and 1 (best)
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# print("correct_prediction: %s", correct_prediction)

# matplotlib visualisation
allweights = tf.concat([tf.reshape(W1, [-1]), tf.reshape(W2, [-1]),
                        tf.reshape(W3, [-1]), tf.reshape(W4, [-1]), tf.reshape(W5, [-1])], 0)
allbiases = tf.concat([tf.reshape(B1, [-1]), tf.reshape(B2, [-1]),
                       tf.reshape(B3, [-1]), tf.reshape(B4, [-1]), tf.reshape(B5, [-1])], 0)
I = tensorflowvisu.tf_format_mnist_images(X, Y, Y_)
It = tensorflowvisu.tf_format_mnist_images(X, Y, Y_, 1000, lines=25)
datavis = tensorflowvisu.MnistDataVis()

# training step, learning rate = 0.003
learning_rate = 0.003
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

# init
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)


# You can call this function in a loop to train the model, 100 images at a time
def training_step(i, update_test_data, update_train_data):

    # training on batches of 100 images with 100 labels
    batch_X, batch_Y = mnist.train.next_batch(100)

    # compute training values for visualisation
    if update_train_data:
        a, c, im, w, b = sess.run([accuracy, cross_entropy, I, allweights, allbiases], {
            X: batch_X, Y_: batch_Y})
        print(str(i) + ": accuracy:" + str(a) + " loss: " +
              str(c) + " (lr:" + str(learning_rate) + ")")
        datavis.append_training_curves_data(i, a, c)
        datavis.update_image1(im)
        datavis.append_data_histograms(i, w, b)

    # compute test values for visualisation
    if update_test_data:
        a, c, im = sess.run([accuracy, cross_entropy, It], {
            X: mnist.test.images, Y_: mnist.test.labels})
        print(str(i) + ": ********* epoch " + str(i * 100 //
              mnist.train.images.shape[0] + 1) + " ********* test accuracy:" + str(a) + " test loss: " + str(c))
        datavis.append_test_curves_data(i, a, c)
        datavis.update_image2(im)

    # the backpropagation training step
    sess.run(train_step, {X: batch_X, Y_: batch_Y})


'''
datavis.animate(training_step, iterations=10000 + 1, train_data_update_freq=20,
                test_data_update_freq=100, more_tests_at_start=True)
'''

# to save the animation as a movie, add save_movie=True as an argument to datavis.animate
# to disable the visualisation use the following line instead of the datavis.animate line
for i in range(10000 + 1):
    training_step(i, i % 100 == 0, i % 20 == 0)

print("max test accuracy: " + str(datavis.get_max_test_accuracy()))
```
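One thing the listing above does not show is saving the trained model, even though the GUI later restores it from mdlib\md2. A minimal sketch of what has to be appended after training, assuming the same path layout as the restore code (the convolutional script below needs the same lines with its own target directory):

```python
# save the trained variables so the GUI can restore them;
# saving to ...\mdlib\md2\init with global_step=1000 produces
# the init-1000.meta file that import_meta_graph() loads
saver = tf.train.Saver()
md_path = R"C:\Users\tensorflow\tensorflow-mnist-tutorial\TestProject\mdlib\md2\init"
save_path = saver.save(sess, md_path, global_step=1000)
print("Model saved in file: %s" % save_path)
```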
Graphical visualisation of the training process:

The five-layer fully connected network reaches about 97% accuracy.
Calling the model on my own handwritten digits:

- Convolutional neural network
```python
import tensorflow as tf
import tensorflowvisu
import math
from tensorflow.examples.tutorials.mnist import input_data as mnist_data
print("Tensorflow version " + tf.__version__)
tf.set_random_seed(0)

# Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels)
mnist = mnist_data.read_data_sets(
    "data", one_hot=True, reshape=False, validation_size=0)

# neural network structure for this sample:
#
# · · · · · · · · · ·    (input data, 1-deep)            X [batch, 28, 28, 1]
# @ @ @ @ @ @ @ @ @ @ -- conv. layer 5x5x1=>4 stride 1   W1 [5, 5, 1, 4]   B1 [4]
# ∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶                                    Y1 [batch, 28, 28, 4]
#   @ @ @ @ @ @ @ @   -- conv. layer 5x5x4=>8 stride 2   W2 [5, 5, 4, 8]   B2 [8]
#   ∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶                                      Y2 [batch, 14, 14, 8]
#     @ @ @ @ @ @     -- conv. layer 4x4x8=>12 stride 2  W3 [4, 4, 8, 12]  B3 [12]
#     ∶∶∶∶∶∶∶∶∶∶∶                                        Y3 [batch, 7, 7, 12] => reshaped to YY [batch, 7*7*12]
#      \x/x\x\x/      -- fully connected layer (relu)    W4 [7*7*12, 200]  B4 [200]
#       · · · ·                                          Y4 [batch, 200]
#       \x/x\x/       -- fully connected layer (softmax) W5 [200, 10]      B5 [10]
#        · · ·                                           Y [batch, 10]

# input X: 28x28 grayscale images, the first dimension (None) will index the images in the mini-batch
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
# correct answers will go here
Y_ = tf.placeholder(tf.float32, [None, 10])
# variable learning rate
lr = tf.placeholder(tf.float32)

# three convolutional layers with their channel counts, and a
# fully connected layer (the last layer has 10 softmax neurons)
K = 4    # first convolutional layer output depth
L = 8    # second convolutional layer output depth
M = 12   # third convolutional layer
N = 200  # fully connected layer

# 5x5 patch, 1 input channel, K output channels
W1 = tf.Variable(tf.truncated_normal([5, 5, 1, K], stddev=0.1), name="W1")
B1 = tf.Variable(tf.ones([K]) / 10, name="B1")
W2 = tf.Variable(tf.truncated_normal([5, 5, K, L], stddev=0.1), name="W2")
B2 = tf.Variable(tf.ones([L]) / 10, name="B2")
W3 = tf.Variable(tf.truncated_normal([4, 4, L, M], stddev=0.1), name="W3")
B3 = tf.Variable(tf.ones([M]) / 10, name="B3")

W4 = tf.Variable(tf.truncated_normal([7 * 7 * M, N], stddev=0.1), name="W4")
B4 = tf.Variable(tf.ones([N]) / 10, name="B4")
W5 = tf.Variable(tf.truncated_normal([N, 10], stddev=0.1), name="W5")
B5 = tf.Variable(tf.ones([10]) / 10, name="B5")

# The model
stride = 1  # output is 28x28
Y1 = tf.nn.relu(tf.nn.conv2d(
    X, W1, strides=[1, stride, stride, 1], padding='SAME') + B1)
stride = 2  # output is 14x14
Y2 = tf.nn.relu(tf.nn.conv2d(Y1, W2, strides=[
    1, stride, stride, 1], padding='SAME') + B2)
stride = 2  # output is 7x7
Y3 = tf.nn.relu(tf.nn.conv2d(Y2, W3, strides=[
    1, stride, stride, 1], padding='SAME') + B3)

# reshape the output from the third convolution for the fully connected layer
YY = tf.reshape(Y3, shape=[-1, 7 * 7 * M])

Y4 = tf.nn.relu(tf.matmul(YY, W4) + B4)
Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)

# cross-entropy loss function (= -sum(Y_i * log(Yi)) ), normalised for batches of 100 images
# TensorFlow provides the softmax_cross_entropy_with_logits function to avoid numerical stability
# problems with log(0) which is NaN
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
    logits=Ylogits, labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy) * 100

# accuracy of the trained model, between 0 (worst) and 1 (best)
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# matplotlib visualisation
allweights = tf.concat([tf.reshape(W1, [-1]), tf.reshape(W2, [-1]),
                        tf.reshape(W3, [-1]), tf.reshape(W4, [-1]), tf.reshape(W5, [-1])], 0)
allbiases = tf.concat([tf.reshape(B1, [-1]), tf.reshape(B2, [-1]),
                       tf.reshape(B3, [-1]), tf.reshape(B4, [-1]), tf.reshape(B5, [-1])], 0)
I = tensorflowvisu.tf_format_mnist_images(X, Y, Y_)
It = tensorflowvisu.tf_format_mnist_images(X, Y, Y_, 1000, lines=25)
datavis = tensorflowvisu.MnistDataVis()

# training step, the learning rate is a placeholder
train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)

# init
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)


# You can call this function in a loop to train the model, 100 images at a time
def training_step(i, update_test_data, update_train_data):

    # training on batches of 100 images with 100 labels
    batch_X, batch_Y = mnist.train.next_batch(100)

    # learning rate decay
    max_learning_rate = 0.003
    min_learning_rate = 0.0001
    decay_speed = 2000.0
    learning_rate = min_learning_rate + \
        (max_learning_rate - min_learning_rate) * math.exp(-i / decay_speed)

    # compute training values for visualisation
    if update_train_data:
        a, c, im, w, b = sess.run([accuracy, cross_entropy, I, allweights, allbiases], {
            X: batch_X, Y_: batch_Y})
        print(str(i) + ": accuracy:" + str(a) + " loss: " +
              str(c) + " (lr:" + str(learning_rate) + ")")
        datavis.append_training_curves_data(i, a, c)
        datavis.update_image1(im)
        datavis.append_data_histograms(i, w, b)

    # compute test values for visualisation
    if update_test_data:
        a, c, im = sess.run([accuracy, cross_entropy, It], {
            X: mnist.test.images, Y_: mnist.test.labels})
        print(str(i) + ": ********* epoch " + str(i * 100 //
              mnist.train.images.shape[0] + 1) + " ********* test accuracy:" + str(a) + " test loss: " + str(c))
        datavis.append_test_curves_data(i, a, c)
        datavis.update_image2(im)

    # the backpropagation training step
    sess.run(train_step, {X: batch_X, Y_: batch_Y, lr: learning_rate})


# datavis.animate(training_step, 10001, train_data_update_freq=10, test_data_update_freq=100)

# to save the animation as a movie, add save_movie=True as an argument to datavis.animate
# to disable the visualisation use the following line instead of the datavis.animate line
for i in range(10000 + 1):
    training_step(i, i % 100 == 0, i % 20 == 0)

print("max test accuracy: " + str(datavis.get_max_test_accuracy()))
```
Now see what a network with only three convolutional layers can do:

Recognizing my own handwritten digits:

That wraps up the graduation project and the thesis defense. I learned a great deal over these two months and ran into plenty of trouble along the way; I'm posting this in the hope that it helps anyone stuck in a similar place. Good luck.
Reference: "TensorFlow and deep learning, without a PhD" (没有博士学位如何玩转TensorFlow和深度学习)
