最近学习了卷积神经网络,推荐一些比较好的学习资源
1: https://www.zybuluo.com/hanbingtao/note/485480
2: http://blog.csdn.net/u010540396/article/details/52895074
对于网址1,我学习用到的大部分资料和数学公式都来源于此,强烈推荐学习。
对于网址2,我下面的代码就是在其基础上改写的,作者是用matlab实现的,这对于不会matlab的同学而言,会比较费时,毕竟,
我们要做的是搞懂卷积神经网络,而不是某一个编程语言。
而且最重要的是,我自己想弄明白CNN的前向网络和误差反向传播算法,自己亲自实现一遍,更有助于理解和记忆,哪怕是看着别人的代码学会的。
A:下面的代码实现的是简化版的LeNet-5:只有一个卷积层、一个mean-pooling层和一个全连接层,最后的输出经过softmax层。
B:使用的数据集是MNIST,你可以到 http://yann.lecun.com/exdb/mnist/ 下载。
C:MNIST的数据解析,可以使用我下面的analysisMNIST.py,只需修改其中的路径(感谢 http://blog.csdn.net/u014046170/article/details/47445919 ),解析之后得到的数据如下:
D:在C解析完之后,我把label文件的内容转置了,开始的时候是一行,我改成了一列。
注:代码里面的TODO是很多公式推导,我有空会敲出来,然后也作为超链接给弄出来,怕自己下次又给忘了。
我的CNN代码总共有三个文件(另外还有一个单独解析MNIST的脚本,放在最后):
这是我定义的全局变量的文件 gParam.py
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""Global parameters shared by myCnn.py and myCnnTest.py."""

# Directory that holds the MNIST digits exported as "<index>.png".
TOP_PATH = '/media/autumn/Work/data/MNIST/mnist-png/'
# Text file with the digit labels, read by Ccnn.read_label().
LAB_PATH = '/media/autumn/Work/data/MNIST/mnist-png/label1.txt'
# Side length of the square convolution kernels.
C_SIZE = 5
# Side length of the square kernels used by the fully connected layer.
F_NUM = 12
# Side length of the square pooling window.
P_SIZE = 2
# File extension appended to the image index when building a file name.
FILE_TYPE = '.png'
# Number of passes over the training images.
MAX_ITER_NUM = 50
这是我测试的文件myCnnTest.py
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""Train the small CNN on MNIST PNG images, then print predictions.

The script trains on images 0 .. train_num-1 for gParam.MAX_ITER_NUM passes
and afterwards prints the true and the predicted digit for the 100 images
with indices train_num+1 .. train_num+100.
"""

import numpy as np

import gParam
from myCnn import Ccnn

# Network layout: number of convolution maps, pooling maps, fully connected
# neurons and output classes.
cLyNum = 20
pLyNum = 20
fLyNum = 100
oLyNum = 10
# Number of images used for training.
train_num = 800
# Number of images used for the prediction printout.
test_count = 100


def forward(cnn, data):
    """Run one image through the network.

    Args:
        cnn: a Ccnn instance providing the weights and the layer helpers.
        data: 2-D array holding one image.

    Returns:
        Tuple (state_c, state_p, state_f_pre, state_fo, output):
        state_c     -- activated convolution maps, one per last-axis index
        state_p     -- pooled maps, one per last-axis index
        state_f_pre -- weighted sums of the pooled maps, as returned second
                       by Ccnn.convolution_f1
        state_fo    -- activated fully connected layer, shape (1, fLyNum)
        output      -- softmax output as returned by Ccnn.softmax_layer
    """
    d_m, d_n = np.shape(data)
    m_c = d_m - gParam.C_SIZE + 1
    n_c = d_n - gParam.C_SIZE + 1
    # Floor division: these values are array dimensions and must be ints.
    m_p = m_c // cnn.pSize
    n_p = n_c // cnn.pSize
    state_c = np.zeros((m_c, n_c, cnn.cLyNum))
    state_p = np.zeros((m_p, n_p, cnn.pLyNum))
    for n in range(cnn.cLyNum):
        conv = cnn.convolution(data, cnn.kernel_c[:, :, n])
        # Add the per-map bias, then apply the activation function.
        state_c[:, :, n] = np.tanh(conv + cnn.cLyBias[0, n])
        state_p[:, :, n] = cnn.pooling(state_c[:, :, n], cnn.pooling_a)
    state_f, state_f_pre = cnn.convolution_f1(state_p, cnn.kernel_f,
                                              cnn.weight_f)
    # Each fully connected neuron must have produced exactly one value; the
    # reshape plus broadcast fails loudly if that is not the case.
    state_fo = np.tanh(state_f.reshape(1, -1) + cnn.fLyBias)
    output = cnn.softmax_layer(state_fo)
    return state_c, state_p, state_f_pre, state_fo, output


def main():
    """Train the network and print predictions for the held-out images."""
    cnn = Ccnn(cLyNum, pLyNum, fLyNum, oLyNum)
    ylabel = cnn.read_label(gParam.LAB_PATH)

    # Load every training image once instead of once per pass.
    train_set = [(cnn.read_pic_data(gParam.TOP_PATH, i), int(ylabel[i]))
                 for i in range(train_num)]

    for _ in range(gParam.MAX_ITER_NUM):
        for data, ylab in train_set:
            state_c, state_p, state_f_pre, state_fo, output = \
                forward(cnn, data)
            # Passed only to satisfy the cnn_upweight signature; the update
            # itself is derived from `output` and `ylab`.
            err = -output[:, ylab]
            cnn.cnn_upweight(err, ylab, data, state_c, state_p,
                             state_fo, state_f_pre, output)

    # predict
    for i in range(train_num + 1, train_num + 1 + test_count):
        data = cnn.read_pic_data(gParam.TOP_PATH, i)
        ylab = int(ylabel[i])
        output = forward(cnn, data)[-1]
        y_pre = int(output.argmax(axis=1)[0])
        print('真实数字为%d 预测数字是%d' % (ylab, y_pre))


if __name__ == '__main__':
    main()
接下来是CNN的核心代码,里面有中文注释,文件名是myCnn.py
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""A small CNN: one convolution layer, one mean-pooling layer, one fully
connected layer and a softmax output, trained one sample at a time."""

import copy
import math

import matplotlib.image as mgimg
import matplotlib.pyplot as plt
import numpy as np
import scipy.signal as signal

import gParam

# One generator, seeded once: runs are reproducible, but successive calls to
# rand_arr() return different values.  Reseeding inside rand_arr() would hand
# every kernel of a layer the same initial values.
_RNG = np.random.RandomState(0)


def rand_arr(a, b, *args):
    """Return an ndarray of shape `args` with uniform values in [a, b)."""
    return _RNG.rand(*args) * (b - a) + a


class Ccnn(object):
    """Convolution -> tanh -> mean pooling -> fully connected -> softmax.

    Attributes:
        cLyNum, pLyNum: number of convolution / pooling maps (the code
            indexes both layers with the same map index).
        fLyNum, oLyNum: number of fully connected neurons / output classes.
        pSize: side length of the pooling window (gParam.P_SIZE).
        yita: learning rate.
        kernel_c: (C_SIZE, C_SIZE, cLyNum) convolution kernels.
        cLyBias: (1, cLyNum) convolution biases.
        weight_f: (pLyNum, fLyNum) weights mixing the pooled maps.
        kernel_f: (F_NUM, F_NUM, fLyNum) fully connected kernels.
        fLyBias: (1, fLyNum) fully connected biases.
        weight_output: (fLyNum, oLyNum) softmax weights.
        pooling_a: (pSize, pSize) averaging window.
    """

    def __init__(self, cLyNum, pLyNum, fLyNum, oLyNum):
        self.cLyNum = cLyNum
        self.pLyNum = pLyNum
        self.fLyNum = fLyNum
        self.oLyNum = oLyNum
        self.pSize = gParam.P_SIZE
        self.yita = 0.01
        self.cLyBias = rand_arr(-0.1, 0.1, 1, cLyNum)
        self.fLyBias = rand_arr(-0.1, 0.1, 1, fLyNum)
        self.kernel_c = rand_arr(-0.1, 0.1,
                                 gParam.C_SIZE, gParam.C_SIZE, cLyNum)
        self.kernel_f = rand_arr(-0.1, 0.1,
                                 gParam.F_NUM, gParam.F_NUM, fLyNum)
        self.pooling_a = (np.ones((self.pSize, self.pSize))
                          / float(self.pSize ** 2))
        self.weight_f = rand_arr(-0.1, 0.1, pLyNum, fLyNum)
        self.weight_output = rand_arr(-0.1, 0.1, fLyNum, oLyNum)

    def read_pic_data(self, path, i):
        """Read image number `i` from directory prefix `path`.

        The file name is `path` + str(i) + gParam.FILE_TYPE.

        Returns:
            The image as a float64 ndarray.

        Raises:
            Exception: if the file cannot be opened.
        """
        full_path = path + '%d' % i + gParam.FILE_TYPE
        try:
            data = mgimg.imread(full_path)
        except IOError:
            raise Exception('open file error in read_pic_data():', full_path)
        return np.asarray(data, dtype=np.float64)

    def read_label(self, path):
        """Read the labels stored in the text file `path`.

        Labels may be separated by newlines, commas or blanks, so both a
        one-label-per-line file and a single comma-separated line work.

        Returns:
            List of label strings in file order.

        Raises:
            Exception: if the file cannot be opened.
        """
        ylab = []
        try:
            with open(path, 'r') as fobj:
                for line in fobj:
                    ylab.extend(line.replace(',', ' ').split())
        except IOError:
            raise Exception('open file error in read_label():', path)
        return ylab

    # convolution layer
    def convolution(self, data, kernel):
        """Valid-mode sliding-window product sum of `kernel` over `data`.

        out[i, j] = sum(data[i:i+kr, j:j+kc] * kernel); the kernel is not
        flipped, i.e. this is a cross-correlation.

        Returns:
            ndarray of shape (rows - kr + 1, cols - kc + 1).
        """
        return signal.correlate2d(data, kernel, 'valid')

    # pooling layer
    def pooling(self, data, pooling_a):
        """Pool `data` over non-overlapping windows weighted by `pooling_a`.

        Each output value is the sum of one window multiplied element-wise
        by `pooling_a`; with the averaging window built in __init__ this is
        mean pooling.  Rows/columns that do not fill a whole window are
        dropped.

        Returns:
            ndarray of shape (rows // p_r, cols // p_c).
        """
        data_r, data_c = np.shape(data)
        p_r, p_c = np.shape(pooling_a)
        r0 = data_r // p_r
        c0 = data_c // p_c
        # blocks[i, a, j, b] == data[i * p_r + a, j * p_c + b]
        blocks = np.asarray(data)[:r0 * p_r, :c0 * p_c].reshape(
            r0, p_r, c0, p_c)
        return np.einsum('iajb,ab->ij', blocks, pooling_a)

    # fully connected layer
    def convolution_f1(self, state_p1, kernel_f1, weight_f1):
        """Fully connected layer before bias and activation.

        For every neuron n the pooled maps are first combined into one map,
        sum_m state_p1[:, :, m] * weight_f1[m, n], which is then passed
        through self.convolution with that neuron's kernel.

        Args:
            state_p1: (rows, cols, number of pooled maps) pooled maps.
            kernel_f1: (k_rows, k_cols, number of neurons) kernels.
            weight_f1: (number of pooled maps, number of neurons) weights.

        Returns:
            Tuple (state_f1, state_f1_temp): the result after the kernels,
            shape (rows - k_rows + 1, cols - k_cols + 1, neurons), and the
            combined maps before the kernels, shape (rows, cols, neurons).
        """
        n_f = np.shape(weight_f1)[1]
        m_p, n_p = np.shape(state_p1)[:2]
        m_k_f1, n_k_f1 = np.shape(kernel_f1)[:2]
        # Contracts the map axis of state_p1 with the rows of weight_f1.
        state_f1_temp = np.dot(state_p1, weight_f1)
        state_f1 = np.zeros((m_p - m_k_f1 + 1, n_p - n_k_f1 + 1, n_f))
        for n in range(n_f):
            state_f1[:, :, n] = self.convolution(state_f1_temp[:, :, n],
                                                 kernel_f1[:, :, n])
        return state_f1, state_f1_temp

    # softmax layer
    def softmax_layer(self, state_f1):
        """Return softmax(state_f1 . weight_output) with shape (1, oLyNum)."""
        scores = np.dot(state_f1, self.weight_output)
        # Subtracting the maximum leaves the result unchanged and keeps
        # exp() from overflowing.
        exp_scores = np.exp(scores - np.max(scores))
        return (exp_scores / exp_scores.sum()).reshape(1, self.oLyNum)

    # back-propagate the error and update all weights
    def cnn_upweight(self, err_cost, ylab, train_data, state_c1,
                     state_s1, state_f1, state_f1_temp, output):
        """Do one gradient-descent step for a single training sample.

        All gradients are computed from the current weights before any
        weight is changed.

        Args:
            err_cost: unused; kept so existing callers keep working.
            ylab: integer class index of the sample.
            train_data: 2-D input image.
            state_c1: activated (tanh) convolution maps.
            state_s1: pooled maps.
            state_f1: activated (tanh) fully connected layer, (1, fLyNum).
            state_f1_temp: combined pooled maps, as returned second by
                convolution_f1.
            output: softmax output, (1, oLyNum).
        """
        label = np.zeros((1, self.oLyNum))
        label[0, ylab] = 1
        # Error signal at the softmax input: output minus one-hot label.
        delta_output = output - label

        grad_weight_output = np.dot(state_f1.T, delta_output)

        # state_f1 already holds tanh(z), so tanh'(z) is 1 - state_f1 ** 2.
        delta_f1 = (np.dot(delta_output, self.weight_output.T)
                    * (1 - state_f1 ** 2))
        # Each neuron's kernel gradient is its delta times its input map;
        # delta_f1[0] broadcasts along the neuron axis.
        grad_kernel_f = state_f1_temp * delta_f1[0]

        # Error signal at each neuron's input map: its delta times its
        # kernel.
        grad_f_input = self.kernel_f * delta_f1[0]
        # grad_weight_f[n, m] = sum(grad_f_input[:, :, m] * state_s1[:, :, n])
        grad_weight_f = np.tensordot(state_s1, grad_f_input,
                                     axes=([0, 1], [0, 1]))

        # delta_p[:, :, n] = sum_m grad_f_input[:, :, m] * weight_f[n, m]
        delta_p = np.dot(grad_f_input, self.weight_f.T)
        delta_c = np.zeros(np.shape(state_c1))
        for n in range(self.cLyNum):
            # Spread every pooled error evenly over its pooling window,
            # then apply the tanh derivative (state_c1 holds tanh(z)).
            delta_c[:, :, n] = (np.kron(delta_p[:, :, n], self.pooling_a)
                                * (1 - state_c1[:, :, n] ** 2))
        grad_bias_c = delta_c.sum(axis=(0, 1)).reshape(1, self.cLyNum)

        grad_kernel_c = np.zeros(np.shape(self.kernel_c))
        for n in range(self.cLyNum):
            # The forward pass is a valid cross-correlation, so the kernel
            # gradient is the valid cross-correlation of input and delta.
            grad_kernel_c[:, :, n] = signal.correlate2d(
                train_data, delta_c[:, :, n], 'valid')

        self.weight_output = self.weight_output - self.yita * grad_weight_output
        self.fLyBias = self.fLyBias - self.yita * delta_f1
        self.kernel_f = self.kernel_f - self.yita * grad_kernel_f
        self.weight_f = self.weight_f - self.yita * grad_weight_f
        self.cLyBias = self.cLyBias - self.yita * grad_bias_c
        self.kernel_c = self.kernel_c - self.yita * grad_kernel_c

    def _forward(self, data):
        """Run one image through the network.

        Returns:
            Tuple (state_c, state_p, state_f_pre, state_fo, output) in the
            form cnn_upweight expects.
        """
        d_m, d_n = np.shape(data)
        m_c = d_m - gParam.C_SIZE + 1
        n_c = d_n - gParam.C_SIZE + 1
        state_c = np.zeros((m_c, n_c, self.cLyNum))
        state_p = np.zeros((m_c // self.pSize, n_c // self.pSize,
                            self.pLyNum))
        for n in range(self.cLyNum):
            conv = self.convolution(data, self.kernel_c[:, :, n])
            state_c[:, :, n] = np.tanh(conv + self.cLyBias[0, n])
            state_p[:, :, n] = self.pooling(state_c[:, :, n], self.pooling_a)
        state_f, state_f_pre = self.convolution_f1(state_p, self.kernel_f,
                                                   self.weight_f)
        # Fails loudly unless every neuron produced exactly one value.
        state_fo = np.tanh(state_f.reshape(1, -1) + self.fLyBias)
        output = self.softmax_layer(state_fo)
        return state_c, state_p, state_f_pre, state_fo, output

    # predict
    def cnn_predict(self, data):
        """Return the predicted class index (int) for one 2-D image."""
        output = self._forward(data)[-1]
        return int(output.argmax(axis=1)[0])
这是单独解析MNIST的脚本,analysisMNIST.py,修改相应的路径后,运行能成功
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Convert MNIST idx files into PNG images and a text label file."""

import os
import struct


def read_image(filename, save_dir='/media/autumn/Work/data/MNIST/mnist-png/'):
    """Write every image of an idx3-ubyte file as "<index>.png".

    Args:
        filename: path of the idx3-ubyte image file.
        save_dir: directory that receives the PNG files.
    """
    # Imported here so that label parsing works without Pillow installed.
    from PIL import Image

    with open(filename, 'rb') as f:
        buf = f.read()

    # Header: four big-endian unsigned 32-bit ints.
    magic, images, rows, columns = struct.unpack_from('>IIII', buf, 0)
    index = struct.calcsize('>IIII')
    pixels_per_image = rows * columns

    for i in range(images):
        # One byte per pixel, stored row by row, so a whole image can be
        # decoded from its byte slice in one call.
        image = Image.frombytes('L', (columns, rows),
                                buf[index:index + pixels_per_image])
        index += pixels_per_image

        print('save ' + str(i) + 'image')
        image.save(os.path.join(save_dir, str(i) + '.png'))


def read_label(filename, saveFilename):
    """Write the labels of an idx1-ubyte file as one comma-separated line.

    Args:
        filename: path of the idx1-ubyte label file.
        saveFilename: path of the text file to write.
    """
    with open(filename, 'rb') as f:
        buf = f.read()

    # Header: two big-endian unsigned 32-bit ints, then one byte per label.
    magic, labels = struct.unpack_from('>II', buf, 0)
    offset = struct.calcsize('>II')
    labelArr = struct.unpack_from('>%dB' % labels, buf, offset)

    with open(saveFilename, 'w') as save:
        save.write(','.join(str(x) for x in labelArr))
        save.write('\n')

    print('save labels success')


if __name__ == '__main__':
    read_image('/media/autumn/Work/data/MNIST/mnist/t10k-images.idx3-ubyte')
    read_label('/media/autumn/Work/data/MNIST/mnist/t10k-labels.idx1-ubyte', '/media/autumn/Work/data/MNIST/mnist-png/label.txt')