BP算法在MNIST數據集上的簡單實現


BP算法在MNIST上的簡單實現

數據:http://yann.lecun.com/exdb/mnist/

參考:blog,blog2,blog3,tensorflow

推導:http://www.cnblogs.com/yueshangzuo/p/8025157.html

基本實現

import struct
import random
import numpy as np
from math import sqrt

class Data:
    """One-hidden-layer sigmoid network trained on MNIST by backpropagation.

    The network is node_in -> node_hide -> node_out (784 -> 30 -> 10 by
    default) with sigmoid activations on both layers and a squared-error
    loss.  Weights are updated one randomly drawn sample at a time.

    The 60000-sample MNIST training file is split into ``num_train``
    training samples followed by ``num_confirm`` confirmation (validation)
    samples; the separate test file supplies ``num_test`` samples.

    Typical use::

        data = Data()
        data.init_network()
        data.training()
        data.testing()      # sets data.accuracy (fraction in [0, 1])
    """

    # Magic numbers stored in the first 4 bytes of MNIST IDX files.
    _IMAGE_MAGIC = 2051
    _LABEL_MAGIC = 2049

    def __init__(self):
        """Set dataset sizes, layer sizes and training hyper-parameters."""
        print('parameter initializing...')
        self.num_train = 50000
        self.num_confirm = 10000
        self.num_test = 10000
        self.node_in = 28 * 28
        self.node_out = 10
        # Hyper-parameters below need tuning; accuracies observed so far:
        # epoch:8 hide_node:39 accuracy:0.9613
        # epoch:8 hide_node:44 accuracy:0.9612
        # epoch:8 hide_node:48 accuracy:0.9624
        # epoch:9 hide_node:48 accuracy:0.9648
        # epoch:10 hide_node:200 accuracy:0.9724
        self.epoch = 15
        self.node_hide = 30
        self.study_rate = 0.05
        self.error_limit = 1e-2

    def _load_images(self, filename, count):
        """Return the first ``count`` images of an IDX3 file.

        The result is a float array of shape (count, node_in) with pixel
        values scaled from 0..255 to 0..1.

        Raises:
            ValueError: if the header does not describe an image file that
                holds at least ``count`` images of ``node_in`` pixels each.
        """
        with open(filename, 'rb') as binfile:
            raw = binfile.read()
        header = '>IIII'  # big-endian unsigned ints: magic, count, rows, columns
        magic, num, rows, columns = struct.unpack_from(header, raw, 0)
        if magic != self._IMAGE_MAGIC:
            raise ValueError('%s is not an IDX3 image file (magic %d)'
                             % (filename, magic))
        if num < count or rows * columns != self.node_in:
            raise ValueError('%s holds %d images of %dx%d, need %d images of %d pixels'
                             % (filename, num, rows, columns, count, self.node_in))
        pixels = np.frombuffer(raw, dtype=np.uint8,
                               count=count * self.node_in,
                               offset=struct.calcsize(header))
        # 28*28 = 784 unsigned bytes per image, flattened to one row each.
        return pixels.reshape(count, self.node_in) / 255.0

    def _load_labels(self, filename, count):
        """Return the first ``count`` labels of an IDX1 file.

        The result is a float array of shape (count, 1), the layout the
        rest of the class (and existing callers) index into.

        Raises:
            ValueError: if the header does not describe a label file that
                holds at least ``count`` labels.
        """
        with open(filename, 'rb') as binfile:
            raw = binfile.read()
        header = '>II'  # big-endian unsigned ints: magic, count
        magic, num = struct.unpack_from(header, raw, 0)
        if magic != self._LABEL_MAGIC:
            raise ValueError('%s is not an IDX1 label file (magic %d)'
                             % (filename, magic))
        if num < count:
            raise ValueError('%s holds %d labels, need %d' % (filename, num, count))
        labels = np.frombuffer(raw, dtype=np.uint8, count=count,
                               offset=struct.calcsize(header))
        return labels.astype(np.float64).reshape(count, 1)

    def read_train_image(self, filename):
        """Load the training file and split it into train and confirm images."""
        print('reading train-image data...')
        images = self._load_images(filename, self.num_train + self.num_confirm)
        self.train_imag_list = images[:self.num_train]
        self.confirm_imag_list = images[self.num_train:]

    def read_train_label(self, filename):
        """Load the training labels and split them into train and confirm labels."""
        print('reading train-label data...')
        labels = self._load_labels(filename, self.num_train + self.num_confirm)
        self.train_label_list = labels[:self.num_train]
        self.confirm_label_list = labels[self.num_train:]

    def read_test_image(self, filename):
        """Load the test images, scaled exactly like the training images."""
        print('reading test-image data...')
        self.test_imag_list = self._load_images(filename, self.num_test)

    def read_test_label(self, filename):
        """Load the test labels."""
        print('reading test-label data...')
        self.test_label_list = self._load_labels(filename, self.num_test)

    def init_network(self):
        """Load the four MNIST files from the working directory and draw initial weights.

        Weights and biases are uniform in +-1/sqrt(fan_in) of their layer.
        """
        print('network initializing...')
        self.read_train_image('train-images.idx3-ubyte')
        self.read_train_label('train-labels.idx1-ubyte')
        self.read_test_image('t10k-images.idx3-ubyte')
        self.read_test_label('t10k-labels.idx1-ubyte')

        # hidden -> output weights (wjk) and output biases (wj0)
        self.wjk = (np.random.rand(self.node_hide, self.node_out) - 0.5) * 2 / sqrt(self.node_hide)
        self.wj0 = (np.random.rand(self.node_out) - 0.5) * 2 / sqrt(self.node_hide)
        # input -> hidden weights (wij) and hidden biases (wi0)
        self.wij = (np.random.rand(self.node_in, self.node_hide) - 0.5) * 2 / sqrt(self.node_in)
        self.wi0 = (np.random.rand(self.node_hide) - 0.5) * 2 / sqrt(self.node_in)

    def sigmode(self, x):
        """Logistic sigmoid 1 / (1 + exp(-x)), applied element-wise."""
        return 1.0 / (1.0 + np.exp(-x))

    def calc_yjzk(self, sample_i, imag_list):
        """Forward pass for row ``sample_i`` of ``imag_list``.

        Stores the hidden pre-activation/activation in ``netj``/``yj`` and
        the output pre-activation/activation in ``netk``/``zk``.
        """
        self.netj = np.dot(imag_list[sample_i], self.wij) + self.wi0
        self.yj = self.sigmode(self.netj)

        self.netk = np.dot(self.yj, self.wjk) + self.wj0
        self.zk = self.sigmode(self.netk)

    def calc_error(self):
        """Return the summed half squared error over the confirmation set."""
        total = 0.0
        for sample_i in range(self.num_confirm):
            self.calc_yjzk(sample_i, self.confirm_imag_list)
            target = np.zeros(self.node_out)
            target[int(self.confirm_label_list[sample_i, 0])] = 1
            total += float(np.sum(np.square(target - self.zk)) / 2.0)
        return total

    def training(self):
        """Train with per-sample gradient descent.

        Each epoch performs ``num_train`` updates, each on a training sample
        drawn uniformly at random (with replacement).
        """
        print('training model...')
        for _ in range(self.epoch):
            for _ in range(self.num_train):
                sample_i = np.random.randint(0, self.num_train)
                self.calc_yjzk(sample_i, self.train_imag_list)

                target = np.zeros(self.node_out)
                target[int(self.train_label_list[sample_i, 0])] = 1

                # Error signals.  delta_j must be computed from the weights
                # used in the forward pass, i.e. before wjk is updated.
                delta_k = (self.zk - target) * self.zk * (1 - self.zk)
                delta_j = np.dot(self.wjk, delta_k) * self.yj * (1 - self.yj)

                # hidden -> output weights and output biases
                self.wjk -= self.study_rate * np.outer(self.yj, delta_k)
                self.wj0 -= self.study_rate * delta_k
                # input -> hidden weights and hidden biases
                self.wij -= self.study_rate * np.outer(self.train_imag_list[sample_i], delta_j)
                self.wi0 -= self.study_rate * delta_j

    def testing(self):
        """Classify the test set, store the hit rate in ``self.accuracy`` and print it."""
        print('testing...')
        num_right = 0.0
        for sample_i in range(self.num_test):
            self.calc_yjzk(sample_i, self.test_imag_list)
            if self.zk.argmax() == int(self.test_label_list[sample_i, 0]):
                num_right += 1
        self.accuracy = num_right / self.num_test
        print('accuracy: %.4f' % (self.accuracy * 100) + '%')
def main():
    """Create the network, load the data, train, and report test accuracy."""
    network = Data()
    # The three stages must run in this order: setup, training, evaluation.
    for stage in (network.init_network, network.training, network.testing):
        stage()

if __name__ == '__main__':
    main()

注意

  1. 注意數據的編碼格式,在數據來源網站最底下有指出,上面還展示了一些機器學習的經典模型在MNIST數據集上的錯誤率可供參考
  2. 權值合理的初始化,及迭代次數,學習速率,隱層節點數的設置可參考經驗值
  3. 數據的歸一化(防止sigmoid函數溢出)
  4. 矩陣乘法時注意行列條件的滿足
  5. 合理的epoch(即迭代次數,學習速率小的時候可以大一點的迭代次數,學習速率大的時候迭代次數取較小值)
  6. 確認合適的迭代次數后可去掉確認集,用全部的樣本數據訓練模型
  7. 隱層節點在一定範圍內越多越好(例如下面的試驗中200個節點優於30-50個節點,但增加到1000個節點時准確率反而下降)

調參腳本

import ann

# Grid search over the number of epochs and hidden nodes.  One result line
# per configuration is appended to the file 'best_parameter'.
# The with-block closes the file even if a run raises.
with open('best_parameter', 'a+') as f:
    for e in range(10,40):
        for node in range(10,50):
            data=ann.Data()
            # Override the defaults before the weights are drawn in init_network().
            data.node_hide=node
            data.epoch=e
            data.init_network()
            data.training()
            data.testing()
            ans='circling to get best parameter----->epoch:%d hide_node:%d accuracy:%.4f\n' % (e,node,data.accuracy)
            print(ans)
            f.write(ans)
            # The sweep is long-running; flush so finished results survive an interruption.
            f.flush()

可迭代計算迭代次數和隱層節點的數目對准確率的影響,大致規律是在學習速率0.05時,迭代次數在10-15為宜,隱層節點30以上

一些試驗的結果如下:

circling to get best parameter----->epoch:14 hide_node:43 accuracy:0.9656
circling to get best parameter----->epoch:14 hide_node:44 accuracy:0.9651
circling to get best parameter----->epoch:14 hide_node:45 accuracy:0.9638
circling to get best parameter----->epoch:14 hide_node:46 accuracy:0.9641
circling to get best parameter----->epoch:14 hide_node:47 accuracy:0.9649
circling to get best parameter----->epoch:14 hide_node:48 accuracy:0.9651
circling to get best parameter----->epoch:14 hide_node:49 accuracy:0.9671
circling to get best parameter----->epoch:15 hide_node:46 accuracy:0.9661
circling to get best parameter----->epoch:15 hide_node:47 accuracy:0.9660
circling to get best parameter----->epoch:15 hide_node:48 accuracy:0.9650
circling to get best parameter----->epoch:15 hide_node:49 accuracy:0.9655
circling to get best parameter----->epoch:10 hide_node:100 accuracy:0.9685
circling to get best parameter----->epoch:10 hide_node:200 accuracy:0.9724
circling to get best parameter----->epoch:10 hide_node:300 accuracy:0.9718
circling to get best parameter----->epoch:10 hide_node:1000 accuracy:0.9568

Tensorflow實現

import argparse

# Import data
from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf

FLAGS = None

def weight_variable(shape):
    """Return a tf.Variable of the given shape drawn from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a tf.Variable of the given shape with every element set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    """Convolve x with filter W using stride 1 in every dimension and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool x over 2x2 windows with stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')


def add_layer(inputs, in_size, out_size, activation_function=None):
    """Add a fully connected layer and return its output tensor.

    Creates an [in_size, out_size] weight variable and an [out_size] bias
    variable, computes ``inputs * W + b`` and, when ``activation_function``
    is given, applies it to that result.
    """
    layer_weights = weight_variable([in_size, out_size])
    layer_biases = bias_variable([out_size])
    pre_activation = tf.matmul(inputs, layer_weights) + layer_biases
    if activation_function is not None:
        return activation_function(pre_activation)
    # No activation requested: the layer is purely linear.
    return pre_activation


def main(_):
    """Build a two-conv-layer CNN, train it on MNIST and print accuracy every 100 steps.

    The data set is read from ``FLAGS.data_dir`` with one-hot labels.  The
    single positional argument is ignored.
    """
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

    # reshape the input to have batch size, width, height, channel size
    x = tf.placeholder(tf.float32, [None, 784])
    x_image = tf.reshape(x, [-1, 28, 28, 1])

    # 5*5 patch size, input channel is 1, output channel is 32
    W_conv1 = weight_variable([5, 5, 1, 32])

    # bias, same size with the output channel
    b_conv1 = bias_variable([32])

    # the first convolutional layer with a max pooling layer
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    # after pooling, we have a tensor with shape [-1, 14, 14, 32]

    # the weights and bias for the second layer, we will get 64 channels
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])

    # the second convolutional layer with a max pooling layer
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    # after pooling, we have a tensor with shape [-1, 7, 7, 64]

    # add a fully connected layer with 1024 neurons and use relu as the activation function
    h_pool2_flat = tf.reshape(h_pool2, [-1,7*7*64])
    h_fc1 = add_layer(h_pool2_flat, 7*7*64, 1024, tf.nn.relu)

    # we add dropout for the fully connected layer to avoid overfitting
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # finally, the output layer (raw logits, no activation)
    y_conv = add_layer(h_fc1_drop, 1024, 10, None)

    # loss (cross entropy summed over the batch), optimizer and accuracy metric
    y_ = tf.placeholder(tf.float32, [None, 10])
    cross_entropy = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # start training, and we test our model every 100 steps
    sess = tf.InteractiveSession()
    try:
        # tf.initialize_all_variables() is deprecated; this is its replacement.
        sess.run(tf.global_variables_initializer())
        for i in range(10000):
            batch = mnist.train.next_batch(100)
            if i % 100 == 0:
                # dropout is disabled (keep_prob 1.0) while measuring accuracy
                train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
                test_accuracy = accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0})
                print("step %d, training accuracy %g, test accuracy %g" % (i, train_accuracy, test_accuracy))

            train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
    finally:
        # release the session's resources even if training is interrupted
        sess.close()



if __name__ == '__main__':
    cli = argparse.ArgumentParser()

    # Change the default below (or pass --data_dir) to point at your own copy of the dataset.
    cli.add_argument(
        '--data_dir',
        type=str,
        default='/tmp/mnist',
        help='Directory for storing data',
    )
    # main() reads the parsed options through the module-level FLAGS.
    FLAGS = cli.parse_args()
    tf.app.run()

需要配置 TensorFlow 1.x 和 Python 3 的運行環境

結果如下

step 0, training accuracy 0.06, test accuracy 0.0892
step 100, training accuracy 0.86, test accuracy 0.8692
step 200, training accuracy 0.97, test accuracy 0.9207
step 300, training accuracy 0.92, test accuracy 0.9403
step 400, training accuracy 0.95, test accuracy 0.9485
step 500, training accuracy 0.91, test accuracy 0.9522
step 600, training accuracy 0.97, test accuracy 0.9565
step 700, training accuracy 0.97, test accuracy 0.9622
step 800, training accuracy 0.96, test accuracy 0.9638
step 900, training accuracy 0.98, test accuracy 0.9687
step 1000, training accuracy 0.97, test accuracy 0.9703

有任何環境配置的問題請聯系,歡迎指出錯誤


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM