Python3 BP Neural Network


Reposted from 麥子學院

  1 """
  2 network.py
  3 ~~~~~~~~~~
  4 
  5 A module to implement the stochastic gradient descent learning
  6 algorithm for a feedforward neural network.  Gradients are calculated
  7 using backpropagation.  Note that I have focused on making the code
  8 simple, easily readable, and easily modifiable.  It is not optimized,
  9 and omits many desirable features.
 10 """
 11 
 12 #### Libraries
 13 # Standard library
 14 import random
 15 
 16 # Third-party libraries
 17 import numpy as np
 18 
 19 class Network(object):
 20 
 21     def __init__(self, sizes):
 22         """The list ``sizes`` contains the number of neurons in the
 23         respective layers of the network.  For example, if the list
 24         was [2, 3, 1] then it would be a three-layer network, with the
 25         first layer containing 2 neurons, the second layer 3 neurons,
 26         and the third layer 1 neuron.  The biases and weights for the
 27         network are initialized randomly, using a Gaussian
 28         distribution with mean 0, and variance 1.  Note that the first
 29         layer is assumed to be an input layer, and by convention we
 30         won't set any biases for those neurons, since biases are only
 31         ever used in computing the outputs from later layers."""
 32         self.num_layers = len(sizes)
 33         self.sizes = sizes
 34         self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
 35         self.weights = [np.random.randn(y, x)
 36                         for x, y in zip(sizes[:-1], sizes[1:])]
 37 
 38     def feedforward(self, a):
 39         """Return the output of the network if ``a`` is input."""
 40         for b, w in zip(self.biases, self.weights):
 41             a = sigmoid(np.dot(w, a)+b)
 42         return a
 43 
 44     def SGD(self, training_data, epochs, mini_batch_size, eta,
 45             test_data=None):
 46         """Train the neural network using mini-batch stochastic
 47         gradient descent.  The ``training_data`` is a list of tuples
 48         ``(x, y)`` representing the training inputs and the desired
 49         outputs.  The other non-optional parameters are
 50         self-explanatory.  If ``test_data`` is provided then the
 51         network will be evaluated against the test data after each
 52         epoch, and partial progress printed out.  This is useful for
 53         tracking progress, but slows things down substantially."""
 54         if test_data: n_test = len(test_data)
 55         n = len(training_data)
 56         for j in range(epochs):
 57             random.shuffle(training_data)
 58             mini_batches = [
 59                 training_data[k:k+mini_batch_size]
 60                 for k in range(0, n, mini_batch_size)]
 61             for mini_batch in mini_batches:
 62                 self.update_mini_batch(mini_batch, eta)
 63             if test_data:
 64                 print ("Epoch {0}: {1} / {2}".format(
 65                     j, self.evaluate(test_data), n_test))
 66             else:
 67                 print ("Epoch {0} complete".format(j))
 68 
 69     def update_mini_batch(self, mini_batch, eta):
 70         """Update the network's weights and biases by applying
 71         gradient descent using backpropagation to a single mini batch.
 72         The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
 73         is the learning rate."""
 74         nabla_b = [np.zeros(b.shape) for b in self.biases]
 75         nabla_w = [np.zeros(w.shape) for w in self.weights]
 76         #一個一個的進行訓練  跟吳恩達的Mini-Batch 不一樣
 77         for x, y in mini_batch:
 78             delta_nabla_b, delta_nabla_w = self.backprop(x, y)
 79             nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
 80             nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
 81         self.weights = [w-(eta/len(mini_batch))*nw
 82                         for w, nw in zip(self.weights, nabla_w)]
 83         self.biases = [b-(eta/len(mini_batch))*nb
 84                        for b, nb in zip(self.biases, nabla_b)]
 85 
 86     def backprop(self, x, y):
 87         """Return a tuple ``(nabla_b, nabla_w)`` representing the
 88         gradient for the cost function C_x.  ``nabla_b`` and
 89         ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
 90         to ``self.biases`` and ``self.weights``."""
 91         nabla_b = [np.zeros(b.shape) for b in self.biases]
 92         nabla_w = [np.zeros(w.shape) for w in self.weights]
 93         # feedforward
 94         activation = x
 95         activations = [x] # list to store all the activations, layer by layer
 96         zs = [] # list to store all the z vectors, layer by layer
 97         for b, w in zip(self.biases, self.weights):
 98             z = np.dot(w, activation)+b
 99             zs.append(z)
100             activation = sigmoid(z)
101             activations.append(activation)
102         # backward pass
103         delta = self.cost_derivative(activations[-1], y) * \
104             sigmoid_prime(zs[-1])
105         nabla_b[-1] = delta
106         nabla_w[-1] = np.dot(delta, activations[-2].transpose())
107         # Note that the variable l in the loop below is used a little
108         # differently to the notation in Chapter 2 of the book.  Here,
109         # l = 1 means the last layer of neurons, l = 2 is the
110         # second-last layer, and so on.  It's a renumbering of the
111         # scheme in the book, used here to take advantage of the fact
112         # that Python can use negative indices in lists.
113         for l in range(2, self.num_layers):
114             z = zs[-l]
115             sp = sigmoid_prime(z)
116             delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
117             nabla_b[-l] = delta
118             nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
119         return (nabla_b, nabla_w)
120 
121     def evaluate(self, test_data):
122         """Return the number of test inputs for which the neural
123         network outputs the correct result. Note that the neural
124         network's output is assumed to be the index of whichever
125         neuron in the final layer has the highest activation."""
126         test_results = [(np.argmax(self.feedforward(x)), y)
127                         for (x, y) in test_data]
128         return sum(int(x == y) for (x, y) in test_results)
129 
130     def cost_derivative(self, output_activations, y):
131         """Return the vector of partial derivatives \partial C_x /
132         \partial a for the output activations."""
133         return (output_activations-y)
134 
135 #### Miscellaneous functions
136 def sigmoid(z):
137     """The sigmoid function."""
138     return 1.0/(1.0+np.exp(-z))
139 
140 def sigmoid_prime(z):
141     """Derivative of the sigmoid function."""
142     return sigmoid(z)*(1-sigmoid(z))
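For context, here is a minimal usage sketch of the Network class above (my own addition, not part of the original module). The toy training pairs are made up purely for illustration; the point is the data format the class assumes, namely that each sample is a tuple (x, y) of numpy column vectors, and that [2, 3, 1] means 2 inputs, a hidden layer of 3 neurons, and 1 output.

import numpy as np

# Made-up toy data: each x is a 2x1 column vector, each y a 1x1 column vector.
training_data = [
    (np.array([[0.0], [0.0]]), np.array([[0.0]])),
    (np.array([[0.0], [1.0]]), np.array([[1.0]])),
    (np.array([[1.0], [0.0]]), np.array([[1.0]])),
    (np.array([[1.0], [1.0]]), np.array([[0.0]])),
]

net = Network([2, 3, 1])
net.SGD(training_data, epochs=30, mini_batch_size=2, eta=3.0)
print(net.feedforward(np.array([[0.0], [1.0]])))  # activation of the output neuron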

This algorithm is more accurate than the neural network I wrote before, but I ran into errors while testing it, and the comments scattered through the code were hard for me to follow because they don't connect well to the theory. I built on it and made the algorithm more extensible, tested the revised code myself, and the results are excellent.

# -*- coding: utf-8 -*-
"""
Created on Thu Jan 18 15:27:24 2018

@author: markli
"""

import numpy as np
import random

def tanh(x):
    return np.tanh(x)

def tanh_derivative(x):
    return 1.0 - np.tanh(x)*np.tanh(x)

def logistic(x):
    return 1/(1 + np.exp(-x))

def logistic_derivative(x):
    return logistic(x)*(1-logistic(x))

def ReLU(x, a=1):
    # np.maximum works element-wise on arrays; the built-in max() would not.
    return np.maximum(0, a * x)

def ReLU_derivative(x, a=1):
    return np.where(x < 0, 0, a)

class NeuralNetwork:
    '''
    Z = W * x + b
    A = sigmoid(Z)
    Z: net input
    x: sample matrix, n * m (n features, m samples)
    b: bias
    W: weights
    A: output activation
    '''
    def __init__(self, layers, active_function=[logistic], active_function_der=[logistic_derivative], learn_rate=0.9):
        """
        Initialize the neural network.
        layers holds the number of neurons in each layer; its length is the number of layers.
        active_function assigns an activation function to each layer; a list of
        length 1 means all layers share the same activation function.
        active_function_der holds the corresponding derivatives.
        learn_rate is the learning rate.
        """
        self.weights = [np.random.randn(x, y) for x, y in zip(layers[1:], layers[:-1])]
        self.biases = [np.random.randn(x, 1) for x in layers[1:]]
        self.size = len(layers)
        self.rate = learn_rate
        if len(active_function) == self.size - 1:
            self.sigmoids = active_function
        else:
            self.sigmoids = [active_function[0]] * (self.size - 1)
        if len(active_function_der) == self.size - 1:
            self.sigmoids_der = active_function_der
        else:
            self.sigmoids_der = [active_function_der[0]] * (self.size - 1)

    def fit(self, TrainData, epochs=1000, mini_batch_size=32):
        """
        Learn the network parameters with the backpropagation algorithm.
        TrainData is a list of (x, y) pairs, one per sample:
        x is a feature vector of length n (n features)
        y is a target vector of length t (t class labels)
        epochs: number of training iterations
        mini_batch_size: size of each mini-batch; use mini_batch_size = 1 to disable mini-batching
        """
        n = len(TrainData)
        for i in range(epochs):
            random.shuffle(TrainData)
            mini_batches = [
                TrainData[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.BP(mini_batch, self.rate)

    def predict(self, x):
        """Forward propagation."""
        i = 0
        for b, w in zip(self.biases, self.weights):
            x = self.sigmoids[i](np.dot(w, x)+b)
            i = i + 1
        return x

    def BP(self, mini_batch, rate):
        """
        BP neural network update for one mini-batch.
        """
        size = len(mini_batch)

        nabla_b = [np.zeros(b.shape) for b in self.biases]   # accumulated change of b over the mini-batch
        nabla_w = [np.zeros(w.shape) for w in self.weights]  # accumulated change of w over the mini-batch
        # process the samples one at a time
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]  # accumulate the change of b
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]  # accumulate the change of w
        self.weights = [w-(rate/size)*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(rate/size)*nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """
        x is a 1-D row vector
        y is a 1-D row vector
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # feedforward
        activation = np.atleast_2d(x).reshape((len(x), 1))  # convert to a column vector
        activations = [activation]  # store the activation of every layer
        zs = []  # store the z vector of every layer
        i = 0
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = self.sigmoids[i](z)
            activations.append(activation)
            i = i + 1
        # backward pass
        y = np.atleast_2d(y).reshape((len(y), 1))  # convert y to a column vector
        # delta is the partial derivative of the cost with respect to z
        delta = self.cost_der(activations[-1], y) * \
            self.sigmoids_der[-1](zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, np.transpose(activations[-2]))
        # walk backwards through the layers, starting from the second-to-last layer
        for l in range(2, self.size):
            z = zs[-l]                          # z of the current layer
            sp = self.sigmoids_der[-l](z)       # derivative of the activation at z
            delta = np.multiply(np.dot(np.transpose(self.weights[-l+1]), delta), sp)  # error of the current layer
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, np.transpose(activations[-l-1]))
        return (nabla_b, nabla_w)

    """
    Loss functions:
    cost_der is the derivative of the squared-error loss with respect to a.
    cost_cross_entropy_der is the derivative of the cross-entropy loss with respect to a.
    """
    def cost_der(self, a, y):
        return a - y

    def cost_cross_entropy_der(self, a, y):
        return (a-y)/(a * (1-a))
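To illustrate the extensibility mentioned above, here is a minimal sketch of my own (with arbitrary layer sizes) showing how a different activation function can be assigned to each layer; with the default arguments, every layer simply uses the logistic function.

# Illustrative only: a 64-30-10 network whose hidden layer uses tanh and whose
# output layer uses the logistic function. The two lists must contain one entry
# per non-input layer (here self.size - 1 = 2).
nn_mixed = NeuralNetwork([64, 30, 10],
                         active_function=[tanh, logistic],
                         active_function_der=[tanh_derivative, logistic_derivative],
                         learn_rate=0.5)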

The above is the source code of the BP neural network algorithm. Below is a digit recognition program used to verify that the code works correctly.

import numpy as np
from sklearn.datasets import load_digits
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in newer releases
from network_mark import NeuralNetwork


digits = load_digits()
X = digits.data
y = digits.target
X -= X.min()  # normalize the values to bring them into the range 0-1
X /= X.max()

nn = NeuralNetwork([64, 100, 10])
X_train, X_test, y_train, y_test = train_test_split(X, y)
labels_train = LabelBinarizer().fit_transform(y_train)
labels_test = LabelBinarizer().fit_transform(y_test)

# X_train.shape: (1347, 64)
# y_train.shape: (1347,)
# labels_train.shape: (1347, 10)
# labels_test.shape: (450, 10)

print("start fitting")
Data = [(x, y) for x, y in zip(X_train, labels_train)]
nn.fit(Data, epochs=500, mini_batch_size=32)
result = nn.predict(X_test.T)
predictions = [np.argmax(result[:, i]) for i in range(result.shape[1])]

print(predictions)
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))

Finally come the test results; the accuracy is quite impressive.
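The confusion matrix and classification report printed by the script are not reproduced here. As a small addition of my own, the run can also be summarized as a single accuracy number from the predictions and y_test computed above:

import numpy as np

accuracy = np.mean(np.array(predictions) == y_test)  # fraction of correctly classified test digits
print("test accuracy: {:.3f}".format(accuracy))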

