Python 3 Backpropagation Neural Network with Mini-Batch (written following Andrew Ng's course)


# -*- coding: utf-8 -*-
"""
Created on Sat Jan 20 13:47:54 2018

@author: markli
"""
import numpy as np

def tanh(x):
    return np.tanh(x)

def tanh_derivative(x):
    return 1.0 - np.tanh(x) * np.tanh(x)

def logistic(x):
    return 1 / (1 + np.exp(-x))

def logistic_derivative(x):
    return logistic(x) * (1 - logistic(x))

def ReLU(x, a=1):
    # np.maximum works elementwise on arrays; the builtin max() would
    # raise an error when x is a numpy array
    return np.maximum(0, a * x)

def ReLU_derivative(x, a=1):
    return np.where(x < 0, 0, a)

class NeuralNetwork:
    '''
    Z = W * x + b
    A = sigmoid(Z)
    Z: net input
    x: sample set, m * n (m samples, n features)
    b: bias
    W: weights
    A: net output (activation)
    '''
    def __init__(self, layers, active_function=[logistic],
                 active_function_der=[logistic_derivative], learn_rate=0.9):
        # uniform initialization in (-1, 1); np.random.rand is uniform on
        # [0, 1), whereas randn would give unbounded Gaussian values
        self.weights = [2 * np.random.rand(x, y) - 1 for x, y in zip(layers[1:], layers[:-1])]
        self.B = [2 * np.random.rand(x, 1) - 1 for x in layers[1:]]
        self.learnRate = learn_rate
        self.size = len(layers)
        # one activation function (and derivative) per non-input layer;
        # if a single function is given, reuse it for every layer
        if len(active_function) == self.size - 1:
            self.sigmoids = active_function
        else:
            self.sigmoids = [active_function[0]] * (self.size - 1)
        if len(active_function_der) == self.size - 1:
            self.sigmoids_der = active_function_der
        else:
            self.sigmoids_der = [active_function_der[0]] * (self.size - 1)
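    # Example: NeuralNetwork([64, 100, 10]) yields weight shapes
    # [(100, 64), (10, 100)] and bias shapes [(100, 1), (10, 1)].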

    def BackPropagation(self, X, Y):
        """
        Backpropagation.
        X: size * n, where size is the mini-batch size and n the number of features
        Y: size * l, where size is the mini-batch size and l the number of class labels
        Computes the w and b changes contributed by all size samples in one pass.
        """
        deltb = [np.zeros(b.shape) for b in self.B]
        deltw = [np.zeros(w.shape) for w in self.weights]

        active = np.transpose(X)
        actives = [active]
        zs = []
        # forward pass
        for i, (w, b) in enumerate(zip(self.weights, self.B)):
            z = np.dot(w, active) + b
            zs.append(z)
            active = self.sigmoids[i](z)
            actives.append(active)

        Y = np.transpose(Y)
        cost = self.cost(actives[-1], Y)  # derivative of the cost w.r.t. a
        z = zs[-1]
        delta = np.multiply(cost, self.sigmoids_der[-1](z))  # error of the output layer
        deltb[-1] = np.sum(delta, axis=1, keepdims=True)  # b change accumulated over the batch, l*1
        deltw[-1] = np.dot(delta, np.transpose(actives[-2]))  # w change accumulated over the batch
        for i in range(2, self.size):
            z = zs[-i]  # net input of the current layer
            sp = self.sigmoids_der[-i](z)  # derivative w.r.t. z
            delta = np.multiply(np.dot(np.transpose(self.weights[-i + 1]), delta), sp)  # error of the current layer
            deltb[-i] = np.sum(delta, axis=1, keepdims=True)  # accumulated b change of the current layer
            deltw[-i] = np.dot(delta, np.transpose(actives[-i - 1]))  # accumulated w change of the current layer

        return deltw, deltb
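    # Note: deltw and deltb hold gradients summed over the whole mini-batch;
    # fit() divides by mini_batch_size to turn these sums into averages.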

    def fit(self, X, Y, mini_batch_size, epochs=1000):
        N = len(Y)
        for i in range(epochs):
            # draw random starting offsets; each offset yields one mini-batch
            # (random overlapping slices rather than a shuffled partition)
            randomlist = np.random.randint(0, N - mini_batch_size, int(N / mini_batch_size))
            batch_X = [X[k:k + mini_batch_size] for k in randomlist]
            batch_Y = [Y[k:k + mini_batch_size] for k in randomlist]
            for m in range(len(batch_Y)):
                deltw, deltb = self.BackPropagation(batch_X[m], batch_Y[m])
                self.weights = [w - (self.learnRate / mini_batch_size) * dw for w, dw in zip(self.weights, deltw)]
                self.B = [b - (self.learnRate / mini_batch_size) * db for b, db in zip(self.B, deltb)]

    def predict(self, x):
        """Forward pass. x is expected as n * m (features in rows, one
        column per sample), so pass X.T when samples are stored row-wise."""
        for i, (b, w) in enumerate(zip(self.B, self.weights)):
            x = self.sigmoids[i](np.dot(w, x) + b)
        return x

    def cost(self, a, y):
        """
        The factor of the loss's derivative w.r.t. z that excludes the
        output layer's activation derivative. The full expression is
        (a - y) * sigmoid_derivative(z); since the output activation is not
        known here, that factor is applied at the call site.
        """
        return a - y
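For reference, these are the standard backpropagation relations that BackPropagation implements, written for a generic layer l (L is the output layer, ⊙ is elementwise multiplication, η the learning rate, m the mini-batch size):

\delta^{L} = (a^{L} - y) \odot \sigma'(z^{L}), \qquad
\delta^{l} = \big( (W^{l+1})^{T} \delta^{l+1} \big) \odot \sigma'(z^{l})

\frac{\partial C}{\partial b^{l}} = \delta^{l}, \qquad
\frac{\partial C}{\partial W^{l}} = \delta^{l} \, (a^{l-1})^{T}

W^{l} \leftarrow W^{l} - \frac{\eta}{m} \sum_{k=1}^{m} \delta^{l}_{(k)} \, (a^{l-1}_{(k)})^{T}, \qquad
b^{l} \leftarrow b^{l} - \frac{\eta}{m} \sum_{k=1}^{m} \delta^{l}_{(k)}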

This algorithm is written following the BP neural network algorithm as presented by Andrew Ng; each call to BackPropagation trains on Mini_Batch_size samples at once. The test code and test results are given below.

import numpy as np
from sklearn.datasets import load_digits
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import LabelBinarizer
from FullNeuralNetwork import NeuralNetwork
# sklearn.cross_validation was removed in newer scikit-learn releases;
# train_test_split now lives in sklearn.model_selection
from sklearn.model_selection import train_test_split


digits = load_digits()
X = digits.data
y = digits.target
X -= X.min()  # normalize the values to bring them into the range 0-1
X /= X.max()

nn = NeuralNetwork([64, 100, 10])
X_train, X_test, y_train, y_test = train_test_split(X, y)
labels_train = LabelBinarizer().fit_transform(y_train)
labels_test = LabelBinarizer().fit_transform(y_test)

# X_train.shape (1347, 64)
# y_train.shape (1347,)
# labels_train.shape (1347, 10)
# labels_test.shape (450, 10)

print("start fitting")

nn.fit(X_train, labels_train, epochs=500, mini_batch_size=8)
result = nn.predict(X_test.T)  # predict expects one column per sample, hence the transpose
predictions = [np.argmax(result[:, j]) for j in range(result.shape[1])]

print(predictions)
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))
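If a single summary number is wanted, scikit-learn's accuracy_score can be applied to the same predictions list (a small optional addition, not part of the original test script):

from sklearn.metrics import accuracy_score

# overall fraction of test digits classified correctly
print(accuracy_score(y_test, predictions))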

Test results:

The overall results are decent; the parameters still need some tuning. Going back over the code I posted earlier, I found an error in the algorithm and have now corrected it. There should be basically no errors left now, haha.

