I recently stole some spare time to study a bit of machine learning, and when we reached neural networks, 阿Kun and I decided to implement the algorithm in Python. We each wrote it a different way; only my code is posted here.
The MNIST dataset was constructed from two datasets collected by the US National Institute of Standards and Technology (NIST).
The training set contains handwritten digits from 250 people, 50% of them high-school students and 50% Census Bureau employees.
Each digit image in the training set is 28x28 pixels.
The MNIST dataset can be obtained from the download link; it contains the following files (an unpacking sketch follows the list):
- Training set images: train-images-idx3-ubyte.gz, containing 60,000 samples
- Training set labels: train-labels-idx1-ubyte.gz, containing 60,000 labels
- Test set images: t10k-images-idx3-ubyte.gz, containing 10,000 samples
- Test set labels: t10k-labels-idx1-ubyte.gz, containing 10,000 labels
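Note that the files download as gzip archives, while the `load_mnist` loader below opens the uncompressed versions. A minimal unpacking sketch, assuming the four .gz files sit in a local `mnist/` directory (the same path the main script uses):

```python
import gzip
import os
import shutil

# Decompress the four MNIST .gz archives in place, so that e.g.
# 'train-images-idx3-ubyte.gz' yields 'train-images-idx3-ubyte'.
path = 'mnist'  # assumed download directory
for name in os.listdir(path):
    if name.endswith('.gz'):
        src = os.path.join(path, name)
        dst = src[:-3]  # strip the '.gz' suffix
        with gzip.open(src, 'rb') as f_in, open(dst, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
```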
A proper explanation of the neural-network algorithm is too involved for this post and beyond my ability to do justice, so I'll only give our code here; for the theory, Google or Baidu will serve you well.
StarMan
GitHub repository: https://github.com/MyBules/Neural-Network
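Although I won't derive backpropagation, it may help to see the update rule that `fit()` below actually applies, written out. Here η is `eta`, α is the momentum coefficient `alpha`, d is `decrease_const`, and i is the epoch index:

```latex
% Adaptive learning rate, applied once per epoch:
\eta_i = \frac{\eta_{i-1}}{1 + d\,i}
% Per-mini-batch weight update with momentum:
\Delta w^{(t)} = \eta_i \,\nabla J\!\left(w^{(t)}\right), \qquad
w^{(t+1)} = w^{(t)} - \Delta w^{(t)} - \alpha\,\Delta w^{(t-1)}
```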

```python
import os
import struct
import sys

import matplotlib.pyplot as plt
import numpy as np
from scipy.special import expit


def load_mnist(path, kind='train'):
    '''
    Load MNIST images and labels.
    :param path: directory containing the uncompressed MNIST files
    :param kind: file prefix, 'train' or 't10k'
    :return: images: n_samples*784 array; labels: the digit class of each image (integers 0-9)
    '''
    labels_path = os.path.join(path, '%s-labels-idx1-ubyte' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte' % kind)
    with open(labels_path, 'rb') as lbpath:
        magic, n = struct.unpack('>II', lbpath.read(8))
        labels = np.fromfile(lbpath, dtype=np.uint8)
    with open(images_path, 'rb') as imgpath:
        magic, num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        images = np.fromfile(imgpath, dtype=np.uint8).reshape(len(labels), 784)  # 28*28 = 784
    return images, labels


class NeuralNetMLP(object):
    def __init__(self, n_output, n_features, n_hidden=30, l1=0.0, l2=0.0,
                 epochs=500, eta=0.001, alpha=0.0, decrease_const=0.0,
                 shuffle=True, minibatches=1, random_state=None):
        '''
        :param n_output: number of output units
        :param n_features: number of input units
        :param n_hidden: number of hidden units
        :param l1: L1 regularization coefficient lambda
        :param l2: L2 regularization coefficient lambda
        :param epochs: number of passes over the training set (iterations)
        :param eta: learning rate
        :param alpha: momentum coefficient; adds a factor of the previous
                      weight update to speed up learning
        :param decrease_const: decrease constant d for the adaptive learning
                               rate; shrinks eta as the iterations progress to
                               better ensure convergence
        :param shuffle: shuffle the training set before each epoch to keep the
                        algorithm from cycling
        :param minibatches: split the training data into k mini-batches per
                            epoch; for faster learning the gradient is computed
                            per batch rather than over the whole training set
        :param random_state: seed for the random number generator
        '''
        np.random.seed(random_state)
        self.n_output = n_output
        self.n_features = n_features
        self.n_hidden = n_hidden
        self.w1, self.w2 = self._initialize_weights()
        self.l1 = l1
        self.l2 = l2
        self.epochs = epochs
        self.eta = eta
        self.alpha = alpha
        self.decrease_const = decrease_const
        self.shuffle = shuffle
        self.minibatches = minibatches

    def _encode_labels(self, y, k):
        '''One-hot encode the label vector y into a k*n_samples matrix.'''
        onehot = np.zeros((k, y.shape[0]))
        for idx, val in enumerate(y):
            onehot[val, idx] = 1.0
        return onehot

    def _initialize_weights(self):
        '''Initialize the weights with small random numbers.
        :return: w1, w2
        '''
        w1 = np.random.uniform(-1.0, 1.0, size=self.n_hidden*(self.n_features + 1))
        w1 = w1.reshape(self.n_hidden, self.n_features + 1)
        w2 = np.random.uniform(-1.0, 1.0, size=self.n_output*(self.n_hidden + 1))
        w2 = w2.reshape(self.n_output, self.n_hidden + 1)
        return w1, w2

    def _sigmoid(self, z):
        '''expit is equivalent to 1.0/(1.0 + np.exp(-z)).'''
        return expit(z)

    def _sigmoid_gradient(self, z):
        sg = self._sigmoid(z)
        return sg * (1 - sg)

    def _add_bias_unit(self, X, how='column'):
        if how == 'column':
            X_new = np.ones((X.shape[0], X.shape[1] + 1))
            X_new[:, 1:] = X
        elif how == 'row':
            X_new = np.ones((X.shape[0] + 1, X.shape[1]))
            X_new[1:, :] = X
        else:
            raise AttributeError("'how' must be 'column' or 'row'")
        return X_new

    def _feedforward(self, X, w1, w2):
        a1 = self._add_bias_unit(X, how='column')
        z2 = w1.dot(a1.T)
        a2 = self._sigmoid(z2)
        a2 = self._add_bias_unit(a2, how='row')
        z3 = w2.dot(a2)
        a3 = self._sigmoid(z3)
        return a1, z2, a2, z3, a3

    def _L2_reg(self, lambda_, w1, w2):
        return (lambda_/2.0) * (np.sum(w1[:, 1:] ** 2) + np.sum(w2[:, 1:] ** 2))

    def _L1_reg(self, lambda_, w1, w2):
        return (lambda_/2.0) * (np.abs(w1[:, 1:]).sum() + np.abs(w2[:, 1:]).sum())

    def _get_cost(self, y_enc, output, w1, w2):
        term1 = -y_enc * np.log(output)
        term2 = (1 - y_enc) * np.log(1 - output)
        cost = np.sum(term1 - term2)
        L1_term = self._L1_reg(self.l1, w1, w2)
        L2_term = self._L2_reg(self.l2, w1, w2)
        cost = cost + L1_term + L2_term
        return cost

    def _get_gradient(self, a1, a2, a3, z2, y_enc, w1, w2):
        # backpropagation
        sigma3 = a3 - y_enc
        z2 = self._add_bias_unit(z2, how='row')
        sigma2 = w2.T.dot(sigma3) * self._sigmoid_gradient(z2)
        sigma2 = sigma2[1:, :]
        grad1 = sigma2.dot(a1)
        grad2 = sigma3.dot(a2.T)
        # regularize (bias columns are excluded)
        grad1[:, 1:] += (w1[:, 1:] * (self.l1 + self.l2))
        grad2[:, 1:] += (w2[:, 1:] * (self.l1 + self.l2))
        return grad1, grad2

    def predict(self, X):
        a1, z2, a2, z3, a3 = self._feedforward(X, self.w1, self.w2)
        y_pred = np.argmax(z3, axis=0)
        return y_pred

    def fit(self, X, y, print_progress=False):
        self.cost_ = []
        X_data, y_data = X.copy(), y.copy()
        y_enc = self._encode_labels(y, self.n_output)
        delta_w1_prev = np.zeros(self.w1.shape)
        delta_w2_prev = np.zeros(self.w2.shape)
        for i in range(self.epochs):
            # adaptive learning rate
            self.eta /= (1 + self.decrease_const*i)
            if print_progress:
                sys.stderr.write('\rEpoch: %d/%d' % (i+1, self.epochs))
                sys.stderr.flush()
            if self.shuffle:
                # shuffle images and one-hot labels together so they stay aligned
                idx = np.random.permutation(y_data.shape[0])
                X_data, y_enc = X_data[idx], y_enc[:, idx]
            mini = np.array_split(range(y_data.shape[0]), self.minibatches)
            for idx in mini:
                # feedforward
                a1, z2, a2, z3, a3 = self._feedforward(X_data[idx], self.w1, self.w2)
                cost = self._get_cost(y_enc=y_enc[:, idx], output=a3,
                                      w1=self.w1, w2=self.w2)
                self.cost_.append(cost)
                # compute the gradient via backpropagation
                grad1, grad2 = self._get_gradient(a1=a1, a2=a2, a3=a3, z2=z2,
                                                  y_enc=y_enc[:, idx],
                                                  w1=self.w1, w2=self.w2)
                # update the weights, adding a momentum term from the previous update
                delta_w1, delta_w2 = self.eta * grad1, self.eta * grad2
                self.w1 -= (delta_w1 + (self.alpha * delta_w1_prev))
                self.w2 -= (delta_w2 + (self.alpha * delta_w2_prev))
                delta_w1_prev, delta_w2_prev = delta_w1, delta_w2
        return self


def costplt1(nn):
    '''Cost function plot, one point per mini-batch.'''
    plt.plot(range(len(nn.cost_)), nn.cost_)
    plt.ylim([0, 2000])
    plt.ylabel('Cost')
    plt.xlabel('Epochs * 50')
    plt.tight_layout()
    plt.show()


def costplt2(nn):
    '''Cost function plot, averaged per epoch.'''
    batches = np.array_split(range(len(nn.cost_)), 1000)
    cost_ary = np.array(nn.cost_)
    cost_avgs = [np.mean(cost_ary[i]) for i in batches]
    plt.plot(range(len(cost_avgs)), cost_avgs, color='red')
    plt.ylim([0, 10000])
    plt.ylabel('Cost')
    plt.xlabel('Epochs')
    plt.tight_layout()
    plt.show()


if __name__ == '__main__':
    path = 'mnist'  # data directory

    # training and test samples
    X_train, y_train = load_mnist(path, kind='train')  # X_train: 60000*784
    X_test, y_test = load_mnist(path, kind='t10k')     # X_test: 10000*784

    nn = NeuralNetMLP(n_output=10, n_features=X_train.shape[1], n_hidden=50,
                      l2=0.1, l1=0.0, epochs=1000, eta=0.001, alpha=0.001,
                      decrease_const=0.00001, shuffle=True, minibatches=50,
                      random_state=1)
    nn.fit(X_train, y_train, print_progress=True)
    costplt1(nn)
    costplt2(nn)

    y_train_pred = nn.predict(X_train)
    acc = np.sum(y_train == y_train_pred, axis=0) / X_train.shape[0]
    print('Training accuracy: %.2f%%' % (acc * 100))

    y_test_pred = nn.predict(X_test)
    acc = np.sum(y_test == y_test_pred, axis=0) / X_test.shape[0]
    print('Test accuracy: %.2f%%' % (acc * 100))

    # misclassified samples
    miscl_img = X_test[y_test != y_test_pred][:25]
    correct_lab = y_test[y_test != y_test_pred][:25]
    miscl_lab = y_test_pred[y_test != y_test_pred][:25]
    fig, ax = plt.subplots(nrows=5, ncols=5, sharex=True, sharey=True)
    ax = ax.flatten()
    for i in range(25):
        img = miscl_img[i].reshape(28, 28)
        ax[i].imshow(img, cmap='Greys', interpolation='nearest')
        ax[i].set_title('%d) t: %d p: %d' % (i+1, correct_lab[i], miscl_lab[i]))
    ax[0].set_xticks([])
    ax[0].set_yticks([])
    plt.tight_layout()
    plt.show()

    # correctly classified samples
    unmiscl_img = X_test[y_test == y_test_pred][:25]
    uncorrect_lab = y_test[y_test == y_test_pred][:25]
    unmiscl_lab = y_test_pred[y_test == y_test_pred][:25]
    fig, ax = plt.subplots(nrows=5, ncols=5, sharex=True, sharey=True)
    ax = ax.flatten()
    for i in range(25):
        img = unmiscl_img[i].reshape(28, 28)
        ax[i].imshow(img, cmap='Greys', interpolation='nearest')
        ax[i].set_title('%d) t: %d p: %d' % (i+1, uncorrect_lab[i], unmiscl_lab[i]))
    ax[0].set_xticks([])
    ax[0].set_yticks([])
    plt.tight_layout()
    plt.show()
```
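A full 1000-epoch run takes a while, so before committing to it, it can be worth sanity-checking the loader on its own. A minimal sketch, reusing `load_mnist` from the listing above and assuming the uncompressed files sit in `mnist/`:

```python
# Quick sanity check of load_mnist before a full training run.
X_train, y_train = load_mnist('mnist', kind='train')
print(X_train.shape, y_train.shape)   # expected: (60000, 784) (60000,)

# Reshape the first flattened 784-vector back to 28x28 and display it.
plt.imshow(X_train[0].reshape(28, 28), cmap='Greys')
plt.title('label: %d' % y_train[0])
plt.show()
```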
Test results:
Cost function plots:
Misclassified test samples:
Correctly classified test samples: