What is a multilayer perceptron?
A multilayer perceptron (MLP) is also referred to as an artificial neural network (ANN). Between its input and output layers it can have any number of hidden layers; the simplest MLP contains a single hidden layer, giving a three-layer structure, as shown in the figure below:
As the figure shows, adjacent layers of a multilayer perceptron are fully connected: the bottom layer is the input layer, the middle is the hidden layer, and the top is the output layer.
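As a concrete picture of this three-layer, fully connected structure, here is a minimal numpy sketch; the layer sizes are arbitrary example values, not taken from the text:

```python
import numpy as np

# Minimal three-layer MLP skeleton: input -> hidden -> output.
n_in, n_hidden, n_out = 4, 5, 3  # illustrative sizes

# "Fully connected" means every unit in one layer connects to every unit
# in the next, so each layer is a dense weight matrix plus a bias vector.
W1, b1 = np.random.randn(n_in, n_hidden), np.zeros(n_hidden)  # 4*5 = 20 weights
W2, b2 = np.random.randn(n_hidden, n_out), np.zeros(n_out)    # 5*3 = 15 weights

x = np.random.randn(n_in)   # one input sample
h = np.tanh(x @ W1 + b1)    # hidden layer activations
y = h @ W2 + b2             # output layer scores
print(h.shape, y.shape)     # (5,) (3,)
```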
Reference: https://blog.csdn.net/fg13821267836/article/details/93405572
What is the difference between a multilayer perceptron and a perceptron?
我們來看下感知機是什么樣的:
From this it can be seen that the perceptron is a linear binary classifier, so it cannot effectively classify data that is not linearly separable. This motivated deepening the network: in theory, a multilayer perceptron can approximate arbitrarily complex functions. The sketch below illustrates the gap on XOR, the classic non-linearly-separable example.
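A minimal demonstration using scikit-learn (the hidden-layer size, solver, and other hyperparameters are illustrative choices, not from the text):

```python
import numpy as np
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier

X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([0, 1, 1, 0])  # XOR labels: not linearly separable

# A linear perceptron can classify at most 3 of the 4 XOR points correctly.
p = Perceptron(max_iter=1000).fit(X, y)
print("Perceptron accuracy:", p.score(X, y))

# One small hidden layer is enough to separate XOR; typically reaches 1.0.
mlp = MLPClassifier(hidden_layer_sizes=(8,), activation="tanh",
                    solver="lbfgs", max_iter=1000, random_state=0).fit(X, y)
print("MLP accuracy:", mlp.score(X, y))
```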
What is the forward propagation process of a multilayer perceptron?
Take a network with an input layer, one hidden layer, and an output layer as an example. Write $W^{(l)}$ for the weight matrix of layer $l$, $b^{(l)}$ for its bias vector, $\sigma(\cdot)$ for the activation function, and $a^{(l)}$ for the output of layer $l$ (the input layer is layer 1, with $a^{(1)} = x$). For the three neurons of the second layer, the outputs are:

$$a_i^{(2)} = \sigma\!\left(\sum_{j} w_{ij}^{(2)} x_j + b_i^{(2)}\right), \qquad i = 1, 2, 3$$

Converting this to a matrix expression:

$$a^{(2)} = \sigma\!\left(W^{(2)} x + b^{(2)}\right)$$

Generalizing the second layer's forward computation to any layer $l$ of the network:

$$a^{(l)} = \sigma\!\left(W^{(l)} a^{(l-1)} + b^{(l)}\right)$$
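A minimal sketch of this recurrence in plain numpy (the layer sizes, random weights, and sigmoid activation here are illustrative assumptions):

```python
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def forward(a, weights, biases):
    """Apply a^{(l)} = sigma(W^{(l)} a^{(l-1)} + b^{(l)}) layer by layer."""
    for W, b in zip(weights, biases):
        a = sigmoid(W @ a + b)
    return a

# Example: 3 inputs -> 3 hidden units -> 1 output, as in the text's example.
rng = np.random.default_rng(0)
weights = [rng.normal(size=(3, 3)), rng.normal(size=(1, 3))]
biases = [rng.normal(size=3), rng.normal(size=1)]
print(forward(np.array([0.5, -1.0, 2.0]), weights, biases))
```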
What is the backpropagation process of a multilayer perceptron?
Reference: https://blog.csdn.net/xholes/article/details/78461164
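In outline: backpropagation pushes the gradient of the loss $J$ back through the same layers used in the forward pass via the chain rule. With the notation above, and writing $z^{(l)} = W^{(l)} a^{(l-1)} + b^{(l)}$ for layer $l$'s pre-activation, the standard equations are:

$$\delta^{(L)} = \nabla_{a^{(L)}} J \odot \sigma'\!\left(z^{(L)}\right)$$

$$\delta^{(l)} = \left(W^{(l+1)}\right)^{\!\top} \delta^{(l+1)} \odot \sigma'\!\left(z^{(l)}\right)$$

$$\frac{\partial J}{\partial W^{(l)}} = \delta^{(l)} \left(a^{(l-1)}\right)^{\!\top}, \qquad \frac{\partial J}{\partial b^{(l)}} = \delta^{(l)}$$

Gradient descent then updates each layer as $W^{(l)} \leftarrow W^{(l)} - \eta \, \partial J / \partial W^{(l)}$, which is exactly what the code below does.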
The implementation code is below (source: https://github.com/eriklindernoren/ML-From-Scratch):
```python
from __future__ import print_function, division
import numpy as np
import math
from sklearn import datasets
from mlfromscratch.utils import train_test_split, to_categorical, normalize, accuracy_score, Plot
from mlfromscratch.deep_learning.activation_functions import Sigmoid, Softmax
from mlfromscratch.deep_learning.loss_functions import CrossEntropy


class MultilayerPerceptron():
    """Multilayer Perceptron classifier. A fully-connected neural network with
    one hidden layer. Unrolled to display the whole forward and backward pass.

    Parameters:
    -----------
    n_hidden: int
        The number of processing nodes (neurons) in the hidden layer.
    n_iterations: float
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    """
    def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.01):
        self.n_hidden = n_hidden
        self.n_iterations = n_iterations
        self.learning_rate = learning_rate
        self.hidden_activation = Sigmoid()
        self.output_activation = Softmax()
        self.loss = CrossEntropy()

    def _initialize_weights(self, X, y):
        n_samples, n_features = X.shape
        _, n_outputs = y.shape
        # Hidden layer
        limit = 1 / math.sqrt(n_features)
        self.W = np.random.uniform(-limit, limit, (n_features, self.n_hidden))
        self.w0 = np.zeros((1, self.n_hidden))
        # Output layer
        limit = 1 / math.sqrt(self.n_hidden)
        self.V = np.random.uniform(-limit, limit, (self.n_hidden, n_outputs))
        self.v0 = np.zeros((1, n_outputs))

    def fit(self, X, y):
        self._initialize_weights(X, y)

        for i in range(self.n_iterations):
            # ..............
            #  Forward Pass
            # ..............

            # HIDDEN LAYER
            hidden_input = X.dot(self.W) + self.w0
            hidden_output = self.hidden_activation(hidden_input)
            # OUTPUT LAYER
            output_layer_input = hidden_output.dot(self.V) + self.v0
            y_pred = self.output_activation(output_layer_input)

            # ...............
            #  Backward Pass
            # ...............

            # OUTPUT LAYER
            # Grad. w.r.t input of output layer
            grad_wrt_out_l_input = self.loss.gradient(y, y_pred) * self.output_activation.gradient(output_layer_input)
            grad_v = hidden_output.T.dot(grad_wrt_out_l_input)
            grad_v0 = np.sum(grad_wrt_out_l_input, axis=0, keepdims=True)
            # HIDDEN LAYER
            # Grad. w.r.t input of hidden layer
            grad_wrt_hidden_l_input = grad_wrt_out_l_input.dot(self.V.T) * self.hidden_activation.gradient(hidden_input)
            grad_w = X.T.dot(grad_wrt_hidden_l_input)
            grad_w0 = np.sum(grad_wrt_hidden_l_input, axis=0, keepdims=True)

            # Update weights (by gradient descent)
            # Move against the gradient to minimize loss
            self.V -= self.learning_rate * grad_v
            self.v0 -= self.learning_rate * grad_v0
            self.W -= self.learning_rate * grad_w
            self.w0 -= self.learning_rate * grad_w0

    # Use the trained model to predict labels of X
    def predict(self, X):
        # Forward pass:
        hidden_input = X.dot(self.W) + self.w0
        hidden_output = self.hidden_activation(hidden_input)
        output_layer_input = hidden_output.dot(self.V) + self.v0
        y_pred = self.output_activation(output_layer_input)
        return y_pred


def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # Convert the nominal y values to binary
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)

    # MLP
    clf = MultilayerPerceptron(n_hidden=16, n_iterations=1000, learning_rate=0.01)
    clf.fit(X_train, y_train)
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Multilayer Perceptron", accuracy=accuracy, legend_labels=np.unique(y))


if __name__ == "__main__":
    main()
```
Output:

```
Accuracy: 0.967966573816156
```
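A common way to sanity-check a hand-written backward pass like the one in `fit()` above is a numerical gradient check: compare the analytic gradient against centered finite differences. A self-contained sketch follows; the single sigmoid layer and squared-error loss are simplifying assumptions for the check, not the model above:

```python
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.default_rng(1)
X = rng.normal(size=(5, 3))  # 5 samples, 3 features
t = rng.normal(size=(5, 2))  # 2 targets per sample
W = rng.normal(size=(3, 2))  # one sigmoid layer, no bias, for brevity

def loss(W):
    return 0.5 * np.sum((sigmoid(X @ W) - t) ** 2)

# Analytic gradient via the chain rule: dJ/dW = X^T [(y - t) * y * (1 - y)]
y = sigmoid(X @ W)
grad = X.T @ ((y - t) * y * (1 - y))

# Numerical gradient via centered finite differences, one weight at a time
eps = 1e-6
num = np.zeros_like(W)
for i in range(W.shape[0]):
    for j in range(W.shape[1]):
        Wp, Wm = W.copy(), W.copy()
        Wp[i, j] += eps
        Wm[i, j] -= eps
        num[i, j] = (loss(Wp) - loss(Wm)) / (2 * eps)

print(np.max(np.abs(grad - num)))  # should be tiny, on the order of 1e-9
```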
Another implementation builds the MLP from the fully-connected (Dense) layers of the same deep-learning module used for convolutional neural networks:
```python
from __future__ import print_function
from sklearn import datasets
import matplotlib.pyplot as plt
import numpy as np
import sys
sys.path.append("/content/drive/My Drive/learn/ML-From-Scratch/")

# Import helper functions
from mlfromscratch.deep_learning import NeuralNetwork
from mlfromscratch.utils import train_test_split, to_categorical, normalize, Plot
from mlfromscratch.utils import get_random_subsets, shuffle_data, accuracy_score
from mlfromscratch.deep_learning.optimizers import StochasticGradientDescent, Adam, RMSprop, Adagrad, Adadelta
from mlfromscratch.deep_learning.loss_functions import CrossEntropy
from mlfromscratch.utils.misc import bar_widgets
from mlfromscratch.deep_learning.layers import Dense, Dropout, Activation


def main():
    optimizer = Adam()

    #-----
    # MLP
    #-----
    data = datasets.load_digits()
    X = data.data
    y = data.target

    # Convert to one-hot encoding
    y = to_categorical(y.astype("int"))

    n_samples, n_features = X.shape
    n_hidden = 512

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)

    clf = NeuralNetwork(optimizer=optimizer,
                        loss=CrossEntropy,
                        validation_data=(X_test, y_test))

    clf.add(Dense(n_hidden, input_shape=(n_features,)))
    clf.add(Activation('leaky_relu'))
    clf.add(Dense(n_hidden))
    clf.add(Activation('leaky_relu'))
    clf.add(Dropout(0.25))
    clf.add(Dense(n_hidden))
    clf.add(Activation('leaky_relu'))
    clf.add(Dropout(0.25))
    clf.add(Dense(n_hidden))
    clf.add(Activation('leaky_relu'))
    clf.add(Dropout(0.25))
    clf.add(Dense(10))
    clf.add(Activation('softmax'))

    print()
    clf.summary(name="MLP")

    train_err, val_err = clf.fit(X_train, y_train, n_epochs=50, batch_size=256)

    # Training and validation error plot
    n = len(train_err)
    training, = plt.plot(range(n), train_err, label="Training Error")
    validation, = plt.plot(range(n), val_err, label="Validation Error")
    plt.legend(handles=[training, validation])
    plt.title("Error Plot")
    plt.ylabel('Error')
    plt.xlabel('Iterations')
    plt.show()

    _, accuracy = clf.test_on_batch(X_test, y_test)
    print("Accuracy:", accuracy)

    # Reduce dimension to 2D using PCA and plot the results
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    Plot().plot_in_2d(X_test, y_pred,
                      title="Multilayer Perceptron",
                      accuracy=accuracy,
                      legend_labels=range(10))


if __name__ == "__main__":
    main()
```
Output:

```
+-----+
| MLP |
+-----+
Input Shape: (64,)
+------------------------+------------+--------------+
| Layer Type             | Parameters | Output Shape |
+------------------------+------------+--------------+
| Dense                  | 33280      | (512,)       |
| Activation (LeakyReLU) | 0          | (512,)       |
| Dense                  | 262656     | (512,)       |
| Activation (LeakyReLU) | 0          | (512,)       |
| Dropout                | 0          | (512,)       |
| Dense                  | 262656     | (512,)       |
| Activation (LeakyReLU) | 0          | (512,)       |
| Dropout                | 0          | (512,)       |
| Dense                  | 262656     | (512,)       |
| Activation (LeakyReLU) | 0          | (512,)       |
| Dropout                | 0          | (512,)       |
| Dense                  | 5130       | (10,)        |
| Activation (Softmax)   | 0          | (10,)        |
+------------------------+------------+--------------+
Total Parameters: 826378

Training: 100% [------------------------------------------------] Time: 0:00:29
Accuracy: 0.9763231197771588
```