Requirement: implement a neural network with an arbitrary number of layers.
Each layer consists of:
1. a forward-pass function and a backward-pass function; 2. the intermediate values (the cache) that each layer's computation needs.
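Every layer in this assignment follows the same modular interface: the forward function returns (out, cache), where cache holds whatever the backward pass will need, and the backward function takes the upstream gradient dout together with that cache and returns the gradients. A toy sketch of the pattern (this scaling "layer" is made up purely to illustrate the interface; it is not part of the assignment code):

import numpy as np

def scale_forward(x, a):
  # toy layer: multiply the input by a scalar a
  out = a * x
  cache = (x, a)            # stash what the backward pass needs
  return out, cache

def scale_backward(dout, cache):
  # chain rule: d(a*x)/dx = a, d(a*x)/da = x
  x, a = cache
  dx = a * dout             # gradient w.r.t. the input
  da = np.sum(x * dout)     # gradient w.r.t. the parameter
  return dx, da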
cell 1 The usual initial setup
# As usual, a bit of setup

import time
import numpy as np
import matplotlib.pyplot as plt
from cs231n.classifiers.fc_net import *
from cs231n.data_utils import get_CIFAR10_data
from cs231n.gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
from cs231n.solver import Solver

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
  """ returns relative error """
  return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))
cell 2 Load the CIFAR-10 data and print the shape of each array
# Load the (preprocessed) CIFAR10 data.

data = get_CIFAR10_data()
for k, v in data.iteritems():
  print '%s: ' % k, v.shape
cell 3 Test the affine forward-pass function on small generated data
# Test the affine_forward function

num_inputs = 2
input_shape = (4, 5, 6)
output_dim = 3

input_size = num_inputs * np.prod(input_shape)
# input_size = 240
weight_size = output_dim * np.prod(input_shape)
# weight_size = 360
x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape)
# x has shape (2, 4, 5, 6), values from -0.1 to 0.5
w = np.linspace(-0.2, 0.3, num=weight_size).reshape(np.prod(input_shape), output_dim)
# w has shape (120, 3), values from -0.2 to 0.3
b = np.linspace(-0.3, 0.1, num=output_dim)
# b has shape (3,), values from -0.3 to 0.1
# (2, 120) dot (120, 3) -> (2, 3)
out, _ = affine_forward(x, w, b)
correct_out = np.array([[ 1.49834967,  1.70660132,  1.91485297],
                        [ 3.25553199,  3.5141327,   3.77273342]])

# Compare your output with ours. The error should be around 1e-9.
print 'Testing affine_forward function:'
print 'difference: ', rel_error(out, correct_out)
Result:
Contents of the affine_forward(x, w, b) function:
def affine_forward(x, w, b):
  """
  Computes the forward pass for an affine (fully-connected) layer.

  The input x has shape (N, d_1, ..., d_k) and contains a minibatch of N
  examples, where each example x[i] has shape (d_1, ..., d_k). We will
  reshape each input into a vector of dimension D = d_1 * ... * d_k, and
  then transform it to an output vector of dimension M.

  Inputs:
  - x: A numpy array containing input data, of shape (N, d_1, ..., d_k)
  - w: A numpy array of weights, of shape (D, M)
  - b: A numpy array of biases, of shape (M,)

  Returns a tuple of:
  - out: output, of shape (N, M)
  - cache: (x, w, b)
  """
  out = None
  #############################################################################
  # TODO: Implement the affine forward pass. Store the result in out. You     #
  # will need to reshape the input into rows.                                 #
  #############################################################################
  N = x.shape[0]
  D = x.size / N
  x = x.reshape(N, D)
  # (N, D) dot (D, M) -> (N, M), e.g. (2, 120) dot (120, 3) -> (2, 3)
  out = np.dot(x, w) + b
  #############################################################################
  #                             END OF YOUR CODE                              #
  #############################################################################
  # note: the cache stores the reshaped (N, D) version of x, so the caller
  # has to reshape dx back to the original input shape after the backward pass
  cache = (x, w, b)
  return out, cache
cell 4 Backward pass: check that the computed gradients are correct
# Test the affine_backward function

x = np.random.randn(10, 2, 3)
w = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)
# x (10, 2, 3)   w (6, 5)   b (5,)   dout (10, 5)
dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)

_, cache = affine_forward(x, w, b)
# shapes stored in cache: x (10, 6), w (6, 5), b (5,)
dx, dw, db = affine_backward(dout, cache)
print dx.shape
# affine_forward cached the flattened (N, D) version of x, so dx comes back
# as (10, 6); reshape it to the original input shape before comparing.
dx = dx.reshape(10, 2, 3)

# The error should be around 1e-10
print 'Testing affine_backward function:'
print 'dx error: ', rel_error(dx_num, dx)
print 'dw error: ', rel_error(dw_num, dw)
print 'db error: ', rel_error(db_num, db)
Result:
Contents of affine_backward(dout, cache):
def affine_backward(dout, cache):
  """
  Computes the backward pass for an affine layer.

  Inputs:
  - dout: Upstream derivative, of shape (N, M)
  - cache: Tuple of:
    - x: Input data, of shape (N, d_1, ... d_k)
    - w: Weights, of shape (D, M)

  Returns a tuple of:
  - dx: Gradient with respect to x, of shape (N, d1, ..., d_k)
  - dw: Gradient with respect to w, of shape (D, M)
  - db: Gradient with respect to b, of shape (M,)
  """
  x, w, b = cache
  dx, dw, db = None, None, None
  # cached shapes in the test above: x (10, 6), w (6, 5), b (5,)
  #############################################################################
  # TODO: Implement the affine backward pass.                                 #
  #############################################################################
  # dout is the upstream gradient of the loss, shape (N, M), e.g. (10, 5)
  # dx: (10, 5) dot (5, 6) -> (10, 6)
  dx = np.dot(dout, w.T)
  # dw: (6, 10) dot (10, 5) -> (6, 5)
  dw = np.dot(x.T, dout)
  # db: sum over the batch dimension -> (5,)
  db = np.sum(dout, axis=0)
  #############################################################################
  #                             END OF YOUR CODE                              #
  #############################################################################
  return dx, dw, db
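For reference, writing the affine layer as y = xW + b with x of shape (N, D), the chain rule gives exactly the three lines above:

$$\frac{\partial L}{\partial x} = \frac{\partial L}{\partial y}\,W^{\top},\qquad \frac{\partial L}{\partial W} = x^{\top}\,\frac{\partial L}{\partial y},\qquad \frac{\partial L}{\partial b_j} = \sum_{i=1}^{N}\left(\frac{\partial L}{\partial y}\right)_{ij}$$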
cell 5 ReLU forward pass
# Test the relu_forward function

x = np.linspace(-0.5, 0.5, num=12).reshape(3, 4)

out, _ = relu_forward(x)
correct_out = np.array([[ 0.,          0.,          0.,          0.,        ],
                        [ 0.,          0.,          0.04545455,  0.13636364,],
                        [ 0.22727273,  0.31818182,  0.40909091,  0.5,       ]])

# Compare your output with ours. The error should be around 1e-8
print 'Testing relu_forward function:'
print 'difference: ', rel_error(out, correct_out)
Result:
Contents of relu_forward(x):
def relu_forward(x):
  """
  Computes the forward pass for a layer of rectified linear units (ReLUs).

  Input:
  - x: Inputs, of any shape

  Returns a tuple of:
  - out: Output, of the same shape as x
  - cache: x
  """
  out = None
  #############################################################################
  # TODO: Implement the ReLU forward pass.                                    #
  #############################################################################
  out = x * (x > 0)
  #############################################################################
  #                             END OF YOUR CODE                              #
  #############################################################################
  cache = x
  return out, cache
cell 6 ReLU backward pass
x = np.random.randn(10, 10)
dout = np.random.randn(*x.shape)

dx_num = eval_numerical_gradient_array(lambda x: relu_forward(x)[0], x, dout)

_, cache = relu_forward(x)
dx = relu_backward(dout, cache)

# The error should be around 1e-12
print 'Testing relu_backward function:'
print 'dx error: ', rel_error(dx_num, dx)
Result:
Contents of relu_backward(dout, cache):
def relu_backward(dout, cache):
  """
  Computes the backward pass for a layer of rectified linear units (ReLUs).

  Input:
  - dout: Upstream derivatives, of any shape
  - cache: Input x, of same shape as dout

  Returns:
  - dx: Gradient with respect to x
  """
  dx, x = None, cache
  #############################################################################
  # TODO: Implement the ReLU backward pass.                                   #
  #############################################################################
  dx = dout * (x >= 0)
  #############################################################################
  #                             END OF YOUR CODE                              #
  #############################################################################
  return dx
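As an equation, the ReLU backward pass simply masks the upstream gradient by where the input was positive:

$$\frac{\partial L}{\partial x_i} = \frac{\partial L}{\partial y_i}\,\mathbf{1}[x_i > 0]$$

One detail: the forward pass above uses the mask x > 0 while the backward pass uses x >= 0. The two only differ at exactly x = 0 (where the subgradient is conventionally taken to be 0), so the numerical gradient check on random inputs passes either way.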
cell 7 The combined affine + ReLU layer:
from cs231n.layer_utils import affine_relu_forward, affine_relu_backward

x = np.random.randn(2, 3, 4)
w = np.random.randn(12, 10)
b = np.random.randn(10)
dout = np.random.randn(2, 10)

out, cache = affine_relu_forward(x, w, b)
dx, dw, db = affine_relu_backward(dout, cache)

dx_num = eval_numerical_gradient_array(lambda x: affine_relu_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_relu_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_relu_forward(x, w, b)[0], b, dout)

dx = dx.reshape(2, 3, 4)
print 'Testing affine_relu_forward:'
print 'dx error: ', rel_error(dx_num, dx)
print 'dw error: ', rel_error(dw_num, dw)
print 'db error: ', rel_error(db_num, db)
Result:
affine_relu_forward(x, w, b):
def affine_relu_forward(x, w, b):
  """
  Convenience layer that performs an affine transform followed by a ReLU

  Inputs:
  - x: Input to the affine layer
  - w, b: Weights for the affine layer

  Returns a tuple of:
  - out: Output from the ReLU
  - cache: Object to give to the backward pass
  """
  a, fc_cache = affine_forward(x, w, b)
  out, relu_cache = relu_forward(a)
  cache = (fc_cache, relu_cache)
  return out, cache
affine_relu_backward(dout, cache):
def affine_relu_backward(dout, cache):
  """
  Backward pass for the affine-relu convenience layer
  """
  fc_cache, relu_cache = cache
  da = relu_backward(dout, relu_cache)
  dx, dw, db = affine_backward(da, fc_cache)
  return dx, dw, db
cell 8 Softmax and SVM loss layers
The code for these two loss layers was implemented in an earlier assignment and is provided in the original files, so it is not explained again here; the idea is the same as above. A sketch of the softmax loss follows.
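For completeness, here is a minimal sketch of a vectorized softmax loss in the style this assignment uses (the actual softmax_loss in cs231n/layers.py may differ in details, but it has the same (loss, dx) interface):

def softmax_loss_sketch(x, y):
  """ x: scores of shape (N, C); y: integer labels of shape (N,). """
  # shift the scores for numerical stability before exponentiating
  shifted = x - np.max(x, axis=1, keepdims=True)
  probs = np.exp(shifted) / np.sum(np.exp(shifted), axis=1, keepdims=True)
  N = x.shape[0]
  # mean cross-entropy loss over the minibatch
  loss = -np.sum(np.log(probs[np.arange(N), y])) / N
  # gradient of that loss with respect to the scores
  dx = probs.copy()
  dx[np.arange(N), y] -= 1
  dx /= N
  return loss, dx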
cell 9 Two-layer network
Implement a network whose architecture is affine - relu - affine - softmax.
The underlying principle is still the chain rule:
first run the forward pass, caching the intermediate values that the later partial derivatives will need, then run the backward pass.
N, D, H, C = 3, 5, 50, 7
X = np.random.randn(N, D)
y = np.random.randint(C, size=N)

std = 1e-2
model = TwoLayerNet(input_dim=D, hidden_dim=H, num_classes=C, weight_scale=std)
# 3 examples, 5 input dims, 50 hidden units, 7 classes
# W1 (5, 50)   b1 (50,)   W2 (50, 7)   b2 (7,)

print 'Testing initialization ... '
W1_std = abs(model.params['W1'].std() - std)
b1 = model.params['b1']
W2_std = abs(model.params['W2'].std() - std)
b2 = model.params['b2']
assert W1_std < std / 10, 'First layer weights do not seem right'
assert np.all(b1 == 0), 'First layer biases do not seem right'
assert W2_std < std / 10, 'Second layer weights do not seem right'
assert np.all(b2 == 0), 'Second layer biases do not seem right'

print 'Testing test-time forward pass ... '
model.params['W1'] = np.linspace(-0.7, 0.3, num=D*H).reshape(D, H)
model.params['b1'] = np.linspace(-0.1, 0.9, num=H)
model.params['W2'] = np.linspace(-0.3, 0.4, num=H*C).reshape(H, C)
model.params['b2'] = np.linspace(-0.9, 0.1, num=C)
X = np.linspace(-5.5, 4.5, num=N*D).reshape(D, N).T
scores = model.loss(X)
correct_scores = np.asarray(
  [[11.53165108, 12.2917344,  13.05181771, 13.81190102, 14.57198434, 15.33206765, 16.09215096],
   [12.05769098, 12.74614105, 13.43459113, 14.1230412,  14.81149128, 15.49994135, 16.18839143],
   [12.58373087, 13.20054771, 13.81736455, 14.43418138, 15.05099822, 15.66781506, 16.2846319 ]])
scores_diff = np.abs(scores - correct_scores).sum()
assert scores_diff < 1e-6, 'Problem with test-time forward pass'

print 'Testing training loss (no regularization)'
y = np.asarray([0, 5, 1])
loss, grads = model.loss(X, y)
correct_loss = 3.4702243556
assert abs(loss - correct_loss) < 1e-10, 'Problem with training-time loss'

model.reg = 1.0
loss, grads = model.loss(X, y)
correct_loss = 26.5948426952
assert abs(loss - correct_loss) < 1e-10, 'Problem with regularization loss'

for reg in [0.0, 0.7]:
  print 'Running numeric gradient check with reg = ', reg
  model.reg = reg
  loss, grads = model.loss(X, y)

  for name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]
    grad_num = eval_numerical_gradient(f, model.params[name], verbose=False)
    print '%s relative error: %.2e' % (name, rel_error(grad_num, grads[name]))
Result:
The TwoLayerNet class involved:
class TwoLayerNet(object):
  """
  A two-layer fully-connected neural network with ReLU nonlinearity and
  softmax loss that uses a modular layer design. We assume an input dimension
  of D, a hidden dimension of H, and perform classification over C classes.

  The architecture should be affine - relu - affine - softmax.

  Note that this class does not implement gradient descent; instead, it
  will interact with a separate Solver object that is responsible for running
  optimization.

  The learnable parameters of the model are stored in the dictionary
  self.params that maps parameter names to numpy arrays.
  """

  def __init__(self, input_dim=3 * 32 * 32, hidden_dim=100, num_classes=10,
               weight_scale=1e-3, reg=0.0):
    """
    Initialize a new network.

    Inputs:
    - input_dim: An integer giving the size of the input
    - hidden_dim: An integer giving the size of the hidden layer
    - num_classes: An integer giving the number of classes to classify
    - weight_scale: Scalar giving the standard deviation for random
      initialization of the weights.
    - reg: Scalar giving L2 regularization strength.
    """
    self.params = {}
    self.reg = reg
    self.D = input_dim
    self.M = hidden_dim
    self.C = num_classes

    w1 = weight_scale * np.random.randn(self.D, self.M)
    b1 = np.zeros(hidden_dim)
    w2 = weight_scale * np.random.randn(self.M, self.C)
    b2 = np.zeros(self.C)

    self.params.update({'W1': w1,
                        'W2': w2,
                        'b1': b1,
                        'b2': b2})

  def loss(self, X, y=None):
    """
    Compute loss and gradient for a minibatch of data.

    Inputs:
    - X: Array of input data of shape (N, d_1, ..., d_k)
    - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

    Returns:
    If y is None, then run a test-time forward pass of the model and return:
    - scores: Array of shape (N, C) giving classification scores, where
      scores[i, c] is the classification score for X[i] and class c.

    If y is not None, then run a training-time forward and backward pass and
    return a tuple of:
    - loss: Scalar value giving the loss
    - grads: Dictionary with the same keys as self.params, mapping parameter
      names to gradients of the loss with respect to those parameters.
    """

    ###########################################################################
    # TODO: Implement the forward and backward pass for the two-layer net.    #
    # Store the loss in the loss variable and gradients in the grads          #
    # dictionary. Compute data loss using softmax, and make sure that         #
    # grads[k] holds the gradients for self.params[k]. Don't forget to add    #
    # L2 regularization!                                                      #
    #                                                                         #
    # NOTE: To ensure that your implementation matches ours and you pass the  #
    # automated tests, make sure that your L2 regularization includes a       #
    # factor of 0.5 to simplify the expression for the gradient.              #
    ###########################################################################

    W1, b1, W2, b2 = self.params['W1'], self.params['b1'], \
        self.params['W2'], self.params['b2']

    X = X.reshape(X.shape[0], self.D)
    # Forward into first layer
    hidden_layer, cache_hidden_layer = affine_relu_forward(X, W1, b1)
    # Forward into second layer
    scores, cache_scores = affine_forward(hidden_layer, W2, b2)

    # If y is None then we are in test mode so just return scores
    if y is None:
      return scores

    data_loss, dscores = softmax_loss(scores, y)
    reg_loss = 0.5 * self.reg * np.sum(W1**2)
    reg_loss += 0.5 * self.reg * np.sum(W2**2)
    loss = data_loss + reg_loss

    # Backpropagation
    grads = {}
    # Backprop into second layer
    dx1, dW2, db2 = affine_backward(dscores, cache_scores)
    dW2 += self.reg * W2

    # Backprop into first layer
    dx, dW1, db1 = affine_relu_backward(dx1, cache_hidden_layer)
    dW1 += self.reg * W1

    grads.update({'W1': dW1,
                  'b1': db1,
                  'W2': dW2,
                  'b2': db2})

    return loss, grads
cell 10 Train the model with a standalone Solver.
In earlier assignments the training loop was a method of the model class itself; a separate Solver makes it easy to adjust hyperparameters such as the batch size and learning-rate decay, and it keeps the logic cleaner.
The resulting loss and accuracy curves are then displayed as plots; a rough sketch of the whole setup follows.
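A sketch, assuming the standard cs231n Solver interface (update_rule, optim_config, lr_decay, num_epochs, batch_size); the hyperparameter values here are illustrative rather than the ones actually used:

model = TwoLayerNet(reg=1e-2)
solver = Solver(model, data,
                update_rule='sgd',
                optim_config={'learning_rate': 1e-3},
                lr_decay=0.95,
                num_epochs=10,
                batch_size=100,
                print_every=100)
solver.train()

# the Solver records histories that can be plotted afterwards
plt.subplot(2, 1, 1)
plt.plot(solver.loss_history, 'o')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.subplot(2, 1, 2)
plt.plot(solver.train_acc_history, '-o', label='train')
plt.plot(solver.val_acc_history, '-o', label='val')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.show()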
cell 13 Build a model with a configurable number of hidden layers
N, D, H1, H2, C = 2, 15, 20, 30, 10
X = np.random.randn(N, D)
y = np.random.randint(C, size=(N,))

for reg in [0, 3.14]:
  print 'Running check with reg = ', reg
  model = FullyConnectedNet([H1, H2], input_dim=D, num_classes=C,
                            reg=reg, weight_scale=5e-2, dtype=np.float64)

  loss, grads = model.loss(X, y)
  print 'Initial loss: ', loss

  for name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]
    grad_num = eval_numerical_gradient(f, model.params[name], verbose=False, h=1e-5)
    print '%s relative error: %.2e' % (name, rel_error(grad_num, grads[name]))
Since the FullyConnectedNet class is fairly long, it is not pasted here in full.
The main steps:
For a variable number of layers, build the corresponding parameters:
# here dims is assumed to be [input_dim] + hidden_dims + [num_classes]
Ws = {'W' + str(i + 1): weight_scale * np.random.randn(dims[i], dims[i + 1])
      for i in range(len(dims) - 1)}
b = {'b' + str(i + 1): np.zeros(dims[i + 1])
     for i in range(len(dims) - 1)}
之后便是使用這些參數,原理是一致的。
cell 16 SGD+Momentum
def sgd_momentum(w, dw, config=None):
  """
  Performs stochastic gradient descent with momentum.

  config format:
  - learning_rate: Scalar learning rate.
  - momentum: Scalar between 0 and 1 giving the momentum value.
    Setting momentum = 0 reduces to sgd.
  - velocity: A numpy array of the same shape as w and dw used to store a
    moving average of the gradients.
  """
  if config is None: config = {}
  config.setdefault('learning_rate', 1e-2)
  config.setdefault('momentum', 0.9)
  v = config.get('velocity', np.zeros_like(w))

  next_w = None
  #############################################################################
  # TODO: Implement the momentum update formula. Store the updated value in   #
  # the next_w variable. You should also use and update the velocity v.       #
  #############################################################################
  v = config['momentum'] * v - config['learning_rate'] * dw
  next_w = v + w
  #############################################################################
  #                             END OF YOUR CODE                              #
  #############################################################################
  config['velocity'] = v

  return next_w, config
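In formula form, with momentum coefficient μ and learning rate η, the update implemented above is:

$$v_{t+1} = \mu\,v_t - \eta\,\nabla_w L,\qquad w_{t+1} = w_t + v_{t+1}$$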
Compared with plain SGD, sgd_momentum converges faster.
cell 18 rmsprop
def rmsprop(x, dx, config=None):
  """
  Uses the RMSProp update rule, which uses a moving average of squared gradient
  values to set adaptive per-parameter learning rates.

  config format:
  - learning_rate: Scalar learning rate.
  - decay_rate: Scalar between 0 and 1 giving the decay rate for the squared
    gradient cache.
  - epsilon: Small scalar used for smoothing to avoid dividing by zero.
  - cache: Moving average of second moments of gradients.
  """
  if config is None: config = {}
  config.setdefault('learning_rate', 1e-2)
  config.setdefault('decay_rate', 0.99)
  config.setdefault('epsilon', 1e-8)
  config.setdefault('cache', np.zeros_like(x))

  next_x = None
  #############################################################################
  # TODO: Implement the RMSprop update formula, storing the next value of x   #
  # in the next_x variable. Don't forget to update cache value stored in      #
  # config['cache'].                                                          #
  #############################################################################
  config['cache'] = config['decay_rate'] * config['cache'] + (1 - config['decay_rate']) * dx**2
  next_x = x - config['learning_rate'] * dx / (np.sqrt(config['cache']) + config['epsilon'])
  #############################################################################
  #                             END OF YOUR CODE                              #
  #############################################################################

  return next_x, config
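The corresponding update rule, with decay rate ρ and a small ε for numerical stability:

$$c_{t+1} = \rho\,c_t + (1-\rho)\,(\nabla_x L)^2,\qquad x_{t+1} = x_t - \frac{\eta\,\nabla_x L}{\sqrt{c_{t+1}} + \epsilon}$$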
cell 19 adam
def adam(x, dx, config=None):
  """
  Uses the Adam update rule, which incorporates moving averages of both the
  gradient and its square and a bias correction term.

  config format:
  - learning_rate: Scalar learning rate.
  - beta1: Decay rate for moving average of first moment of gradient.
  - beta2: Decay rate for moving average of second moment of gradient.
  - epsilon: Small scalar used for smoothing to avoid dividing by zero.
  - m: Moving average of gradient.
  - v: Moving average of squared gradient.
  - t: Iteration number.
  """
  if config is None: config = {}
  config.setdefault('learning_rate', 1e-3)
  config.setdefault('beta1', 0.9)
  config.setdefault('beta2', 0.999)
  config.setdefault('epsilon', 1e-8)
  config.setdefault('m', np.zeros_like(x))
  config.setdefault('v', np.zeros_like(x))
  config.setdefault('t', 0)

  next_x = None
  beta_1 = config['beta1']
  beta_2 = config['beta2']
  #############################################################################
  # TODO: Implement the Adam update formula, storing the next value of x in   #
  # the next_x variable. Don't forget to update the m, v, and t variables     #
  # stored in config.                                                         #
  #############################################################################
  config['t'] = config['t'] + 1
  config['m'] = config['m'] * config['beta1'] + (1 - config['beta1']) * dx
  config['v'] = config['v'] * config['beta2'] + (1 - config['beta2']) * (dx ** 2)
  # fold the bias corrections into an effective step size; compute it locally
  # so the learning rate stored in config is not permanently decayed
  bias_1 = 1 - (beta_1 ** config['t'])
  bias_2 = np.sqrt(1 - (beta_2 ** config['t']))
  lr_t = config['learning_rate'] * (bias_2 / bias_1)
  next_x = x - lr_t * config['m'] / (np.sqrt(config['v'] + config['epsilon']))
  #############################################################################
  #                             END OF YOUR CODE                              #
  #############################################################################

  return next_x, config
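The code above implements the standard Adam rule with the two bias corrections folded into an effective step size (and with ε placed inside the square root, a harmless variant of the usual √v_t + ε):

$$m_t = \beta_1 m_{t-1} + (1-\beta_1)\,\nabla_x L,\qquad v_t = \beta_2 v_{t-1} + (1-\beta_2)\,(\nabla_x L)^2$$
$$x_{t+1} = x_t - \eta\,\frac{\sqrt{1-\beta_2^{\,t}}}{1-\beta_1^{\,t}}\cdot\frac{m_t}{\sqrt{v_t + \epsilon}}$$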
A comparison of the convergence speed of the four update rules:
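A sketch of how such a comparison can be run: train one small FullyConnectedNet per update rule with the Solver and overlay the loss histories (hyperparameters here are illustrative; in practice each rule typically needs its own learning rate):

solvers = {}
for update_rule in ['sgd', 'sgd_momentum', 'rmsprop', 'adam']:
  model = FullyConnectedNet([100, 100], weight_scale=5e-2)
  solver = Solver(model, data,
                  num_epochs=5, batch_size=100,
                  update_rule=update_rule,
                  optim_config={'learning_rate': 1e-3},
                  verbose=False)
  solver.train()
  solvers[update_rule] = solver

for update_rule, solver in solvers.items():
  plt.plot(solver.loss_history, 'o', label=update_rule)
plt.title('Training loss')
plt.xlabel('Iteration')
plt.legend(loc='upper right')
plt.show()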
The complete code will be provided at the end.
Appendix: QQ group for working through CS231n: 578975100 (verification message: DL-CS231n).