以下實現參考吳恩達的作業。
一、 padding
def zero_pad(X, pad):
    """
    Zero-pad the height and width axes of a batch of images.

    Arguments:
    X -- numpy array of shape (m, n_H, n_W, n_C) representing a batch of m images
    pad -- integer, number of zero rows/columns added on each side of the
           height and width axes

    Returns:
    X_pad -- padded array of shape (m, n_H + 2*pad, n_W + 2*pad, n_C)
    """
    untouched = (0, 0)  # the batch axis and the channel axis get no padding
    border = (pad, pad)
    pad_width = (untouched, border, border, untouched)
    return np.pad(X, pad_width, mode="constant", constant_values=0)
從zero_pad的函數中,我們可以看出,我們只需要對原圖片矩陣進行padding操作,而m是圖片的個數,n_C則是channel的個數,這兩個維度並不需要我們做任何操作。
二、 卷積計算
def conv_single_step(a_slice_prev, W, b):
    """
    Apply one filter to a single window of the input.

    Arguments:
    a_slice_prev -- window of the input; multiplied element-wise with W
    W -- filter weights, broadcast-compatible with a_slice_prev
    b -- bias holding exactly one value (a scalar, or a size-1 array such as
         one of shape (1, 1, 1))

    Returns:
    Z -- scalar: the sum of the element-wise product a_slice_prev * W, plus the bias
    """
    weighted_sum = np.sum(a_slice_prev * W)
    # Callers such as conv_forward pass b[:, :, :, c], a size-1 array with
    # ndim > 0. Calling float() directly on such an array is deprecated since
    # NumPy 1.25, so collapse it to a 0-d array first.
    bias = float(np.squeeze(b))
    return weighted_sum + bias
卷積計算的過程中,a_slice_prev是我們在圖片矩陣中取出的窗口,而W是filter的參數。我們先將兩者逐元素相乘,隨後對求得的結果進行求和,然後加上偏置b。
三、 卷積forward
def conv_forward(A_prev, W, b, hparameters):
    """
    Implements the forward propagation for a convolution function

    Arguments:
    A_prev -- output activations of the previous layer, numpy array of shape (m, n_H_prev, n_W_prev, n_C_prev)
    W -- Weights, numpy array of shape (f, f, n_C_prev, n_C)
    b -- Biases, numpy array of shape (1, 1, 1, n_C)
    hparameters -- python dictionary containing "stride" and "pad"

    Returns:
    Z -- conv output, numpy array of shape (m, n_H, n_W, n_C)
    cache -- tuple (A_prev, W, b, hparameters) needed for the conv_backward() function
    """
    # Retrieve dimensions from A_prev's shape
    (m, n_H_prev, n_W_prev, _) = A_prev.shape

    # Filter size and number of filters come from W (filters are square: f x f)
    (f, _, _, n_C) = W.shape

    # Retrieve information from "hparameters"
    stride = hparameters['stride']
    pad = hparameters['pad']

    # Output size: int() truncates the fractional part of the division
    n_H = int((n_H_prev + 2 * pad - f) / stride + 1)
    n_W = int((n_W_prev + 2 * pad - f) / stride + 1)

    # Initialize the output volume Z with zeros
    Z = np.zeros((m, n_H, n_W, n_C))

    # Create A_prev_pad by padding A_prev
    A_prev_pad = zero_pad(A_prev, pad)

    for i in range(m):                      # loop over the batch of training examples
        a_prev_pad = A_prev_pad[i]          # ith training example's padded activation
        for h in range(n_H):                # loop over vertical axis of the output volume
            vert_start = h * stride
            vert_end = vert_start + f
            for w in range(n_W):            # loop over horizontal axis of the output volume
                horiz_start = w * stride
                horiz_end = horiz_start + f

                # The window does not depend on the output channel, so it is
                # sliced once here and reused for every filter.
                a_slice_prev = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]

                for c in range(n_C):        # loop over channels (= #filters) of the output volume
                    # Convolve the (3D) slice with filter c and its bias to get one output value
                    Z[i, h, w, c] = conv_single_step(a_slice_prev, W[:, :, :, c], b[:, :, :, c])

    # Making sure the output shape is correct
    assert(Z.shape == (m, n_H, n_W, n_C))

    # Save information in "cache" for the backprop
    cache = (A_prev, W, b, hparameters)

    return Z, cache
參數中包含我們的圖片A_prev、W、b,以及包含pad和stride的超參數字典hparameters。我們首先通過元組解包的方式獲取了所有形狀參數,並根據輸出形狀對輸出結果Z進行初始化。隨後我們便可以對每一個圖片中的每一個窗口進行遍歷:窗口的起點由輸出位置乘以stride得到,再加上窗口長度f便得到終點,從而確定窗口的橫縱坐標位置。最後通過卷積計算得到最終結果。注意這裡同一組filter參數W和b適用於圖中的每一個窗口(參數共享)。
四、 池化層
def pool_forward(A_prev, hparameters, mode = "max"):
    """
    Implements the forward pass of the pooling layer

    Arguments:
    A_prev -- Input data, numpy array of shape (m, n_H_prev, n_W_prev, n_C_prev)
    hparameters -- python dictionary containing "f" and "stride"
    mode -- the pooling mode you would like to use, defined as a string ("max" or "average")

    Returns:
    A -- output of the pool layer, a numpy array of shape (m, n_H, n_W, n_C)
    cache -- cache used in the backward pass of the pooling layer, contains the input and hparameters

    Raises:
    ValueError -- if mode is neither "max" nor "average"
    """
    # Pick the reduction once, up front, so that an unsupported mode fails
    # loudly instead of silently producing an all-zero output.
    if mode == "max":
        reduce_window = np.max
    elif mode == "average":
        reduce_window = np.mean
    else:
        raise ValueError('mode must be "max" or "average", got %r' % (mode,))

    # Retrieve dimensions from the input shape
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape

    # Retrieve hyperparameters from "hparameters"
    f = hparameters["f"]
    stride = hparameters["stride"]

    # Define the dimensions of the output
    n_H = int(1 + (n_H_prev - f) / stride)
    n_W = int(1 + (n_W_prev - f) / stride)
    n_C = n_C_prev

    # Initialize output matrix A
    A = np.zeros((m, n_H, n_W, n_C))

    for i in range(m):                      # loop over the training examples
        for h in range(n_H):                # loop on the vertical axis of the output volume
            vert_start = h * stride
            vert_end = vert_start + f
            for w in range(n_W):            # loop on the horizontal axis of the output volume
                horiz_start = w * stride
                horiz_end = horiz_start + f
                for c in range(n_C):        # loop over the channels of the output volume
                    # Window of the ith example, channel c, reduced to one value
                    a_prev_slice = A_prev[i, vert_start:vert_end, horiz_start:horiz_end, c]
                    A[i, h, w, c] = reduce_window(a_prev_slice)

    # Store the input and hparameters in "cache" for pool_backward()
    cache = (A_prev, hparameters)

    # Making sure the output shape is correct
    assert(A.shape == (m, n_H, n_W, n_C))

    return A, cache
池化層的計算和之前的卷積層大同小異;我們需要注意的是這裡的參數中多了mode,其中包括max和average兩種模式。
五、 卷積層backward
def conv_backward(dZ, cache):
    """
    Implement the backward propagation for a convolution function

    Arguments:
    dZ -- gradient of the cost with respect to the output of the conv layer (Z), numpy array of shape (m, n_H, n_W, n_C)
    cache -- cache of values needed for the conv_backward(), output of conv_forward()

    Returns:
    dA_prev -- gradient of the cost with respect to the input of the conv layer (A_prev),
               numpy array of shape (m, n_H_prev, n_W_prev, n_C_prev)
    dW -- gradient of the cost with respect to the weights of the conv layer (W)
          numpy array of shape (f, f, n_C_prev, n_C)
    db -- gradient of the cost with respect to the biases of the conv layer (b)
          numpy array of shape (1, 1, 1, n_C)
    """
    # Retrieve information from "cache"
    (A_prev, W, b, hparameters) = cache

    # Retrieve dimensions from A_prev's shape
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape

    # Filter size (filters are square: f x f)
    f = W.shape[0]

    # Retrieve information from "hparameters"
    stride = hparameters['stride']
    pad = hparameters['pad']

    # Retrieve dimensions from dZ's shape
    (m, n_H, n_W, n_C) = dZ.shape

    # Initialize dA_prev, dW, db with the correct shapes
    dA_prev = np.zeros(A_prev.shape)
    dW = np.zeros(W.shape)
    db = np.zeros(b.shape)

    # Pad A_prev and dA_prev
    A_prev_pad = zero_pad(A_prev, pad)
    dA_prev_pad = zero_pad(dA_prev, pad)

    for i in range(m):                      # loop over the training examples
        # ith training example from A_prev_pad and dA_prev_pad;
        # da_prev_pad is a view, so the in-place += below accumulates into dA_prev_pad
        a_prev_pad = A_prev_pad[i]
        da_prev_pad = dA_prev_pad[i]

        for h in range(n_H):                # loop over vertical axis of the output volume
            vert_start = h * stride
            vert_end = vert_start + f
            for w in range(n_W):            # loop over horizontal axis of the output volume
                horiz_start = w * stride
                horiz_end = horiz_start + f

                # The input window is the same for every output channel
                a_slice = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]

                for c in range(n_C):        # loop over the channels of the output volume
                    grad = dZ[i, h, w, c]
                    # Accumulate gradients for the window and for filter c's parameters
                    da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += W[:, :, :, c] * grad
                    dW[:, :, :, c] += a_slice * grad
                    db[:, :, :, c] += grad

        # Strip the padding. Explicit end indices are used because the slice
        # [pad:-pad] is empty when pad == 0, which would make this assignment fail.
        dA_prev[i, :, :, :] = da_prev_pad[pad:pad + n_H_prev, pad:pad + n_W_prev, :]

    # Making sure the output shape is correct
    assert(dA_prev.shape == (m, n_H_prev, n_W_prev, n_C_prev))

    return dA_prev, dW, db
這裡對於dW、db的計算與BP神經網絡的計算相似。在計算梯度時,我們對輸出dZ的所有位置進行遍歷,每個位置進行一次計算:把該位置的梯度dZ[i, h, w, c]乘上對應的filter累加到dA_prev的窗口上,乘上對應的輸入窗口累加到dW上,並直接累加到db上。
六、池化層backward
我們了解池化層的原理之後,就需要根據其特徵構造backward。對於max池化,我們需要創建一個mask來標記窗口中最大值所在的位置,因為只有這個位置會得到梯度。
def create_mask_from_window(x):
    """
    Build a boolean mask marking where x attains its maximum.

    Arguments:
    x -- Array of shape (f, f)

    Returns:
    mask -- Boolean array of the same shape as x, True wherever the entry
            equals the maximum of x (every tied entry is marked when the
            maximum occurs more than once).
    """
    peak = np.max(x)
    return x == peak
對於average我們需要分配到窗口中的每個值。
def distribute_value(dz, shape):
    """
    Spread a scalar evenly over a matrix of the given shape.

    Arguments:
    dz -- input scalar
    shape -- the shape (n_H, n_W) of the output matrix over which dz is spread

    Returns:
    a -- Array of size (n_H, n_W) in which every entry is dz / (n_H * n_W)
    """
    rows, cols = shape
    cell_count = rows * cols
    # Each cell receives an equal share of dz
    share = dz / cell_count
    return share * np.ones((rows, cols))
之後我們便可以通過和卷積層backward相同的方法,對圖片進行遍歷,我們將每一個窗口得到的梯度累加到dA_prev的對應位置,得到這一層輸入的梯度dA_prev。
def pool_backward(dA, cache, mode = "max"):
    """
    Implements the backward pass of the pooling layer

    Arguments:
    dA -- gradient of cost with respect to the output of the pooling layer, same shape as A
    cache -- cache output from the forward pass of the pooling layer, contains the layer's input and hparameters
    mode -- the pooling mode you would like to use, defined as a string ("max" or "average")

    Returns:
    dA_prev -- gradient of cost with respect to the input of the pooling layer, same shape as A_prev

    Raises:
    ValueError -- if mode is neither "max" nor "average"
    """
    # Reject unsupported modes up front; otherwise neither branch below would
    # run and an all-zero gradient would be returned silently.
    if mode not in ("max", "average"):
        raise ValueError('mode must be "max" or "average", got %r' % (mode,))

    # Retrieve information from cache
    (A_prev, hparameters) = cache

    # Retrieve hyperparameters from "hparameters"
    stride = hparameters['stride']
    f = hparameters['f']

    # Retrieve dimensions from dA's shape
    m, n_H, n_W, n_C = dA.shape

    # Initialize dA_prev with zeros
    dA_prev = np.zeros(A_prev.shape)

    for i in range(m):                      # loop over the training examples
        a_prev = A_prev[i]                  # ith training example from A_prev
        for h in range(n_H):                # loop on the vertical axis
            vert_start = h * stride
            vert_end = vert_start + f
            for w in range(n_W):            # loop on the horizontal axis
                horiz_start = w * stride
                horiz_end = horiz_start + f
                for c in range(n_C):        # loop over the channels (depth)
                    if mode == "max":
                        # Only entries equal to the window's maximum receive the gradient
                        a_prev_slice = a_prev[vert_start:vert_end, horiz_start:horiz_end, c]
                        mask = create_mask_from_window(a_prev_slice)
                        dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += mask * dA[i, h, w, c]
                    else:  # mode == "average"
                        # Every entry of the f x f window receives an equal share of the gradient
                        da = dA[i, h, w, c]
                        shape = (f, f)
                        dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += distribute_value(da, shape)

    # Making sure the output shape is correct
    assert(dA_prev.shape == A_prev.shape)

    return dA_prev
