CNN,POOLING反向傳播
阿新 • • 發佈:2018-12-06
def conv_backward(dZ, cache): #從緩衝中恢復引數,前向傳播時儲存的 (A_prev, W, b, hparameters) = cache (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape (f, f, n_C_prev, n_C) = W.shape stride = hparameters['stride'] pad = hparameters['pad'] (m, n_H, n_W, n_C) = dZ.shape dA_prev = np.zeros((m, n_H_prev, n_W_prev, n_C_prev)) dW = np.zeros((f, f, n_C_prev, n_C)) db = np.zeros((1, 1, 1, n_C)) #補零操作 A_prev_pad = zero_pad(A_prev, pad) dA_prev_pad = zero_pad(dA_prev, pad) for i in range(m): # 4重迴圈 第一個樣本,第二個高度,第三個寬度,第四個厚度 a_prev_pad = A_prev_pad[i] da_prev_pad = dA_prev_pad[i] for h in range(n_H): for w in range(n_W): for c in range(n_C): #得到過濾器的二維座標 vert_start = h * stride vert_end = vert_start + f horiz_start = w * stride horiz_end = horiz_start + f #吧過濾器切出來 a_slice = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += W[:,:,:,c] * dZ[i, h, w, c] dW[:,:,:,c] += a_slice * dZ[i, h, w, c]#主要注意這裡dw值是所有過濾器算出來相加的 db[:,:,:,c] += dZ[i, h, w, c] # Set the ith training example's dA_prev to the unpaded da_prev_pad (Hint: use X[pad:-pad, pad:-pad, :]) dA_prev[i, :, :, :] = dA_prev_pad[i, pad:-pad, pad:-pad, :] return dA_prev, dW, db
主要是理解卷積反向傳播dw計算是 所有過濾器加起來
池化層沒有引數更新,但是要把值傳遞回去,需要把每個值映射回卷積大小
def pool_backward(dA, cache, mode = "max"): (A_prev, hparameters) = cache stride = hparameters['stride'] f = hparameters['f'] m, n_H_prev, n_W_prev, n_C_prev = A_prev.shape m, n_H, n_W, n_C = dA.shape dA_prev = np.zeros_like(A_prev) for i in range(m): a_prev = A_prev[i] for h in range(n_H): for w in range(n_W): for c in range(n_C): vert_start = h * stride vert_end = vert_start + f horiz_start = w * stride horiz_end = horiz_start + f if mode == "max": a_prev_slice = a_prev[vert_start:vert_end, horiz_start:horiz_end, c] mask = create_mask_from_window(a_prev_slice) dA_prev[i, vert_start: vert_end, horiz_start: horiz_end, c] += mask * dA[i, vert_start, horiz_start, c] #這裡是把每個值投射到[f, f]大小,小變大 elif mode == "average": da = dA[i, vert_start, horiz_start, c] shape = (f, f) dA_prev[i, vert_start: vert_end, horiz_start: horiz_end, c] += distribute_value(da, shape) return dA_prev