diff --git a/A-基础教程/A2-神经网络基本原理/en-us/Step7 - DNN/16.2-L2正则.md b/A-基础教程/A2-神经网络基本原理/en-us/Step7 - DNN/16.2-L2正则.md
index 9557cebb..f6b005f1 100644
--- a/A-基础教程/A2-神经网络基本原理/en-us/Step7 - DNN/16.2-L2正则.md
+++ b/A-基础教程/A2-神经网络基本原理/en-us/Step7 - DNN/16.2-L2正则.md
@@ -157,7 +157,7 @@
 $$
 \begin{aligned}
 \frac{dJ}{dW2}&=\frac{dJ}{dZ2}\frac{dZ2}{dW2}+\frac{dJ}{dW2} \\
-&=(Z2-Y)\cdot A1^T+\lambda \odot W2
+&=A1^T\cdot (Z2-Y)+\lambda \odot W2
 \end{aligned}
 \tag{9}
 $$
@@ -170,9 +170,9 @@
 $$dB2=dZ2 \tag{10}$$
 
 Continuing the backpropagation to the first layer of the network:
 
-$$dZ1 = W2^T \times dZ2 \odot A1 \odot (1-A1) \tag{11}$$
+$$dZ1 = dZ2 \cdot W2^T \odot A1 \odot (1-A1) \tag{11}$$
 
-$$dW1= dZ1 \cdot X^T + \lambda \odot W1 \tag{12}$$
+$$dW1= X^T \cdot dZ1 + \lambda \odot W1 \tag{12}$$
 
 $$dB1= dZ1 \tag{13}$$
@@ -183,13 +183,13 @@ $$dB1= dZ1 \tag{13}$$
     dZ = delta_in
     m = self.x.shape[1]
     if self.regular == RegularMethod.L2:
-        self.weights.dW = (np.dot(dZ, self.x.T) + self.lambd * self.weights.W) / m
+        self.weights.dW = (np.dot(self.x.T, dZ) + self.lambd * self.weights.W) / m
     else:
-        self.weights.dW = np.dot(dZ, self.x.T) / m
+        self.weights.dW = np.dot(self.x.T, dZ) / m
     # end if
     self.weights.dB = np.sum(dZ, axis=1, keepdims=True) / m
-    delta_out = np.dot(self.weights.W.T, dZ)
+    delta_out = np.dot(dZ, self.weights.W.T)
     if len(self.input_shape) > 2:
         return delta_out.reshape(self.input_shape)
diff --git a/A-基础教程/A2-神经网络基本原理/en-us/Step7 - DNN/16.3-L1正则.md b/A-基础教程/A2-神经网络基本原理/en-us/Step7 - DNN/16.3-L1正则.md
index 670014ce..7e7b923c 100644
--- a/A-基础教程/A2-神经网络基本原理/en-us/Step7 - DNN/16.3-L1正则.md
+++ b/A-基础教程/A2-神经网络基本原理/en-us/Step7 - DNN/16.3-L1正则.md
@@ -147,10 +147,10 @@ $$J(w,b) = J_0 + \lambda (\lvert W1 \rvert+\lvert W2 \rvert)$$
 $$
 \begin{aligned}
 dW2&=\frac{dJ}{dW2}=\frac{dJ}{dZ2}\frac{dZ2}{dW2}+\frac{dJ}{dW2} \\\\
-&=dZ2 \cdot A1^T+\lambda \odot sign(W2)
+&=A1^T \cdot dZ2+\lambda \odot sign(W2)
 \end{aligned}
 $$
 
-$$dW1= dZ1 \cdot X^T + \lambda \odot sign(W1) $$
+$$dW1= X^T \cdot dZ1 + \lambda \odot sign(W1) $$
 
 As the formulas above show, the only effect of the regularization term during backpropagation is that, when computing the gradient of W, a $\lambda \odot sign(W)$ term is added; sign is the sign function, which returns the sign of its argument, i.e. 1 or -1. So we can modify the backward function in `FullConnectionLayer.py` as follows:
@@ -159,11 +159,11 @@ def backward(self, delta_in, idx):
     dZ = delta_in
     m = self.x.shape[1]
     if self.regular == RegularMethod.L2:
-        self.weights.dW = (np.dot(dZ, self.x.T) + self.lambd * self.weights.W) / m
+        self.weights.dW = (np.dot(self.x.T, dZ) + self.lambd * self.weights.W) / m
     elif self.regular == RegularMethod.L1:
-        self.weights.dW = (np.dot(dZ, self.x.T) + self.lambd * np.sign(self.weights.W)) / m
+        self.weights.dW = (np.dot(self.x.T, dZ) + self.lambd * np.sign(self.weights.W)) / m
     else:
-        self.weights.dW = np.dot(dZ, self.x.T) / m
+        self.weights.dW = np.dot(self.x.T, dZ) / m
     # end if
     self.weights.dB = np.sum(dZ, axis=1, keepdims=True) / m
 ......
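
Both patches switch the layout convention from column-major samples (`Z = W·X`, one sample per column) to row-major samples (`Z = X·W`, one sample per row), which is why every gradient product flips its operand order. Below is a minimal NumPy sketch, not part of the patch, that sanity-checks the shapes the updated lines assume; the dimension names `m`, `n_in`, `n_out` and the value of `lambd` are made up for the example, not taken from the repository.

```python
import numpy as np

# Row-major sample convention assumed by the patched code:
#   X  : (m, n_in)    -- one sample per row
#   W  : (n_in, n_out)
#   dZ : (m, n_out)   -- gradient arriving from the next layer
m, n_in, n_out = 8, 5, 3      # illustrative sizes
lambd = 0.1                   # illustrative regularization strength

rng = np.random.default_rng(0)
X = rng.standard_normal((m, n_in))
W = rng.standard_normal((n_in, n_out))
dZ = rng.standard_normal((m, n_out))

# L2-regularized weight gradient, as in the patched backward():
dW_l2 = (np.dot(X.T, dZ) + lambd * W) / m
# L1 variant swaps the penalty term for the sign of W:
dW_l1 = (np.dot(X.T, dZ) + lambd * np.sign(W)) / m
# Gradient handed back to the previous layer:
delta_out = np.dot(dZ, W.T)

assert dW_l2.shape == W.shape        # (n_in, n_out), matches W
assert dW_l1.shape == W.shape
assert delta_out.shape == X.shape    # (m, n_in), matches X
print("shapes consistent with the row-major convention")
```

Note that in this convention the sample count is `X.shape[0]` and a bias gradient would reduce over `axis=0`; the `self.x.shape[1]` and `axis=1` context lines visible in the hunks above belong to the column-major convention and would need the matching update wherever the surrounding code adopts row-major inputs.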