My network fails to tell the inputs apart: it either outputs the average of the targets or gets biased toward one particular output. What am I doing wrong?
```python
import numpy as np

sigmoid = lambda x: 1 / (1 + np.exp(-x))
sigmoid_der = lambda x: sigmoid(x) * (1 - sigmoid(x))

ReLU = lambda x: np.maximum(0, x)
ReLU_der = lambda x: x > 0


class NeuralNetwork:
    def __init__(self, shape: tuple):
        self.layers = len(shape)  # number of layers
        self.shape = shape        # number of neurons in each layer
        self.weights = [
            np.array([np.random.rand(shape[l - 1]) for _ in range(shape[l])])
            for l in range(1, self.layers)
        ]  # list of weight matrices connecting adjacent layers
        self.weighted_sums = [np.zeros(l) for l in shape]
        self.activations = [np.zeros(l) for l in shape]

    def inspect(self):
        print("=============NeuralNetwork===============")
        print(f"Shape: {self.shape}")
        print(f"Weights: {self.weights}")
        print(f"Activations: {self.activations}")

    def forward_prop(self, X):
        self.activations[0] = X
        for l in range(1, self.layers):
            self.weighted_sums[l] = self.weights[l - 1] @ self.activations[l - 1]
            self.activations[l] = sigmoid(self.weighted_sums[l])

    def backprop(self, X, Y):
        delta = [np.empty(self.shape[l]) for l in range(1, self.layers)]  # stores the errors
        delta[-1] = (Y - self.activations[-1]) * sigmoid_der(self.weighted_sums[-1])  # output error
        for l in reversed(range(self.layers - 2)):  # propagate the error backwards
            delta[l] = self.weights[l + 1].T @ delta[l + 1] * sigmoid_der(self.weighted_sums[l])
        for l in range(self.layers - 1):  # online weight update
            for j in range(self.shape[l + 1]):
                self.weights[l][j] -= 0.1 * self.activations[l + 1][j] * delta[l][j]


nn = NeuralNetwork((2, 2, 1))

X = np.array([
    [1, 0],
    [0, 1],
    [1, 1],
    [0, 0]
])
Y = np.array([
    [1],
    [1],
    [0],
    [0]
])

# I train the network on one randomly chosen example from the training set at a time
for _ in range(1000):
    i = np.random.randint(0, 4)
    nn.forward_prop(X[i])
    nn.backprop(X[i], Y[i])

for x in X:
    nn.forward_prop(x)
    print(nn.activations[-1])
```
Answer:
The matrix bookkeeping in backpropagation is genuinely tricky. What's especially confusing is that the list of weight matrices and the list of deltas (and the list of bias arrays, for that matter) must be one entry shorter than the number of layers, which makes the indexing messy. The problem was indeed indexing errors: the delta recursion evaluated `sigmoid_der` on the wrong layer's weighted sums (`delta[l]` belongs to layer `l + 1`, so it needs `self.weighted_sums[l + 1]`, not `[l]`), and the weight update multiplied by a single activation of the *next* layer, `self.activations[l + 1][j]`, instead of the whole input vector `self.activations[l]`; it also subtracted the update even though `delta = (Y - activation) * ...` already points along the negative gradient, so it must be added. The version below fixes the indexing, adds biases, and initializes the weights in [-1, 1) instead of [0, 1) so the units don't all start out on the same side of the sigmoid. It finally works!
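To keep the off-by-one bookkeeping straight, it helps to spell out which layer each list entry belongs to. A minimal standalone illustration (my own summary, using the same naming scheme as the fixed class below):

```python
shape = (2, 3, 2, 1)  # 4 layers: input, two hidden, output
layers = len(shape)

# `weights`, `biases` and `deltas` have one entry per *connection*,
# so they are one shorter than `shape`: entry k belongs to layer k + 1.
for k in range(layers - 1):
    print(f"weights[{k}]: layer {k} ({shape[k]} units) -> layer {k + 1} ({shape[k + 1]} units)")
    print(f"biases[{k}], deltas[{k}]: layer {k + 1}")
```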
```python
import numpy as np

sigmoid = lambda x: 1 / (1 + np.exp(-x))
sigmoid_der = lambda x: sigmoid(x) * (1 - sigmoid(x))

ReLU = lambda x: np.maximum(0, x)
ReLU_der = lambda x: x > 0


class NeuralNetwork:
    def __init__(self, shape: tuple):
        self.layers = len(shape)  # number of layers
        self.shape = shape        # number of neurons in each layer
        # Weight matrices between adjacent layers, initialized in [-1, 1)
        self.weights = [
            np.array([2 * np.random.random(shape[l - 1]) - 1 for _ in range(shape[l])])
            for l in range(1, self.layers)
        ]
        # One bias vector per non-input layer; like `weights` and `deltas`,
        # this list is one entry shorter than `shape`
        self.biases = [np.zeros(l) for l in shape[1:]]
        self.weighted_sums = [None for l in shape]
        self.activations = [None for l in shape]
        self.deltas = [None for l in shape[1:]]

    def inspect(self):
        print("=============NeuralNetwork===============")
        print(f"Shape: {self.shape}")
        print(f"Weights: {self.weights}")
        print(f"Activations: {self.activations}")

    def forward_prop(self, X):
        self.activations[0] = X
        for l in range(1, self.layers):
            self.weighted_sums[l] = self.weights[l - 1] @ self.activations[l - 1] + self.biases[l - 1]
            self.activations[l] = sigmoid(self.weighted_sums[l])

    def backprop(self, X, Y, lr):
        # Output error; deltas[k] belongs to layer k + 1
        self.deltas[-1] = (Y - self.activations[-1]) * sigmoid_der(self.weighted_sums[-1])
        # Propagate the error backwards through the hidden layers
        for l in range(self.layers - 2, 0, -1):
            self.deltas[l - 1] = self.weights[l].T @ self.deltas[l] * sigmoid_der(self.weighted_sums[l])
        # Online update; the deltas point along the negative gradient, hence +=
        for l in range(self.layers - 1):
            for j in range(self.shape[l + 1]):
                self.weights[l][j] += lr * self.activations[l] * self.deltas[l][j]
            self.biases[l] += lr * self.deltas[l]

    def train(self, X, Y, lr, epochs):
        for e in range(epochs):
            if not e % 1000:  # print progress every 1000 iterations
                self.test(X)
            i = np.random.randint(len(X))
            self.forward_prop(X[i])
            self.backprop(X[i], Y[i], lr)

    def test(self, X):
        print()
        for x in X:
            self.forward_prop(x)
            print(x, self.activations[-1])


if __name__ == "__main__":
    nn = NeuralNetwork((2, 3, 2, 1))
    X = np.array([
        [1, 0],
        [0, 1],
        [1, 1],
        [0, 0]
    ])
    Y = np.array([
        [1],
        [1],
        [0],
        [0]
    ])
    nn.train(X, Y, 0.4, 20000)
    nn.test(X)
```
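As a general sanity check for this kind of bug: comparing backprop's analytic gradient against a finite-difference estimate catches indexing mistakes immediately. A rough sketch, not part of the original answer; the squared-error loss, the probed index `(l, j, k)`, and the `eps` value are my own choices, and it assumes the script above has already run so that `nn`, `X` and `Y` are in scope. Calling `backprop` with `lr=0` computes the deltas without changing any parameters:

```python
def gradient_check(nn, x, y, l=0, j=0, k=0, eps=1e-5):
    """Compare backprop's gradient for weights[l][j][k] against
    a central finite difference of L = 0.5 * sum((y - a)^2)."""
    def loss():
        nn.forward_prop(x)
        return 0.5 * np.sum((y - nn.activations[-1]) ** 2)

    # Numerical estimate: (L(w + eps) - L(w - eps)) / (2 * eps)
    nn.weights[l][j][k] += eps
    plus = loss()
    nn.weights[l][j][k] -= 2 * eps
    minus = loss()
    nn.weights[l][j][k] += eps   # restore the original weight
    numeric = (plus - minus) / (2 * eps)

    # Analytic value: the deltas hold -dL/dz, so dL/dw[l][j][k] = -deltas[l][j] * activations[l][k]
    nn.forward_prop(x)
    nn.backprop(x, y, lr=0)      # lr = 0: compute deltas, leave weights and biases alone
    analytic = -nn.deltas[l][j] * nn.activations[l][k]
    print(f"numeric {numeric:.6f} vs analytic {analytic:.6f}")

gradient_check(nn, X[0], Y[0])
```

If the two numbers disagree beyond roughly `eps`-sized noise, the delta recursion or the update indexing is off for that layer.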