使用Python编写的神经网络无法正确学习

我的网络无法区分各个输入:它要么对所有输入都输出接近平均值的结果,要么始终偏向某一个特定的输出。我做错了什么?

import numpy as np


def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise."""
    return 1 / (1 + np.exp(-x))


def sigmoid_der(x):
    """Return the derivative of the logistic function evaluated at x."""
    return sigmoid(x) * (1 - sigmoid(x))


def ReLU(x):
    """Return max(0, x), element-wise."""
    return np.maximum(0, x)


def ReLU_der(x):
    """Return a boolean mask that is True where x is strictly positive."""
    return x > 0


class NeuralNetwork:
    """Fully connected sigmoid network trained one example at a time.

    This is the version from the question, i.e. the one that does not
    learn. Its logic is intentionally kept as posted; the suspicious spots
    are marked with NOTE(review) comments in ``backprop``.
    """

    def __init__(self, shape: tuple):
        """Create the network.

        Args:
            shape: Number of neurons in each layer, input layer first.
        """
        self.layers = len(shape)  # number of layers
        self.shape = shape  # number of neurons in each layer
        # One matrix per pair of adjacent layers, with shape[l] rows and
        # shape[l - 1] columns, filled from np.random.rand.
        self.weights = [
            np.array([np.random.rand(shape[l - 1]) for _ in range(shape[l])])
            for l in range(1, self.layers)
        ]
        self.weighted_sums = [np.zeros(l) for l in shape]
        self.activations = [np.zeros(l) for l in shape]

    def inspect(self):
        """Print the shape, weights and current activations."""
        print("=============NeuralNetwork===============")
        print(f"Shape: {self.shape}")
        print(f"Weights: {self.weights}")
        print(f"Activations: {self.activations}")

    def forward_prop(self, X):
        """Feed X through the network, storing every layer's activation."""
        self.activations[0] = X
        for l in range(1, self.layers):
            self.weighted_sums[l] = self.weights[l - 1] @ self.activations[l - 1]
            self.activations[l] = sigmoid(self.weighted_sums[l])

    def backprop(self, X, Y):
        """Update the weights from the last forward pass and the target Y.

        X is accepted but not read; the input is taken from
        ``self.activations[0]`` as left there by ``forward_prop``.
        """
        # Error terms; delta[k] has the size of layer k + 1.
        delta = [np.empty(self.shape[l]) for l in range(1, self.layers)]
        # Output error.
        delta[-1] = (Y - self.activations[-1]) * sigmoid_der(self.weighted_sums[-1])
        for l in reversed(range(self.layers - 2)):  # propagate errors backwards
            # NOTE(review): delta[l] belongs to layer l + 1, but the
            # derivative is taken at weighted_sums[l], which is layer l.
            # The indices look off by one.
            delta[l] = self.weights[l + 1].T @ delta[l + 1] * sigmoid_der(self.weighted_sums[l])
        for l in range(self.layers - 1):  # online weight update
            for j in range(self.shape[l + 1]):
                # NOTE(review): the whole row j is shifted by one scalar built
                # from neuron j's own output (activations[l + 1][j]) rather
                # than from the inputs activations[l]. Since delta is
                # (Y - activation) * sigmoid', subtracting it also looks like
                # the wrong sign.
                self.weights[l][j] -= 0.1 * self.activations[l + 1][j] * delta[l][j]


nn = NeuralNetwork((2, 2, 1))

X = np.array([
    [1, 0],
    [0, 1],
    [1, 1],
    [0, 0],
])
Y = np.array([
    [1],
    [1],
    [0],
    [0],
])

# Train on one randomly chosen example from the training set per step.
for _ in range(1000):
    i = np.random.randint(0, 4)
    nn.forward_prop(X[i])
    nn.backprop(X[i], Y[i])

for x in X:
    nn.forward_prop(x)
    print(nn.activations[-1])

回答:

反向传播的矩阵运算相当棘手。尤其容易让人混淆的是:权重矩阵列表和 delta 列表(偏置数组列表也是如此)的长度都比网络的层数少一,这使得索引很容易出错。显然,问题正是由索引错误造成的。现在它终于可以正常工作了!

import numpy as np


def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise."""
    return 1 / (1 + np.exp(-x))


def sigmoid_der(x):
    """Return the derivative of the logistic function evaluated at x."""
    s = sigmoid(x)
    return s * (1 - s)


def ReLU(x):
    """Return max(0, x), element-wise."""
    return np.maximum(0, x)


def ReLU_der(x):
    """Return a boolean mask that is True where x is strictly positive."""
    return x > 0


class NeuralNetwork:
    """Fully connected sigmoid network trained one example at a time.

    ``weights``, ``biases`` and ``deltas`` each hold one entry per pair of
    adjacent layers, so they are one element shorter than ``shape``:
    index ``k`` in those lists refers to layer ``k + 1``.
    ``weighted_sums`` and ``activations`` hold one entry per layer.
    """

    def __init__(self, shape: tuple):
        """Create the network with weights in [-1, 1) and zero biases.

        Args:
            shape: Number of neurons in each layer, input layer first.
        """
        self.layers = len(shape)
        self.shape = shape
        # weights[k] maps layer k to layer k + 1: shape[k + 1] rows, shape[k] columns.
        self.weights = [
            2 * np.random.random((n_out, n_in)) - 1
            for n_in, n_out in zip(shape[:-1], shape[1:])
        ]
        self.biases = [np.zeros(n) for n in shape[1:]]
        self.weighted_sums = [None] * self.layers
        self.activations = [None] * self.layers
        self.deltas = [None] * (self.layers - 1)

    def inspect(self):
        """Print the shape, weights and current activations."""
        print("=============NeuralNetwork===============")
        print(f"Shape: {self.shape}")
        print(f"Weights: {self.weights}")
        print(f"Activations: {self.activations}")

    def forward_prop(self, X):
        """Feed X through the network, storing every layer's activation.

        Args:
            X: Input vector with one value per input neuron.
        """
        self.activations[0] = X
        for l in range(1, self.layers):
            self.weighted_sums[l] = self.weights[l - 1] @ self.activations[l - 1] + self.biases[l - 1]
            self.activations[l] = sigmoid(self.weighted_sums[l])

    def backprop(self, X, Y, lr):
        """Update weights and biases from the last forward pass.

        ``forward_prop`` must have been called for the same example first,
        because this method reads the stored activations and weighted sums.

        Args:
            X: Unused; kept so existing callers keep working.
            Y: Target output vector.
            lr: Learning rate applied to both weight and bias updates.
        """
        # deltas are (target - activation) * sigmoid'(z), so the updates
        # below are added rather than subtracted.
        self.deltas[-1] = (Y - self.activations[-1]) * sigmoid_der(self.weighted_sums[-1])
        # deltas[l - 1] belongs to layer l; weights[l] connects layer l to l + 1.
        for l in range(self.layers - 2, 0, -1):
            self.deltas[l - 1] = (self.weights[l].T @ self.deltas[l]) * sigmoid_der(self.weighted_sums[l])
        # All deltas are computed before any weight changes, so the updates
        # can be applied layer by layer in any order.
        for l in range(self.layers - 1):
            self.weights[l] += lr * np.outer(self.deltas[l], self.activations[l])
            self.biases[l] += lr * self.deltas[l]

    def train(self, X, Y, lr, epochs):
        """Run ``epochs`` single-example updates on randomly drawn rows.

        Every 1000th step (including step 0) the current outputs for all of
        X are printed via ``test``.

        Args:
            X: Array of input vectors.
            Y: Array of target vectors, aligned with X.
            lr: Learning rate passed to ``backprop``.
            epochs: Number of single-example updates to perform.
        """
        for e in range(epochs):
            if e % 1000 == 0:
                self.test(X)
            i = np.random.randint(len(X))
            self.forward_prop(X[i])
            self.backprop(X[i], Y[i], lr)

    def test(self, X):
        """Print each input in X next to the network's output for it."""
        print()
        for x in X:
            self.forward_prop(x)
            print(x, self.activations[-1])


if __name__ == "__main__":
    nn = NeuralNetwork((2, 3, 2, 1))
    X = np.array([
        [1, 0],
        [0, 1],
        [1, 1],
        [0, 0],
    ])
    Y = np.array([
        [1],
        [1],
        [0],
        [0],
    ])
    nn.train(X, Y, 0.4, 20000)
    nn.test(X)

Related Posts

使用LSTM在Python中预测未来值

这段代码可以预测指定股票的当前日期之前的值,但不能预测…

如何在gensim的word2vec模型中查找双词组的相似性

我有一个word2vec模型,假设我使用的是googl…

dask_xgboost.predict 可以工作但无法显示 – 数据必须是一维的

我试图使用 XGBoost 创建模型。 看起来我成功地…

ML Tuning – Cross Validation in Spark

我在https://spark.apache.org/…

如何在React JS中使用fetch从REST API获取预测

我正在开发一个应用程序,其中Flask REST AP…

如何分析ML.NET中多类分类预测得分数组?

我在ML.NET中创建了一个多类分类项目。该项目可以对…

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注