我在尝试用C#实现一个反向传播神经网络时遇到了一个问题。在训练网络时,所有的输出要么是0.49…,要么是0.51…
这是我的网络类
namespace BackPropNetwork{public class Network { public double[][] Values { get; set; } public double[][] Deltas { get; set; } public double[][][] Weights { get; set; } public Network(params int[] size) { Values = new double[size.Length][]; Weights = new double[size.Length][][]; Deltas = new double[size.Length][]; Random r = new Random(); for(int i = 0; i < size.Length; i++) { Values[i] = new double[size[i]]; Weights[i] = new double[size[i]][]; Deltas[i] = new double[size[i]]; if (i != size.Length - 1) { for (int j = 0; j < size[i]; j++) { Weights[i][j] = new double[size[i + 1]]; for(int k= 0; k < size[i + 1]; k++) { Weights[i][j][k] = r.NextDouble() ; } } } } } public double[] FeedThrough (double[] input) { if(input.Length!= Values[0].Length) { throw new InvalidOperationException(); } Values[0] = input; for(int i = 0; i < Values.Length-1; i++) { for(int j = 0; j < Values[i + 1].Length; j++) { Values[i + 1][j] = Sigmoid(GetPassValue(i, j),false); } } return Values[Values.Length - 1]; } double GetPassValue(int layer,int neuron) { double sum = 0; for(int i = 0; i < Values[layer].Length; i++) { sum += Values[layer][i] * Weights[layer][i][neuron]; } return sum; } public double Sigmoid(double d, bool dir) { if (dir) { return d * (1 - d); }else { return 1 / (1 + Math.Exp(d)); } } public void CorrectError(double[] error) { for(int i = Values.Length - 1; i >= 0; i--) { if (i !=Values.Length - 1) { error = new double[Values[i].Length]; for(int j = 0; j < Values[i].Length; j++) { error[j] = 0; for(int k = 0; k < Values[i + 1].Length; k++) { error[j] += Weights[i][j][k] * Deltas[i + 1][k]; } } } for(int j = 0; j < Values[i].Length; j++) { Deltas[i][j] = error[j] * Sigmoid(Values[i][j],true); } } } public void ApplyCorrection(double rate) { for(int i = 0; i < Values.Length-1; i++) { for(int j = 0; j < Values[i].Length; j++) { for(int k = 0; k < Values[i + 1].Length; k++) { Weights[i][j][k] = rate * Deltas[i + 1][k] * Values[i][j]; } } } }}
}
这是我的测试类:
namespace BackPropagationTest{class Program{ static void Main(string[] args) { Network n = new Network(3, 5, 5, 1); double[][] input = new double[][] { new double[] { 1, 0, 1 }, new double[] { 1, 1, 1 }, new double[] { 0, 0, 0 }, new double[] {0, 1, 0 } }; double[][] output = new double[][] { new double[] { 0 },new double[] { 1 }, new double[] { 0 }, new double[] { 0 } }; for (int i = 0; i < 10; i++) { for(int j = 0; j < input.Length; j++) { var x = n.FeedThrough(input[j]); double[] error = new double[output[0].Length]; for(int k= 0; k < x.Length; k++) { error[k] = output[j][k] - x[k]; } n.CorrectError(error); n.ApplyCorrection(0.01); for(int k = 0; k < x.Length; k++) { Console.Write($"Expected: {output[j][k]} Got: {x[k]} "); } Console.WriteLine(); } Console.WriteLine(); } }}
}
这是我的输出:
Expected: 0 Got: 0.270673949003643Expected: 1 Got: 0.500116517554687Expected: 0 Got: 0.499609458404919Expected: 0 Got: 0.50039031963377Expected: 0 Got: 0.500390929619276Expected: 1 Got: 0.500390929999612Expected: 0 Got: 0.499609680732027Expected: 0 Got: 0.500390319841144Expected: 0 Got: 0.50039092961941Expected: 1 Got: 0.500390929999612Expected: 0 Got: 0.499609680732027Expected: 0 Got: 0.500390319841144Expected: 0 Got: 0.50039092961941Expected: 1 Got: 0.500390929999612Expected: 0 Got: 0.499609680732027Expected: 0 Got: 0.500390319841144
这种情况一直持续下去。
编辑1:
我在ApplyCorrection()函数中做了一个更改,将
Weights[i][j][k] = rate * Deltas[i + 1][k] * Values[i][j];
替换为
Weights[i][j][k] += rate * Deltas[i + 1][k] * Values[i][j];
现在权重似乎已经更新了,但对于这个实现的正确性我仍然存疑。也就是说,我仍然需要帮助
编辑2:
我之前没有对输出层的总误差进行求和,而是单独反向传播每个样本的误差。现在我已经这样做了,但输出结果仍然令人困惑:
我还将输出对从(0,1)改为(-1, 1),试图使计算出的误差值更大。这是经过1000000个周期,学习率为0.1后的结果:
Expected: -1 Got: 0.999998429209274Expected: 1 Got: 0.999997843901661Expected: -1 Got: 0.687098308461306Expected: -1 Got: 0.788960893508226Expected: -1 Got: 0.999998429209274Expected: -1 Got: 0.863022549216158Expected: -1 Got: 0.788960893508226Expected: -1 Got: 0.999998474717769
回答:
尝试使用下面的代码,看看误差是否在减少还是保持不变。
public double Sigmoid(double d, bool dir){ if (dir) { return d * (1 - d); }else { if (d < -45.0) return 0.0; else if (d > 45.0) return 1.0; else return 1.0 / (1.0 + Math.Exp(-d)); }}