我一直在尝试创建一个异或神经网络,但无论输入是什么,输出总是收敛到某个固定值(比如1,或者0,或者0.5)。这是我最近的尝试:
import java.io.*;import java.util.*;public class Main { public static void main(String[] args) { double[][] trainingInputs = { {1, 1}, {1, 0}, {0, 1}, {1, 1} }; double[] targetOutputs = {0, 1, 1, 0}; NeuralNetwork network = new NeuralNetwork(); System.out.println("Training"); for(int i=0; i<40; i++) { network.train(trainingInputs, targetOutputs); } for(double[] inputs : trainingInputs) { double output = network.feedForward(inputs); System.out.println(inputs[0] + " - " + inputs[1] + " : " + output); } }}class Neuron { private ArrayList<Synapse> inputs; // List di sinapsi collegate al neurone private double output; // output del neurone private double derivative; // derivata dell'output private double weightedSum; // somma ponderata del peso delle sinapsi e degli output collegati private double error; // errore public Neuron() { inputs = new ArrayList<Synapse>(); error = 0; } // Aggiunge una sinpapsi public void addInput(Synapse input) { inputs.add(input); } public List<Synapse> getInputs() { return this.inputs; } public double[] getWeights() { double[] weights = new double[inputs.size()]; int i = 0; for(Synapse synapse : inputs) { weights[i] = synapse.getWeight(); i++; } return weights; } private void calculateWeightedSum() { weightedSum = 0; for(Synapse synapse : inputs) { weightedSum += synapse.getWeight() * synapse.getSourceNeuron().getOutput(); } } public void activate() { calculateWeightedSum(); output = sigmoid(weightedSum); derivative = sigmoidDerivative(output); } public double getOutput() { return this.output; } public void setOutput(double output) { this.output = output; } public double getDerivative() { return this.derivative; } public double getError() { return error; } public void setError(double error) { this.error = error; } public double sigmoid(double weightedSum) { return 1 / (1 + Math.exp(-weightedSum)); } public double sigmoidDerivative(double output) { return output / (1 - output); }}class Synapse implements Serializable { private Neuron sourceNeuron; // Neurone da cui origina la sinapsi private double weight; // Peso della sinapsi public Synapse(Neuron sourceNeuron) { this.sourceNeuron = sourceNeuron; this.weight = Math.random() - 0.5; } public Neuron getSourceNeuron() { return sourceNeuron; } public double getWeight() { return weight; } public void adjustWeight(double deltaWeight) { this.weight += deltaWeight; }}class NeuralNetwork implements Serializable { Neuron[] input; Neuron[] hidden; Neuron output; double learningRate = 0.1; public NeuralNetwork() { input = new Neuron[2]; hidden = new Neuron[2]; output = new Neuron(); for(int i=0; i<2; i++) { input[i] = new Neuron(); } for(int i=0; i<2; i++) { hidden[i] = new Neuron(); } for(int i=0; i<2; i++) { Synapse s = new Synapse(hidden[i]); output.addInput(s); } for(int i=0; i<2; i++) { for(int j=0; j<2; j++) { Synapse s = new Synapse(input[j]); hidden[i].addInput(s); } } } public void setInput(double[] inputVal) { for(int i=0; i<2; i++) { input[i].setOutput(inputVal[i]); } } public double feedForward(double[] inputVal) { setInput(inputVal); for(int i=0; i<2; i++) { hidden[i].activate(); } output.activate(); return output.getOutput(); } public void train(double[][] trainingInputs, double[] targetOutputs) { for(int i=0; i<4; i++) { double[] inputs = trainingInputs[i]; double target = targetOutputs[i]; double currentOutput = feedForward(inputs); double delta = 0; double neuronError = 0; for(int j=0; j<2; j++) { Synapse s = output.getInputs().get(j); neuronError = output.getDerivative() * (target - currentOutput); delta = learningRate * s.getSourceNeuron().getOutput() * neuronError; output.setError(neuronError); s.adjustWeight(delta); } for(int j=0; j<2; j++) { for(int k=0; k<2; k++) { Synapse s = hidden[j].getInputs().get(k); Synapse s1 = output.getInputs().get(j); delta = learningRate * s.getSourceNeuron().getOutput() * hidden[j].getDerivative() * s1.getWeight() * output.getError(); s.adjustWeight(delta); } } } }}
我从GitHub上找到了别人的反向传播算法实现,并尝试使用它,但得到的输出要么是0.50左右,要么是NaN。我不知道是我用了错误的算法,还是我实现的方式有问题,或者还有其他原因。
我使用的算法是这样的:首先,我找到神经元本身的误差:
如果是输出神经元,那么neuronError =(输出神经元的导数)*(期望输出 – 实际输出)
如果是隐藏层神经元,那么neuronError =(隐藏层神经元的导数)*(输出神经元的neuronError)*(从隐藏层神经元到输出神经元的突触的权重)
然后deltaWeight = learningRate *(突触起始神经元的neuronError)*(突触起始神经元的输出)
最后,我将deltaWeight添加到之前的权重上。
对不起,文本有点长,如果你不愿意通读代码,至少能告诉我我的算法是否正确吗?谢谢
回答:
你的Sigmoid导数计算有误,正确的应该是如下所示:
public double sigmoidDerivative(double output) { return output * (1 - output); }}
正如我在评论中所说,你的训练输入中有两次{1, 1},所以请将其中一个改为{0, 0}。
最后,将迭代次数从40增加到100,000。