目前,我在使用反向传播算法时遇到了问题。我试图实现并使用它来识别面部的方向(左、右、下、正前方)。基本上有N张图片,我读取像素并将其值(0到255)转换为0.0到1.0之间的值。所有图片的尺寸为32*30。我的输入层有960个神经元,隐藏层有3个神经元,输出层有4个神经元。例如,输出<0.1,0.9,0.1,0.1>表示人朝右看。我按照伪代码进行操作。然而,它无法正常工作 – 它未能正确计算权重,因此无法处理训练和测试样例。以下是代码的一部分:
// Entry point: runs plain gradient-descent backprop for a fixed 900 epochs over all images.
// NOTE(review): no transfer (activation) function is applied anywhere in the forward pass,
// which is the divergence bug the answer below addresses.
private void runBackpropagationAlgorithm() {
    for (int i = 0; i < 900; ++i) {
        for (ImageUnit iu : images) {
            // Forward pass + output-layer error, then update both weight layers.
            double [] error = calcOutputError(iu.getRatioMatrix(), iu.getClassification());
            changeHiddenUnitsOutWeights(error);
            error = calcHiddenError(error);
            changeHiddenUnitsInWeights(error,iu.getRatioMatrix());
        }
    }
}

// Initializes all weights uniformly at random in [0, 1).
// NOTE(review): "outHddenUnitsWeights" (missing 'i') is a typo carried throughout the file.
private void createNeuroneNetwork() {
    Random generator = new Random();
    for (int i = 0; i < inHiddenUnitsWeights.length; ++i) {
        for (int j = 0; j < hiddenUnits; ++j) {
            inHiddenUnitsWeights[i][j] = generator.nextDouble();
        }
    }
    for (int i = 0; i < hiddenUnits; ++i) {
        for (int j = 0; j < 4; ++j) {
            outHddenUnitsWeights[i][j] = generator.nextDouble();
        }
    }
}

// Forward pass over the whole network, returning the output-layer error term.
// input: matrix of pixel ratios (32x30 per the question); expectedOutput: 4-element target.
private double [] calcOutputError(double[][] input, double [] expectedOutput) {
    int currentEdge = 0;
    Arrays.fill(hiddenUnitNodeValue, 0.0);
    // Hidden-layer weighted sums; currentEdge walks the input pixels row-major.
    // NOTE(review): hiddenUnitNodeValue keeps the raw weighted sum — no sigmoid applied.
    for (int i = 0; i < input.length; ++i) {
        for (int j = 0; j < input[0].length; ++j) {
            for (int k = 0; k < hiddenUnits; ++k) {
                hiddenUnitNodeValue[k] += input[i][j] * inHiddenUnitsWeights[currentEdge][k];
            }
            ++currentEdge;
        }
    }
    // Output-layer weighted sums (again no transfer function).
    double[] out = new double[4];
    for (int j = 0; j < 4; ++j) {
        for (int i = 0; i < hiddenUnits; ++i) {
            out[j] += outHddenUnitsWeights[i][j] * hiddenUnitNodeValue[i];
        }
    }
    double [] error = new double [4];
    // NOTE(review): dead code — every slot of error is overwritten in the loop below.
    Arrays.fill(error, 4);
    for (int i = 0; i < 4; ++i) {
        // Sigmoid-derivative error term (target - out) * out * (1 - out), even though
        // out never went through a sigmoid above.
        error[i] = ((expectedOutput[i] - out[i])*(1.0-out[i])*out[i]);
        //System.out.println((expectedOutput[i] - out[i]) + " " + expectedOutput[i] + " " + out[i]);
    }
    return error;
}

// Gradient step on the hidden -> output weights using the output-layer error.
private void changeHiddenUnitsOutWeights(double [] error) {
    for (int i = 0; i < hiddenUnits; ++i) {
        for (int j = 0; j < 4; ++j) {
            outHddenUnitsWeights[i][j] += learningRate*error[j]*hiddenUnitNodeValue[i];
        }
    }
}

// Backpropagates the output error to the hidden units.
private double [] calcHiddenError(double [] outputError) {
    double [] error = new double[hiddenUnits];
    for (int i = 0; i < hiddenUnits; ++i) {
        double currentHiddenUnitErrorSum = 0.0;
        for (int j = 0; j < 4; ++j) {
            currentHiddenUnitErrorSum += outputError[j]*outHddenUnitsWeights[i][j];
        }
        // Sigmoid-derivative factor v * (1 - v) on a value that never passed through a sigmoid.
        error[i] = hiddenUnitNodeValue[i] * (1.0 - hiddenUnitNodeValue[i]) * currentHiddenUnitErrorSum;
    }
    return error;
}

// Gradient step on the input -> hidden weights. input is the pixel-ratio matrix.
private void changeHiddenUnitsInWeights(double [] error, double[][] input) {
    int currentEdge = 0;
    for (int i = 0; i < input.length; ++i) {
        for (int j = 0; j < input[0].length; ++j) {
            for (int k = 0; k < hiddenUnits; ++k) {
                inHiddenUnitsWeights[currentEdge][k] += learningRate*error[k]*input[i][j];
            }
            ++currentEdge;
        }
    }
}
随着算法的运行,它计算的权重越来越大,最终接近无穷大(NaN值)。我检查了代码。可惜,我没能解决我的问题。我将非常感激任何愿意帮助我的人。
回答:
你的代码缺少传输函数。听起来你想要使用带有softmax输出的逻辑函数。你需要在calcOutputError中包含以下内容
// 隐藏层的逻辑传输函数。 for (int k = 0; k < hiddenUnits; ++k) { hiddenUnitNodeValue[k] = logistic(hiddenUnitNodeValue[k]);}
和
// 输出层的Softmax传输函数。sum = 0;for (int j = 0; j < 4; ++j) { out[j] = logistic(out[j]); sum += out[j];}for (int j = 0; j < 4; ++j) { out[j] = out[j] / sum;}
其中逻辑函数是
public double logistic(double x){ return (1/(1+(Math.exp(-x)));}
请注意,softmax传输函数会使输出总和为1,因此它们可以被解释为概率。
另外,你对输出层的误差梯度的计算是错误的。它应该简单地是
// Output-layer error gradient for softmax outputs: simply (target - actual),
// with no extra out * (1 - out) derivative factor.
for (int i = 0; i < 4; ++i) {
    error[i] = (expectedOutput[i] - out[i]);
}