使用Swift实现带反向传播的简单神经网络

我正在尝试实现一个带有反向传播的非常简单的神经网络,并用AND逻辑运算符的真值表来训练它。但是网络的预测结果并不理想。:(

    /// Activation helpers used by `Layer`.
    public class ActivationFunction {
        /// Logistic sigmoid: `1 / (1 + e^(-x))`.
        class func sigmoid(x: Float) -> Float {
            return 1.0 / (1.0 + exp(-x))
        }

        /// Returns `x * (1 - x)`.
        ///
        /// `Layer.train` calls this with the layer's stored `output`, i.e. a value
        /// that `Layer.run` has already passed through `sigmoid`, not with the
        /// pre-activation sum.
        class func dSigmoid(x: Float) -> Float {
            return x * (1 - x)
        }
    }

    /// Hyper-parameters for training.
    public class NeuralNetConstants {
        public static let learningRate: Float = 0.3
        public static let momentum: Float = 0.6
        public static let iterations: Int = 100000
    }

    /// One fully connected layer with sigmoid activation.
    ///
    /// `weights` is a flat array laid out row by row: `run` and `train` address
    /// the weights of output neuron `i` starting at `i * input.count`.
    public class Layer {
        private var output: [Float]
        private var input: [Float]
        private var weights: [Float]
        // The plain learning-rate step (`dw`) applied to each weight by the previous `train` call.
        private var dWeights: [Float]

        /// Allocates the buffers for a layer with `inputSize` inputs plus one
        /// extra input slot, and `outputSize` outputs.
        init(inputSize: Int, outputSize: Int) {
            self.output = [Float](repeating: 0, count: outputSize)
            // One extra slot beyond `inputSize`; `run` writes the constant 1 into the last slot.
            self.input = [Float](repeating: 0, count: inputSize + 1)
            // NOTE(review): `random()` is evaluated once here and `repeating:` copies that
            // single value into every element, so all weights of the layer start out equal.
            self.weights = [Float](repeating: (-2.0...2.0).random(), count: (1 + inputSize) * outputSize)
            self.dWeights = [Float](repeating: 0, count: weights.count)
        }

        /// Forward pass: stores `inputArray`, computes every output activation
        /// and returns the output array.
        public func run(inputArray: [Float]) -> [Float] {
            // NOTE(review): this replaces the whole array, so `input.count` becomes
            // `inputArray.count` and the extra slot allocated in `init` is gone.
            input =  inputArray
            // Writes 1 into the last element; after the assignment above that is the
            // last caller-supplied value.
            input[input.count-1] = 1
            var offSet = 0
            for i in 0..<output.count {
                for j in 0..<input.count {
                    // `output[i]` is not reset before accumulating, so the sum starts
                    // from the activation left by the previous call.
                    output[i] += weights[offSet+j] * input[j]
                }
                output[i] = ActivationFunction.sigmoid(x: output[i])
                offSet += input.count
            }
            return output
        }

        /// Backward pass for this layer.
        ///
        /// - Parameters:
        ///   - error: One error value per output neuron.
        ///   - learningRate: Factor applied to each weight step.
        ///   - momentum: Factor applied to the step stored in `dWeights`.
        /// - Returns: One accumulated error value per element of `input`.
        public func train(error: [Float], learningRate: Float, momentum: Float) -> [Float] {
            var offset = 0
            var nextError = [Float](repeating: 0, count: input.count)
            for i in 0..<output.count {
                let delta = error[i] * ActivationFunction.dSigmoid(x: output[i])
                for j in 0..<input.count {
                    let weightIndex = offset + j
                    // Uses the weight value from before this step's update.
                    nextError[j] = nextError[j] + weights[weightIndex] * delta
                    let dw = input[j] * delta * learningRate
                    weights[weightIndex] += dWeights[weightIndex] * momentum + dw
                    // Only `dw` is remembered; the momentum term itself is not carried over.
                    dWeights[weightIndex] = dw
                }
                offset += input.count
            }
            return nextError
        }
    }

    /// A network of two `Layer`s: input -> hidden -> output.
    public class BackpropNeuralNetwork {
        private var layers: [Layer] = []

        public init(inputSize: Int, hiddenSize: Int, outputSize: Int) {
            self.layers.append(Layer(inputSize: inputSize, outputSize: hiddenSize))
            self.layers.append(Layer(inputSize: hiddenSize, outputSize: outputSize))
        }

        /// Returns the layer at `index` (0 = hidden layer, 1 = output layer).
        public func getLayer(index: Int) -> Layer {
            return layers[index]
        }

        /// Feeds `input` through the layers in order and returns the last layer's output.
        public func run(input: [Float]) -> [Float] {
            var activations = input
            for i in 0..<layers.count {
                activations = layers[i].run(inputArray: activations)
            }
            return activations
        }

        /// Runs one forward pass on `input`, then passes the error
        /// (`targetOutput - calculatedOutput`) backwards through the layers.
        public func train(input: [Float], targetOutput: [Float], learningRate: Float, momentum: Float) {
            let calculatedOutput = run(input: input)
            var error = [Float](repeating: 0, count: calculatedOutput.count)
            for i in 0..<error.count {
                error[i] = targetOutput[i] - calculatedOutput[i]
            }
            // Last layer first; each layer's returned error feeds the layer before it.
            for i in (0...layers.count-1).reversed() {
                error = layers[i].train(error: error, learningRate: learningRate, momentum: momentum)
            }
        }
    }

    extension ClosedRange where Bound: FloatingPoint {
        /// Returns a random value computed as `fraction * (upperBound - lowerBound) + lowerBound`,
        /// where `fraction` is `arc4random_uniform(UINT32_MAX) / UINT32_MAX`.
        public func random() -> Bound {
            let range = self.upperBound - self.lowerBound
            let randomValue = (Bound(arc4random_uniform(UINT32_MAX)) / Bound(UINT32_MAX)) * range + self.lowerBound
            return randomValue
        }
    }

这是我的训练数据,我只希望我的网络能学会简单的AND逻辑运算符。

我的输入数据:

// Truth table of the logical AND operator: each input pair and its expected output.
let traningData: [[Float]] = [[0, 0], [0, 1], [1, 0], [1, 1]]
let traningResults: [[Float]] = [[0], [0], [0], [1]]

// 2 inputs, 3 hidden neurons, 1 output.
let backProb = BackpropNeuralNetwork(inputSize: 2, hiddenSize: 3, outputSize: 1)

for _ in 0..<NeuralNetConstants.iterations {
    // One training step per sample, in table order.
    for (sample, expected) in zip(traningData, traningResults) {
        backProb.train(
            input: sample,
            targetOutput: expected,
            learningRate: NeuralNetConstants.learningRate,
            momentum: NeuralNetConstants.momentum
        )
    }

    // After every pass over the table, print the current prediction for each sample.
    for (t, _) in zip(traningData, traningResults) {
        print("\(t[0]), \(t[1])  -- \(backProb.run(input: t)[0])")
    }
}

这是我整个神经网络的代码。代码并不是很符合Swift的风格,但我认为首先更重要的是理解神经网络的理论,然后代码会变得更符合Swift的风格。

问题是我的结果完全不对。这是我的结果:

0.0, 0.0  -- 0.246135
0.0, 1.0  -- 0.251307
1.0, 0.0  -- 0.24325
1.0, 1.0  -- 0.240923

这是我想要的结果:

0,0, 0,0 -- 0,000
0,0, 1,0 -- 0,005
1,0, 0,0 -- 0,005
1,0, 1,0 -- 0,992

相比之下,Java实现工作得很好…

/** Activation helpers used by {@code Layer}. */
public class ActivationFunction {
    /** Logistic sigmoid: 1 / (1 + e^(-x)). */
    public static float sigmoid(float x) {
        return (float) (1 / (1 + Math.exp(-x)));
    }

    /**
     * Returns x * (1 - x).
     * Layer.train passes the stored layer output here, which is already sigmoid(x),
     * so the sigmoid does not have to be applied a second time.
     */
    public static float dSigmoid(float x) {
        return x*(1-x);
    }
}

/** Hyper-parameters for training; the private constructor prevents instantiation. */
public class NeuralNetConstants {
    private NeuralNetConstants() {
    }

    public static final float LEARNING_RATE = 0.3f;
    public static final float MOMENTUM = 0.6f;
    public static final int ITERATIONS = 100000;
}

/**
 * One fully connected layer with sigmoid activation.
 * The weights are stored in a flat array, row by row: the weights of output
 * neuron i start at index i * input.length.
 */
public class Layer {
    private float[] output;
    private float[] input;
    private float[] weights;
    // The plain learning-rate step (dw) applied to each weight by the previous train() call.
    private float[] dWeights;
    private Random random;

    /** Allocates the buffers (one extra input slot for the bias) and randomizes the weights. */
    public Layer(int inputSize, int outputSize) {
        output = new float[outputSize];
        input = new float[inputSize + 1];
        weights = new float[(1 + inputSize) * outputSize];
        dWeights = new float[weights.length];
        this.random = new Random();
        initWeights();
    }

    /** Draws a separate random value for every weight: (nextFloat() - 0.5) * 4. */
    public void initWeights() {
        for (int i = 0; i < weights.length; i++) {
            weights[i] = (random.nextFloat() - 0.5f) * 4f;
        }
    }

    /**
     * Forward pass: copies inputArray into the front of the input buffer, sets
     * the last slot to 1 as the bias input, and computes every output activation.
     *
     * @return a copy of the output array
     */
    public float[] run(float[] inputArray) {
        // Copies only inputArray.length values, so input keeps its own length.
        System.arraycopy(inputArray, 0, input, 0, inputArray.length);
        input[input.length - 1] = 1; // bias input
        int offset = 0;
        for (int i = 0; i < output.length; i++) {
            for (int j = 0; j < input.length; j++) {
                // output[i] is not reset before accumulating, so the sum starts
                // from the activation left by the previous call.
                output[i] += weights[offset + j] * input[j];
            }
            output[i] = ActivationFunction.sigmoid(output[i]);
            offset += input.length;
        }
        return Arrays.copyOf(output, output.length);
    }

    /**
     * Backward pass for this layer.
     *
     * @param error        one error value per output neuron
     * @param learningRate factor applied to each weight step
     * @param momentum     factor applied to the step stored in dWeights
     * @return one accumulated error value per element of input (bias slot included)
     */
    public float[] train(float[] error, float learningRate, float momentum) {
        int offset = 0;
        float[] nextError = new float[input.length];
        for (int i = 0; i < output.length; i++) {
            float delta = error[i] * ActivationFunction.dSigmoid(output[i]);
            for (int j = 0; j < input.length; j++) {
                int previousWeightIndex = offset + j;
                // Uses the weight value from before this step's update.
                nextError[j] = nextError[j] + weights[previousWeightIndex] * delta;
                float dw = input[j] * delta * learningRate;
                weights[previousWeightIndex] += dWeights[previousWeightIndex] * momentum + dw;
                // Only dw is remembered; the momentum term itself is not carried over.
                dWeights[previousWeightIndex] = dw;
            }
            offset += input.length;
        }
        return nextError;
    }
}

/** A network of two layers: input -> hidden -> output. */
public class BackpropNeuralNetwork {
    private Layer[] layers;

    public BackpropNeuralNetwork(int inputSize, int hiddenSize, int outputSize) {
        layers = new Layer[2];
        layers[0] = new Layer(inputSize, hiddenSize);
        layers[1] = new Layer(hiddenSize, outputSize);
    }

    /** Returns the layer at index (0 = hidden layer, 1 = output layer). */
    public Layer getLayer(int index) {
        return layers[index];
    }

    /** Feeds input through the layers in order and returns the last layer's output. */
    public float[] run(float[] input) {
        float[] inputActivation = input;
        for (int i = 0; i < layers.length; i++) {
            inputActivation = layers[i].run(inputActivation);
        }
        return inputActivation;
    }

    /**
     * Runs one forward pass on input, then passes the error
     * (targetOutput - calculatedOutput) backwards through the layers.
     */
    public void train(float[] input, float[] targetOutput, float learningRate, float momentum) {
        float[] calculatedOutput = run(input);
        float[] error = new float[calculatedOutput.length];
        for (int i = 0; i < error.length; i++) {
            error[i] = targetOutput[i] - calculatedOutput[i];
        }
        // Last layer first; each layer's returned error feeds the layer before it.
        for (int i = layers.length - 1; i >= 0; i--) {
            error = layers[i].train(error, learningRate, momentum);
        }
    }
}

/** Trains the network on the truth table of logical AND and prints its predictions. */
public class NeuralNetwork {
    /**
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        float[][] trainingData = new float[][] {
                new float[] { 0, 0 },
                new float[] { 0, 1 },
                new float[] { 1, 0 },
                new float[] { 1, 1 }
        };
        float[][] trainingResults = new float[][] {
                new float[] { 0 },
                new float[] { 0 },
                new float[] { 0 },
                new float[] { 1 }
        };
        BackpropNeuralNetwork backpropagationNeuralNetworks = new BackpropNeuralNetwork(2, 3,1);
        for (int iterations = 0; iterations < NeuralNetConstants.ITERATIONS; iterations++) {
            // One training step per sample, in table order.
            for (int i = 0; i < trainingResults.length; i++) {
                backpropagationNeuralNetworks.train(trainingData[i], trainingResults[i],
                        NeuralNetConstants.LEARNING_RATE, NeuralNetConstants.MOMENTUM);
            }
            System.out.println();
            // The epoch line is printed once per sample, followed by that sample's prediction.
            for (int i = 0; i < trainingResults.length; i++) {
                float[] t = trainingData[i];
                System.out.printf("%d epoch\n", iterations + 1);
                System.out.printf("%.1f, %.1f --> %.3f\n", t[0], t[1], backpropagationNeuralNetworks.run(t)[0]);
            }
        }
    }
}

回答:

您的权重初始化方式与Java版本不同:您只生成了一个随机值,并把它重复用于所有权重。您需要为数组中的每个权重分别生成一个随机值。请将下面的第一行替换为第二行:

self.weights = [Float](repeating: (-2.0...2.0).random(), count: (1 + inputSize) * outputSize)

self.weights = (0..<(1 + inputSize) * outputSize).map { _ in  return (-2.0...2.0).random()}

此外,请考虑在 Layer.run 方法中只覆盖 input 的前几个元素(即与 inputArray 对应的部分),而不是替换整个数组,这样才能保留末尾为偏置项预留的位置。因此,不要写

input =  inputArray

您应该这样做:

for (i, e) in inputArray.enumerated() {  self.input[i] = e}

Related Posts

L1-L2正则化的不同系数

我想对网络的权重同时应用L1和L2正则化。然而,我找不…

使用scikit-learn的无监督方法将列表分类成不同组别,有没有办法?

我有一系列实例,每个实例都有一份列表,代表它所遵循的不…

f1_score metric in lightgbm

我想使用自定义指标f1_score来训练一个lgb模型…

通过相关系数矩阵进行特征选择

我在测试不同的算法时,如逻辑回归、高斯朴素贝叶斯、随机…

可以将机器学习库用于流式输入和输出吗?

已关闭。此问题需要更加聚焦。目前不接受回答。 想要改进…

在TensorFlow中,queue.dequeue_up_to()方法的用途是什么?

我对这个方法感到非常困惑,特别是当我发现这个令人费解的…

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注