MLP with vectors only and backprop issue

I'm interested in artificial intelligence and have started learning about it. I tried to implement an MLP class based only on vectors, but it doesn't work correctly.

The feed-forward function seems fine, but I clearly don't understand the backpropagation algorithm well enough. The training function is a dummy that tests the XOR case, and the network always returns the same result (about 0.625915 in my case): with inputs 1 and 0 (expected output 1) the error is 0.264518, and with inputs 1 and 1 (expected output 0) the error becomes 0.442609.

I would like to know what I'm doing wrong in the backpropagation and gradient descent steps. The full code of the class and the main function is below. Thanks for your help and guidance!

#include <iostream>
#include <vector>
#include <cassert>
#include <functional>
#include <cmath>
#include <stdlib.h>

using namespace std;

typedef function<double(double, bool)> func;
typedef vector < vector < vector<double> > > Matrix3d;

class Net {
public:
    Net(const vector<unsigned> &topology, vector<func> &fns) {
        learning_rate = 0.1;
        alpha = 0.5;
        global_error = 1.0;
        activationFns = fns;
        nbLayers = topology.size();
        lastLayerId = nbLayers - 1;
        gradients.resize(nbLayers);
        neuron_errors.resize(nbLayers);
        layers.resize(nbLayers);
        weights.resize(nbLayers);
        wdeltas.resize(nbLayers);
        for (unsigned layerNum = 0; layerNum < nbLayers; layerNum++) {
            bool isLastLayer = layerNum == lastLayerId;
            unsigned nbNeuronsInLayer = isLastLayer ? topology[layerNum] : topology[layerNum] + 1;
            unsigned nbWeights = isLastLayer ? 0 : topology[layerNum + 1] + 1;
            gradients[layerNum].resize(nbNeuronsInLayer, 0.0);
            layers[layerNum].resize(nbNeuronsInLayer);
            weights[layerNum].resize(nbNeuronsInLayer);
            wdeltas[layerNum].resize(nbNeuronsInLayer);
            neuron_errors[layerNum].resize(nbNeuronsInLayer, 0.0);
            if (! isLastLayer) {
                layers[layerNum][nbNeuronsInLayer-1] = 1.0; // initialise the bias neuron
            }
            for (unsigned n = 0; n < weights[layerNum].size(); n++) {
                weights[layerNum][n].resize(nbWeights); // one weight towards each neuron of the next layer
                wdeltas[layerNum][n].resize(nbWeights, 0.0);
                InitialiseWeights(weights[layerNum][n]); // randomise this neuron's weights
            }
        }
    };
    ~Net() {
        gradients.clear();
        layers.clear();
        weights.clear();
        wdeltas.clear();
        neuron_errors.clear();
    };
    // Propagate through the network.
    // During the feed-forward pass, a neuron's output is activation(sum of its inputs times their weights).
    // For every neuron of the previous layer:
    // take its output, prevLayer[n], and multiply it by the weight towards neuron i of the current layer.
    void FeedForward(const vector<double> &inputs) {
        assert(inputs.size() == layers[0].size() - 1);
        // Assign the inputs to the outputs of the input-layer neurons
        for (unsigned i = 0; i < inputs.size(); i++) {
            layers[0][i] = inputs[i];
        }
        for (unsigned layerNum = 1; layerNum < nbLayers; layerNum++) {
            vector<double> &prevLayer = layers[layerNum - 1];
            const bool isLastLayer = layerNum == lastLayerId;
            const unsigned forcap = isLastLayer ? layers[layerNum].size() : layers[layerNum].size() - 1;
            for (unsigned i = 0; i < forcap; i++) {
                const double bias = prevLayer[prevLayer.size()-1] * weights[layerNum-1][weights[layerNum-1].size()-1][i];
                double output = 0.0;
                for (unsigned n = 0; n < prevLayer.size() - 1; n++) {
                    output += prevLayer[n] * weights[layerNum - 1][n][i];
                }
                output += bias;
                layers[layerNum][i] = activationFns[layerNum - 1](output, false);
            }
        }
        //Print();
    };
    void BackPropagate(const vector<double> &targets) {
        vector<double> &guessed = layers[lastLayerId];
        func &outputActivationFn = activationFns[lastLayerId];
        assert(targets.size() == guessed.size());
        global_error = 0.0;
        // Compute the output-layer error //
        for (unsigned t = 0; t < targets.size(); t++) {
            double diff_ = targets[t] - guessed[t];
            global_error += (diff_ * diff_);
            neuron_errors[lastLayerId][t] = targets[t] - guessed[t]; // error of the output neuron
            gradients[lastLayerId][t] = diff_ * outputActivationFn(guessed[t], true);
        }
        if (guessed.size() > 1)
            global_error /= guessed.size()-1;
        else
            global_error *= 0.5;
        global_error = sqrt(global_error);
        // Compute the errors of the neurons in the other layers
        for (unsigned l = nbLayers - 2; l < nbLayers; --l) {
            // Use the weights linking this layer to the next one
            for (unsigned n = 0; n < layers[l].size(); n++) { // for each neuron of this layer
                neuron_errors[l][n] = 0.0;
                for (unsigned m = 0; m < layers[l+1].size(); m++) { // towards the m-th neuron of the next layer
                    double &weight = weights[l][n][m];
                    // the error of neuron n can be accumulated here
                    neuron_errors[l][n] += weight * gradients[l+1][m];
                }
                gradients[l][n] = neuron_errors[l][n] * activationFns[l](layers[l][n], true); // ?
            }
        }
        // Update the weights (?)
        for (unsigned l = nbLayers - 2; l < nbLayers; --l) {
            for (unsigned n = 0; n < layers[l].size(); n++) {
                for (unsigned m = 0; m < layers[l + 1].size(); m++) {
                    weights[l][n][m] -= (learning_rate * gradients[l][n] * layers[l][n]) + (wdeltas[l][n][m] * alpha);
                    wdeltas[l][n][m] = (learning_rate * gradients[l][n] * layers[l][n]) + (wdeltas[l][n][m] * alpha);
                }
            }
        }
    };
    void GetResults(vector<double> &results) {
        results.clear();
        for (unsigned i = 0; i < layers[lastLayerId].size(); i++) {
            results[i] = layers[lastLayerId][i];
        }
    };
    void Train() {
        vector < vector<double> > ins = {
            { 1.0, 0.0 },
            { 0.0, 1.0 },
            { 0.0, 0.0 },
            { 1.0, 1.0 }
        };
        vector < vector<double> > outs = {
            { 1.0 },
            { 1.0 },
            { 0.0 },
            { 0.0 }
        };
        for (unsigned i = 0; i < 1000; i++) {
            unsigned r = rand() % ins.size();
            vector<double> k = ins[r];
            vector<double> o = outs[r];
            FeedForward(k);
            BackPropagate(o);
            cout << "[" << i << "] " << k[0] << " & " << k[1] << " -> " << o[0] << "\tresult : " << layers[lastLayerId][0] << "\terror = " << global_error << endl;
        }
        cout << endl << "Test: [ 1 , 0 ]" << endl;
        FeedForward({ 1.0, 0.0 });
        BackPropagate({ 1.0 });
        cout << "Result : " << layers[lastLayerId][0] << "\t(error = " << global_error << endl;
        cout << "Test: [ 1 , 1 ]" << endl;
        FeedForward({ 0.85, 0.99 });
        BackPropagate({ 0.0 });
        cout << "Result : " << layers[lastLayerId][0] << "\t(error = " << global_error << endl;
    };
    double Getglobal_error(void) const {
        return global_error;
    };
    void Print(void) {
        for (unsigned l = 0; l < nbLayers; l++) {
            cout << "Layer " << l << " : " << endl;
            for (unsigned n = 0; n < layers[l].size(); n++) {
                cout << "\t" << "Neuron " << l << "-" << n << " : ";
                cout << "(" << layers[l][n] << ")" << endl;
                for (unsigned w = 0; w < weights[l][n].size(); w++) {
                    cout << "\t\t" << l << "-" << n << " -> " << (l+1) << "-" << w << " | weight=" << weights[l][n][w] << endl;
                }
            }
        }
    }
private:
    void InitialiseWeights(vector<double> &weights_) {
        for (unsigned w = 0; w < weights_.size(); w++) {
            weights_[w] = ((double) rand() / (RAND_MAX));
        }
    }
    double global_error;
    double learning_rate;
    double alpha;
    unsigned nbLayers;
    unsigned lastLayerId;
    vector<func> activationFns;
    vector< vector<double> > gradients; // [layerNum][neuronNum] neuron error gradients
    vector< vector<double> > layers; // [layerNum][neuronNum]
    vector< vector<double> > neuron_errors; // [layerNum][neuronNum] neuron errors
    Matrix3d weights; // [layer][neuron][outputWeight]
    Matrix3d wdeltas; // [layer][neuron][outputWeight]
};

double transfer_tanh(double x, bool isDerivative) {
    if (isDerivative) {
        return 1.0 - (tanh(x) * tanh(x));
    }
    return tanh(x);
}

double transfer_sigmoid(double x, bool isDerivative) {
    if (isDerivative) {
        return x * (1.0 - x);
    }
    return 1.0 / (1.0 + exp(-x));
}

int main () {
    vector<unsigned> topo = { 2, 2, 1 };
    vector<func> funcs = { transfer_sigmoid, transfer_sigmoid, transfer_sigmoid };
    Net mynet(topo, funcs);
    /*
    mynet.FeedForward({ 1.0, 0.0 });
    mynet.BackPropagate({ 1.0 });
    mynet.Print();
    mynet.FeedForward({ 1.0, 0.0 });
    mynet.BackPropagate({ 1.0 });
    mynet.Print();
    */
    mynet.Train();
}

Answer:

I didn't understand the math behind backpropagation well enough. Thanks to this resource: https://pabloinsente.github.io/the-multilayer-perceptron, I figured out the BackPropagate method:

void BackPropagate(const vector<double> &targets) {
    assert(targets.size() == layers[lastLayerId].size());
    global_error = 0.0;
    for (unsigned l = lastLayerId; l < nbLayers; --l) {
        for (unsigned n = 0; n < layers[l].size(); n++) {
            neuron_errors[l][n] = 0.0;
            if (l == lastLayerId) { // output layer
                global_error += (targets[n] - layers[lastLayerId][n]) * (targets[n] - layers[lastLayerId][n]);
                neuron_errors[lastLayerId][n] = (targets[n] - layers[lastLayerId][n]) * activationFns[lastLayerId](layers[lastLayerId][n], true);
                continue;
            }
            for (unsigned m = 0; m < layers[l + 1].size(); m++) {
                // note: `inputs` is assumed to be a member holding the last input vector
                // (not shown in the question's class)
                double neuron_output = (l == 0) ? inputs[n] : layers[l][n];
                double delta = learning_rate * (neuron_errors[l + 1][m] * neuron_output);
                neuron_errors[l][n] += (neuron_errors[l + 1][m] * weights[l][n][m])
                    * activationFns[l](layers[l][n], true);
                weights[l][n][m] += delta + (wdeltas[l][n][m] * alpha);
                wdeltas[l][n][m] = delta;
            }
        }
    }
}
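For reference, a sketch of the rule the method above implements, in standard notation (here neuron_errors plays the role of δ, learning_rate is η, alpha is the momentum coefficient, and f' is evaluated on the neuron's output, which is what transfer_sigmoid returns when isDerivative is true):

$$
\begin{aligned}
\delta_k &= (t_k - o_k)\, f'(o_k) && \text{(output neurons)} \\
\delta_j &= f'(o_j) \sum_k w_{jk}\, \delta_k && \text{(hidden neurons)} \\
w_{jk} &\leftarrow w_{jk} + \eta\, \delta_k\, o_j + \alpha\, \Delta w_{jk}^{\text{prev}}, \qquad \Delta w_{jk}^{\text{prev}} = \eta\, \delta_k\, o_j \ \text{from the previous step}
\end{aligned}
$$

The main change from the original attempt is that each weight $w_{jk}$ is moved along $\delta_k \cdot o_j$, i.e. the error of the downstream neuron times the output of the upstream neuron, rather than along the source neuron's own gradient, and the update is added rather than subtracted, since $\delta$ already carries the sign of (target − output).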
