我正在编写一个简单的感知机模型,当我完成代码并看到没有错误时,我感到非常惊讶。但是看起来我的模型没有收敛(还有一些其他的奇怪现象)。
基本上,每个周期它总是能正确分类25/100个样本。而且每次周期结束时,权重总是回到0。
由于代码分布在多个文件中,我将它们放到了Google Drive上,链接如下: https://drive.google.com/folderview?id=0B_r3mf9HbUrLaDNlc1F6RXhNMnM&usp=sharing
这是一个Visual Studio Community 2013项目。你可以打开并运行它,以便更好地了解情况。
不过,这里是文件的快速预览。
main.cpp:
#include <iostream>#include <vector>#include <algorithm>#include <fstream>#include <string>#include <math.h> #include "LinearAlgebra.h"#include "MachineLearning.h"using namespace std;using namespace LinearAlgebra;using namespace MachineLearning;void printVector(vector< vector<float> > X);vector< vector<float> > getIrisX();vector<float> getIrisy();int main(){ vector< vector<float> > X = getIrisX(); vector<float> y = getIrisy(); vector<float> test1; test1.push_back(5.0); test1.push_back(3.3); test1.push_back(1.4); test1.push_back(0.2); vector<float> test2; test2.push_back(6.0); test2.push_back(2.2); test2.push_back(5.0); test2.push_back(1.5); //printVector(X); //for (int i = 0; i < y.size(); i++){ cout << y[i] << " "; }cout << endl; perceptron clf(0.1, 10); clf.fit(X, y); cout << "现在预测: 5.0,3.3,1.4,0.2(正确类别=1,Iris-setosa) -> " << clf.predict(test1) << endl; cout << "现在预测: 6.0,2.2,5.0,1.5(正确类别=-1,Iris-virginica) -> " << clf.predict(test2) << endl; system("PAUSE"); return 0;}void printVector(vector< vector<float> > X){ for (int i = 0; i < X.size(); i++) { for (int j = 0; j < X[i].size(); j++) { cout << X[i][j] << " "; } cout << endl; }}vector<float> getIrisy(){ vector<float> y; ifstream inFile; inFile.open("y.data"); string sampleClass; for (int i = 0; i < 100; i++) { inFile >> sampleClass; if (sampleClass == "Iris-setosa") { y.push_back(1); } else { y.push_back(-1); } } return y;}vector< vector<float> > getIrisX(){ ifstream af; ifstream bf; ifstream cf; ifstream df; af.open("a.data"); bf.open("b.data"); cf.open("c.data"); df.open("d.data"); vector< vector<float> > X; for (int i = 0; i < 100; i++) { char scrap; int scrapN; af >> scrapN; bf >> scrapN; cf >> scrapN; df >> scrapN; af >> scrap; bf >> scrap; cf >> scrap; df >> scrap; float a, b, c, d; af >> a; bf >> b; cf >> c; df >> d; X.push_back(vector < float > {a, b, c, d}); } af.close(); bf.close(); cf.close(); df.close(); return X;}
MachineLearning.h:
#pragma once

#include <vector>

// NOTE(review): a using-directive in a header leaks into every includer.
// It is kept because existing code that includes this header may rely on it.
using namespace std;

namespace MachineLearning
{
    // Single-layer perceptron producing the two class labels 1 and -1.
    class perceptron
    {
    public:
        // eta:    factor applied to every weight update (learning rate).
        // epochs: number of passes fit() makes over the training samples.
        perceptron(float eta, int epochs);

        // Returns the bias m_w[0] plus the sum of X[i] * m_w[i + 1].
        float netInput(vector<float> X);

        // Returns 1 when netInput(X) is greater than 0, otherwise -1.
        int predict(vector<float> X);

        // Trains on the rows of X with the matching labels in y.
        // Labels are expected to be 1 or -1 so they match predict()'s output.
        void fit(vector< vector<float> > X, vector<float> y);

    private:
        float m_eta;            // learning rate
        int m_epochs;           // number of training passes
        vector<float> m_w;      // m_w[0] is the bias, m_w[i + 1] the weight of feature i
        vector<float> m_errors; // per epoch: number of samples with a non-zero update
    };
}
MachineLearning.cpp:
#include<vector>#include <algorithm>#include <iostream>#include<fstream>#include <math.h> #include "MachineLearning.h"using namespace std;namespace MachineLearning{ perceptron::perceptron(float eta, int epochs) { m_epochs = epochs; m_eta = eta; } void perceptron::fit(vector< vector<float> > X, vector<float> y) { for (int i = 0; i < X[0].size() + 1; i++) // X[0].size() + 1 -> 我使用+1来添加偏置项 { m_w.push_back(0); } for (int i = 0; i < m_epochs; i++) { int errors = 0; for (int j = 0; j < X.size(); j++) { float update = m_eta * (y[j] - predict(X[j])); m_w[0] = update; for (int w = 1; w < m_w.size(); w++){ m_w[w] = update * X[j][w - 1]; } errors += update != 0 ? 1 : 0; } m_errors.push_back(errors); } } float perceptron::netInput(vector<float> X) { // 求和(权重向量 * 输入向量) + 偏置 float probabilities = m_w[0]; for (int i = 0; i < X.size(); i++) { probabilities += X[i] * m_w[i + 1]; } return probabilities; } int perceptron::predict(vector<float> X) { return netInput(X) > 0 ? 1 : -1; //阶跃函数 }}
任何形式的帮助都非常感谢。
提前感谢。Panos P.
回答:
经过数小时的艰苦调试,我终于找到了错误。更新权重时我的代码中有一个bug。
// Per-sample training loop as quoted from perceptron::fit (the misbehaving version).
// Both weight writes use plain '=': m_w[0] and every m_w[w] are overwritten with the
// current sample's update instead of having that update added to their previous value.
for (int j = 0; j < X.size(); j++) { float update = m_eta * (y[j] - predict(X[j])); m_w[0] = update; for (int w = 1; w < m_w.size(); w++){ m_w[w] = update * X[j][w - 1]; } errors += update != 0 ? 1 : 0; }
请注意:
// Buggy form: '=' replaces the weight with the latest update.
m_w[w] = update * X[j][w - 1]
我把权重直接赋值成了更新值,而不是把更新值累加到原有权重上——看起来我漏掉了加号。加上之后模型就正常工作了。(注意:偏置项那一行 m_w[0] = update; 也是同样的问题,按感知机学习规则同样应改为 m_w[0] += update;。)
现在是这样的:
// Fixed form: '+=' adds the update to the weight's current value.
m_w[w] += update * X[j][w - 1]
有时候最愚蠢的错误会导致最烦人的问题。我希望这能帮助那些犯同样错误的人。