如何为Accord.NET决策树正确提供输入数据

我正在学习机器学习,特别是决策树。我从Accord.NET框架的网站上复制了一段示例代码,但它无法正常运行,我也找不出原因。程序在第40行报错:“System.IndexOutOfRangeException: ‘索引超出了数组界限。’”我不确定自己哪里做错了。它使用的数据集可以在这里找到: https://en.wikipedia.org/wiki/Iris_flower_data_set 也许是我提供数据集的方式不正确?顺便说一下,我使用的是Visual Studio Community 2017。

这是代码:

using Accord.MachineLearning.DecisionTrees;
using Accord.MachineLearning.DecisionTrees.Learning;
using Accord.MachineLearning.DecisionTrees.Rules;
using Accord.Math;
using Accord.Math.Optimization.Losses;
using Accord.Statistics.Filters;
using ConsoleApp2.Properties;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConsoleApp2
{
    /// <summary>
    /// Console demo that trains a C4.5 decision tree on Fisher's Iris data set
    /// and prints the learned rules next to the rules quoted in the Accord.NET sample.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads the CSV text from <c>Resources.iris_data</c>, trains the tree,
        /// and writes the training error and the rule sets to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        /// <exception cref="FormatException">
        /// Thrown when the CSV text is empty or a row does not have exactly five fields.
        /// </exception>
        static void Main(string[] args)
        {
            // In this example we work with the famous Fisher's Iris data set. The task is
            // to classify a flower as Iris setosa, Iris versicolor or Iris virginica
            // from its measurements:
            //
            //  - https://en.wikipedia.org/wiki/Iris_flower_data_set
            //
            // First, load the data set into an array of text fields. Accept CRLF, LF and CR
            // line endings: splitting on "\r\n" alone leaves LF-only text as one giant row,
            // and the column extraction below then fails with an IndexOutOfRangeException.
            // "\r\n" is listed first so a CRLF pair is consumed as a single separator.
            string[][] text = Resources.iris_data
                .Split(new[] { "\r\n", "\n", "\r" }, StringSplitOptions.RemoveEmptyEntries)
                .Apply(x => x.Split(','));

            // Fail early with a readable message instead of an index error further down.
            const int expectedFieldCount = 5;
            if (text.Length == 0)
            {
                throw new FormatException("The iris data set contains no rows.");
            }

            int malformedRow = Array.FindIndex(text, row => row.Length != expectedFieldCount);
            if (malformedRow >= 0)
            {
                throw new FormatException(string.Format(
                    "Row {0} of the iris data set has {1} field(s); expected {2}.",
                    malformedRow + 1, text[malformedRow].Length, expectedFieldCount));
            }

            // The first four columns contain the flower features
            double[][] inputs = text.GetColumns(0, 1, 2, 3).To<double[][]>();

            // The last column contains the expected flower type
            string[] labels = text.GetColumn(4);

            // The labels are text, so the first step is to convert them into integer
            // class labels that are easier to process. A codebook does the encoding:
            var codebook = new Codification("Output", labels);

            // Using the codebook, translate the labels:
            int[] outputs = codebook.Translate("Output", labels);

            // Declare the names of the input variables:
            DecisionVariable[] features =
            {
                new DecisionVariable("sepal length", DecisionVariableKind.Continuous),
                new DecisionVariable("sepal width", DecisionVariableKind.Continuous),
                new DecisionVariable("petal length", DecisionVariableKind.Continuous),
                new DecisionVariable("petal width", DecisionVariableKind.Continuous),
            };

            // Create the tree for the 3 classes:
            var tree = new DecisionTree(inputs: features, classes: 3);

            // Use C4.5 for learning:
            var teacher = new C45Learning(tree);

            // Finally, induce the tree:
            teacher.Learn(inputs, outputs);

            // Get the estimated class labels (computed once and reused below):
            int[] predicted = tree.Decide(inputs);

            // Compute the classification error (the original sample reports 0.0266):
            double error = new ZeroOneLoss(outputs).Loss(predicted);

            // Convert the tree into a set of rules:
            DecisionSet rules = tree.ToRules();

            // Using the codebook, inspect the tree's reasoning:
            string ruleText = rules.ToString(codebook, "Output",
                System.Globalization.CultureInfo.InvariantCulture);

            // Rules quoted by the original sample, one per line:
            string expected = string.Join(Environment.NewLine, new[]
            {
                "Iris-setosa =: (petal length <= 2.45)",
                "Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width <= 2.85)",
                "Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width > 2.85)",
                "Iris-versicolor =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width > 3.05)",
                "Iris-virginica =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length > 7.05)",
                "Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length > 5.95)",
                "Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width <= 3.05)",
            });

            // Print the values themselves (the original printed the literal word "expected").
            Console.WriteLine("error = {0}", error);
            Console.WriteLine("expected = \n{0}", expected);
            Console.WriteLine("ruleText = \n{0}", ruleText);
            Console.ReadLine();
        }
    }
}

回答:

从代码示例本身来看,你所需要的只是一个包含以CSV格式存储的数据的static类:

    /// <summary>
    /// Holds the Iris flower data set as in-memory CSV text.
    /// </summary>
    public static class Resources
    {
        /// <summary>
        /// The Iris data set: one record per line, fields separated by commas in the order
        /// sepal length, sepal width, petal length, petal width, species label.
        /// Records are joined with an explicit "\r\n" so the text parses the same way
        /// regardless of the line endings this source file is saved with.
        /// </summary>
        public static string iris_data = string.Join("\r\n", new[]
        {
            "7.9,3.8,6.4,2,I. virginica",
            "7.7,3.8,6.7,2.2,I. virginica",
            "7.7,2.6,6.9,2.3,I. virginica",
            "7.7,2.8,6.7,2,I. virginica",
            "7.7,3,6.1,2.3,I. virginica",
            "7.6,3,6.6,2.1,I. virginica",
            "7.4,2.8,6.1,1.9,I. virginica",
            "7.3,2.9,6.3,1.8,I. virginica",
            "7.2,3.6,6.1,2.5,I. virginica",
            "7.2,3.2,6,1.8,I. virginica",
            "7.2,3,5.8,1.6,I. virginica",
            "7.1,3,5.9,2.1,I. virginica",
            "7,3.2,4.7,1.4,I. versicolor",
            "6.9,3.1,4.9,1.5,I. versicolor",
            "6.9,3.2,5.7,2.3,I. virginica",
            "6.9,3.1,5.4,2.1,I. virginica",
            "6.9,3.1,5.1,2.3,I. virginica",
            "6.8,2.8,4.8,1.4,I. versicolor",
            "6.8,3,5.5,2.1,I. virginica",
            "6.8,3.2,5.9,2.3,I. virginica",
            "6.7,3.1,4.4,1.4,I. versicolor",
            "6.7,3,5,1.7,I. versicolor",
            "6.7,3.1,4.7,1.5,I. versicolor",
            "6.7,2.5,5.8,1.8,I. virginica",
            "6.7,3.3,5.7,2.1,I. virginica",
            "6.7,3.1,5.6,2.4,I. virginica",
            "6.7,3.3,5.7,2.5,I. virginica",
            "6.7,3,5.2,2.3,I. virginica",
            "6.6,2.9,4.6,1.3,I. versicolor",
            "6.6,3,4.4,1.4,I. versicolor",
            "6.5,2.8,4.6,1.5,I. versicolor",
            "6.5,3,5.8,2.2,I. virginica",
            "6.5,3.2,5.1,2,I. virginica",
            "6.5,3,5.5,1.8,I. virginica",
            "6.5,3,5.2,2,I. virginica",
            "6.4,3.2,4.5,1.5,I. versicolor",
            "6.4,2.9,4.3,1.3,I. versicolor",
            "6.4,2.7,5.3,1.9,I. virginica",
            "6.4,3.2,5.3,2.3,I. virginica",
            "6.4,2.8,5.6,2.1,I. virginica",
            "6.4,2.8,5.6,2.2,I. virginica",
            "6.4,3.1,5.5,1.8,I. virginica",
            "6.3,3.3,4.7,1.6,I. versicolor",
            "6.3,2.5,4.9,1.5,I. versicolor",
            "6.3,2.3,4.4,1.3,I. versicolor",
            "6.3,3.3,6,2.5,I. virginica",
            "6.3,2.9,5.6,1.8,I. virginica",
            "6.3,2.7,4.9,1.8,I. virginica",
            "6.3,2.8,5.1,1.5,I. virginica",
            "6.3,3.4,5.6,2.4,I. virginica",
            "6.3,2.5,5,1.9,I. virginica",
            "6.2,2.2,4.5,1.5,I. versicolor",
            "6.2,2.9,4.3,1.3,I. versicolor",
            "6.2,2.8,4.8,1.8,I. virginica",
            "6.2,3.4,5.4,2.3,I. virginica",
            "6.1,2.9,4.7,1.4,I. versicolor",
            "6.1,2.8,4,1.3,I. versicolor",
            "6.1,2.8,4.7,1.2,I. versicolor",
            "6.1,3,4.6,1.4,I. versicolor",
            "6.1,3,4.9,1.8,I. virginica",
            "6.1,2.6,5.6,1.4,I. virginica",
            "6,2.2,4,1,I. versicolor",
            "6,2.9,4.5,1.5,I. versicolor",
            "6,2.7,5.1,1.6,I. versicolor",
            "6,3.4,4.5,1.6,I. versicolor",
            "6,2.2,5,1.5,I. virginica",
            "6,3,4.8,1.8,I. virginica",
            "5.9,3,4.2,1.5,I. versicolor",
            "5.9,3.2,4.8,1.8,I. versicolor",
            "5.9,3,5.1,1.8,I. virginica",
            "5.8,4,1.2,0.2,I. setosa",
            "5.8,2.7,4.1,1,I. versicolor",
            "5.8,2.7,3.9,1.2,I. versicolor",
            "5.8,2.6,4,1.2,I. versicolor",
            "5.8,2.7,5.1,1.9,I. virginica",
            "5.8,2.8,5.1,2.4,I. virginica",
            "5.8,2.7,5.1,1.9,I. virginica",
            "5.7,4.4,1.5,0.4,I. setosa",
            "5.7,3.8,1.7,0.3,I. setosa",
            "5.7,2.8,4.5,1.3,I. versicolor",
            "5.7,2.6,3.5,1,I. versicolor",
            "5.7,3,4.2,1.2,I. versicolor",
            "5.7,2.9,4.2,1.3,I. versicolor",
            "5.7,2.8,4.1,1.3,I. versicolor",
            "5.7,2.5,5,2,I. virginica",
            "5.6,2.9,3.6,1.3,I. versicolor",
            "5.6,3,4.5,1.5,I. versicolor",
            "5.6,2.5,3.9,1.1,I. versicolor",
            "5.6,3,4.1,1.3,I. versicolor",
            "5.6,2.7,4.2,1.3,I. versicolor",
            "5.6,2.8,4.9,2,I. virginica",
            "5.5,4.2,1.4,0.2,I. setosa",
            "5.5,3.5,1.3,0.2,I. setosa",
            "5.5,2.3,4,1.3,I. versicolor",
            "5.5,2.4,3.8,1.1,I. versicolor",
            "5.5,2.4,3.7,1,I. versicolor",
            "5.5,2.5,4,1.3,I. versicolor",
            "5.5,2.6,4.4,1.2,I. versicolor",
            "5.4,3.9,1.7,0.4,I. setosa",
            "5.4,3.7,1.5,0.2,I. setosa",
            "5.4,3.9,1.3,0.4,I. setosa",
            "5.4,3.4,1.7,0.2,I. setosa",
            "5.4,3.4,1.5,0.4,I. setosa",
            "5.4,3,4.5,1.5,I. versicolor",
            "5.3,3.7,1.5,0.2,I. setosa",
            "5.2,3.5,1.5,0.2,I. setosa",
            "5.2,3.4,1.4,0.2,I. setosa",
            "5.2,4.1,1.5,0.1,I. setosa",
            "5.2,2.7,3.9,1.4,I. versicolor",
            "5.1,3.5,1.4,0.2,I. setosa",
            "5.1,3.5,1.4,0.3,I. setosa",
            "5.1,3.8,1.5,0.3,I. setosa",
            "5.1,3.7,1.5,0.4,I. setosa",
            "5.1,3.3,1.7,0.5,I. setosa",
            "5.1,3.4,1.5,0.2,I. setosa",
            "5.1,3.8,1.9,0.4,I. setosa",
            "5.1,3.8,1.6,0.2,I. setosa",
            "5.1,2.5,3,1.1,I. versicolor",
            "5,3.6,1.4,0.2,I. setosa",
            "5,3.4,1.5,0.2,I. setosa",
            "5,3,1.6,0.2,I. setosa",
            "5,3.4,1.6,0.4,I. setosa",
            "5,3.2,1.2,0.2,I. setosa",
            "5,3.5,1.3,0.3,I. setosa",
            "5,3.5,1.6,0.6,I. setosa",
            "5,3.3,1.4,0.2,I. setosa",
            "5,2,3.5,1,I. versicolor",
            "5,2.3,3.3,1,I. versicolor",
            "4.9,3,1.4,0.2,I. setosa",
            "4.9,3.1,1.5,0.1,I. setosa",
            "4.9,3.1,1.5,0.2,I. setosa",
            "4.9,3.6,1.4,0.1,I. setosa",
            "4.9,2.4,3.3,1,I. versicolor",
            "4.9,2.5,4.5,1.7,I. virginica",
            "4.8,3.4,1.6,0.2,I. setosa",
            "4.8,3,1.4,0.1,I. setosa",
            "4.8,3.4,1.9,0.2,I. setosa",
            "4.8,3.1,1.6,0.2,I. setosa",
            "4.8,3,1.4,0.3,I. setosa",
            "4.7,3.2,1.3,0.2,I. setosa",
            "4.7,3.2,1.6,0.2,I. setosa",
            "4.6,3.1,1.5,0.2,I. setosa",
            "4.6,3.4,1.4,0.3,I. setosa",
            "4.6,3.6,1,0.2,I. setosa",
            "4.6,3.2,1.4,0.2,I. setosa",
            "4.5,2.3,1.3,0.3,I. setosa",
            "4.4,2.9,1.4,0.2,I. setosa",
            "4.4,3,1.3,0.2,I. setosa",
            "4.4,3.2,1.3,0.2,I. setosa",
            "4.3,3,1.1,0.1,I. setosa",
        });
    }

此外,你可能还想比较预期结果和实际结果:

// Show the reference rules first, then the rules the trained tree produced,
// each under its own heading so the two can be compared by eye.
Console.WriteLine("expected = \n" + expected);
Console.WriteLine("ruleText = \n" + ruleText);

这应该会给你类似这样的结果:

expected =
Iris-setosa =: (2 <= 2.45)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 <= 2.85)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 > 2.85)
Iris-versicolor =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 > 3.05)
Iris-virginica =: (2 > 2.45) && (3 <= 1.75) && (0 > 7.05)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 > 5.95)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 <= 3.05)

ruleText =
I. virginica =: (2 > 2.45) && (3 <= 1.75) && (0 > 7.05)
I. virginica =: (2 > 2.45) && (3 > 1.75) && (0 > 5.95)
I. virginica =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 <= 3.05)
I. versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 <= 2.85)
I. versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 > 2.85)
I. versicolor =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 > 3.05)
I. setosa =: (2 <= 2.45)

Related Posts

L1-L2正则化的不同系数

我想对网络的权重同时应用L1和L2正则化。然而,我找不…

使用scikit-learn的无监督方法将列表分类成不同组别,有没有办法?

我有一系列实例,每个实例都有一份列表,代表它所遵循的不…

f1_score metric in lightgbm

我想使用自定义指标f1_score来训练一个lgb模型…

通过相关系数矩阵进行特征选择

我在测试不同的算法时,如逻辑回归、高斯朴素贝叶斯、随机…

可以将机器学习库用于流式输入和输出吗?

已关闭。此问题需要更加聚焦。目前不接受回答。 想要改进…

在TensorFlow中,queue.dequeue_up_to()方法的用途是什么?

我对这个方法感到非常困惑,特别是当我发现这个令人费解的…

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注