我正在学习机器学习,特别是决策树。我从Accord.NET框架网站上复制了一段代码,但它无法正常运行,我也找不出原因。它在第40行报错:“System.IndexOutOfRangeException: ‘索引超出了数组界限。’”我不确定哪里做错了。它使用的数据集可以在这里找到: https://en.wikipedia.org/wiki/Iris_flower_data_set 也许问题出在我没有以正确的方式提供数据集?顺便说一下,我使用的是Visual Studio Community 2017。
这是代码:
using Accord.MachineLearning.DecisionTrees;
using Accord.MachineLearning.DecisionTrees.Learning;
using Accord.MachineLearning.DecisionTrees.Rules;
using Accord.Math;
using Accord.Math.Optimization.Losses;
using Accord.Statistics.Filters;
using ConsoleApp2.Properties;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConsoleApp2
{
    /// <summary>
    /// Console sample that learns a decision tree on the iris data held in
    /// <c>Resources.iris_data</c> and prints the learned rules next to the
    /// rule set published with the original sample.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads the CSV text, trains a C4.5 decision tree on it, and writes the
        /// training error, the reference rules and the learned rules to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        /// <exception cref="FormatException">
        /// Thrown when the dataset is empty or a row does not have exactly five
        /// comma-separated fields.
        /// </exception>
        static void Main(string[] args)
        {
            // In this example we process Fisher's Iris dataset. The task is to
            // classify a flower as Iris setosa, Iris versicolor or Iris virginica
            // from its measurements:
            //
            //  - https://en.wikipedia.org/wiki/Iris_flower_data_set
            //
            // First, load the dataset into a jagged array of text fields.
            // Both CRLF and LF are accepted as row separators: a verbatim string
            // literal inherits the line endings of the source file, so splitting
            // on "\r\n" alone can leave the whole dataset as one giant "row".
            string[][] text = Resources.iris_data
                .Split(new[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries)
                .Where(line => !string.IsNullOrWhiteSpace(line))
                .Select(line => line.Split(',').Select(field => field.Trim()).ToArray())
                .ToArray();

            // Fail early with a precise message instead of an IndexOutOfRangeException
            // from the column extraction below.
            if (text.Length == 0)
                throw new FormatException("Resources.iris_data contains no rows.");

            for (int row = 0; row < text.Length; row++)
            {
                if (text[row].Length != 5)
                {
                    throw new FormatException(string.Format(
                        "Row {0} of Resources.iris_data has {1} field(s); expected 5 (four measurements and a label).",
                        row + 1, text[row].Length));
                }
            }

            // The first four columns contain the flower features.
            double[][] inputs = text.GetColumns(0, 1, 2, 3).To<double[][]>();

            // The last column contains the expected flower type.
            string[] labels = text.GetColumn(4);

            // The labels are text, so the first step is to translate them into
            // integer class labels. A codebook is created for that purpose.
            var codebook = new Codification("Output", labels);

            // With the codebook, the labels can be converted:
            int[] outputs = codebook.Translate("Output", labels);

            // Declare the names of the input variables:
            DecisionVariable[] features =
            {
                new DecisionVariable("sepal length", DecisionVariableKind.Continuous),
                new DecisionVariable("sepal width", DecisionVariableKind.Continuous),
                new DecisionVariable("petal length", DecisionVariableKind.Continuous),
                new DecisionVariable("petal width", DecisionVariableKind.Continuous),
            };

            // Create the tree for the 3 classes:
            var tree = new DecisionTree(inputs: features, classes: 3);

            // Learn it with C4.5:
            var teacher = new C45Learning(tree);

            // Induce the tree:
            teacher.Learn(inputs, outputs);

            // Estimated class labels for the training inputs:
            int[] predicted = tree.Decide(inputs);

            // Classification error on the training inputs (the original sample
            // reports 0.0266 for its dataset). The predictions computed above
            // are reused instead of evaluating the tree a second time.
            double error = new ZeroOneLoss(outputs).Loss(predicted);

            // The tree can also be converted into a set of rules:
            DecisionSet rules = tree.ToRules();

            // Using the codebook, the tree's reasoning can be inspected:
            string ruleText = rules.ToString(codebook, "Output",
                System.Globalization.CultureInfo.InvariantCulture);

            // Rule set published with the original sample, one rule per line:
            string expected = string.Join(Environment.NewLine, new[]
            {
                "Iris-setosa =: (petal length <= 2.45)",
                "Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width <= 2.85)",
                "Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width > 2.85)",
                "Iris-versicolor =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width > 3.05)",
                "Iris-virginica =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length > 7.05)",
                "Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length > 5.95)",
                "Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width <= 3.05)",
            });

            // Print the values themselves; the original printed the literal word "expected".
            Console.WriteLine("error = {0}", error);
            Console.WriteLine("expected = \n{0}", expected);
            Console.WriteLine("ruleText = \n{0}", ruleText);

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
    }
}
回答:
从代码示例本身来看,你只需要一个静态(static)类,其中包含以CSV格式存储的数据:
/// <summary>
/// Holds the iris sample data as in-memory CSV text.
/// </summary>
public static class Resources
{
    /// <summary>
    /// CSV text with one record per line. Each record has four numeric fields
    /// followed by a species label ("I. setosa", "I. versicolor" or "I. virginica").
    /// Records are joined with an explicit "\r\n" so the separator does not depend
    /// on the line endings of this source file.
    /// </summary>
    // NOTE(review): the numeric columns are presumably sepal length, sepal width,
    // petal length and petal width, in that order - confirm against the consumer.
    public static string iris_data = string.Join("\r\n", new[]
    {
        "7.9,3.8,6.4,2,I. virginica",
        "7.7,3.8,6.7,2.2,I. virginica",
        "7.7,2.6,6.9,2.3,I. virginica",
        "7.7,2.8,6.7,2,I. virginica",
        "7.7,3,6.1,2.3,I. virginica",
        "7.6,3,6.6,2.1,I. virginica",
        "7.4,2.8,6.1,1.9,I. virginica",
        "7.3,2.9,6.3,1.8,I. virginica",
        "7.2,3.6,6.1,2.5,I. virginica",
        "7.2,3.2,6,1.8,I. virginica",
        "7.2,3,5.8,1.6,I. virginica",
        "7.1,3,5.9,2.1,I. virginica",
        "7,3.2,4.7,1.4,I. versicolor",
        "6.9,3.1,4.9,1.5,I. versicolor",
        "6.9,3.2,5.7,2.3,I. virginica",
        "6.9,3.1,5.4,2.1,I. virginica",
        "6.9,3.1,5.1,2.3,I. virginica",
        "6.8,2.8,4.8,1.4,I. versicolor",
        "6.8,3,5.5,2.1,I. virginica",
        "6.8,3.2,5.9,2.3,I. virginica",
        "6.7,3.1,4.4,1.4,I. versicolor",
        "6.7,3,5,1.7,I. versicolor",
        "6.7,3.1,4.7,1.5,I. versicolor",
        "6.7,2.5,5.8,1.8,I. virginica",
        "6.7,3.3,5.7,2.1,I. virginica",
        "6.7,3.1,5.6,2.4,I. virginica",
        "6.7,3.3,5.7,2.5,I. virginica",
        "6.7,3,5.2,2.3,I. virginica",
        "6.6,2.9,4.6,1.3,I. versicolor",
        "6.6,3,4.4,1.4,I. versicolor",
        "6.5,2.8,4.6,1.5,I. versicolor",
        "6.5,3,5.8,2.2,I. virginica",
        "6.5,3.2,5.1,2,I. virginica",
        "6.5,3,5.5,1.8,I. virginica",
        "6.5,3,5.2,2,I. virginica",
        "6.4,3.2,4.5,1.5,I. versicolor",
        "6.4,2.9,4.3,1.3,I. versicolor",
        "6.4,2.7,5.3,1.9,I. virginica",
        "6.4,3.2,5.3,2.3,I. virginica",
        "6.4,2.8,5.6,2.1,I. virginica",
        "6.4,2.8,5.6,2.2,I. virginica",
        "6.4,3.1,5.5,1.8,I. virginica",
        "6.3,3.3,4.7,1.6,I. versicolor",
        "6.3,2.5,4.9,1.5,I. versicolor",
        "6.3,2.3,4.4,1.3,I. versicolor",
        "6.3,3.3,6,2.5,I. virginica",
        "6.3,2.9,5.6,1.8,I. virginica",
        "6.3,2.7,4.9,1.8,I. virginica",
        "6.3,2.8,5.1,1.5,I. virginica",
        "6.3,3.4,5.6,2.4,I. virginica",
        "6.3,2.5,5,1.9,I. virginica",
        "6.2,2.2,4.5,1.5,I. versicolor",
        "6.2,2.9,4.3,1.3,I. versicolor",
        "6.2,2.8,4.8,1.8,I. virginica",
        "6.2,3.4,5.4,2.3,I. virginica",
        "6.1,2.9,4.7,1.4,I. versicolor",
        "6.1,2.8,4,1.3,I. versicolor",
        "6.1,2.8,4.7,1.2,I. versicolor",
        "6.1,3,4.6,1.4,I. versicolor",
        "6.1,3,4.9,1.8,I. virginica",
        "6.1,2.6,5.6,1.4,I. virginica",
        "6,2.2,4,1,I. versicolor",
        "6,2.9,4.5,1.5,I. versicolor",
        "6,2.7,5.1,1.6,I. versicolor",
        "6,3.4,4.5,1.6,I. versicolor",
        "6,2.2,5,1.5,I. virginica",
        "6,3,4.8,1.8,I. virginica",
        "5.9,3,4.2,1.5,I. versicolor",
        "5.9,3.2,4.8,1.8,I. versicolor",
        "5.9,3,5.1,1.8,I. virginica",
        "5.8,4,1.2,0.2,I. setosa",
        "5.8,2.7,4.1,1,I. versicolor",
        "5.8,2.7,3.9,1.2,I. versicolor",
        "5.8,2.6,4,1.2,I. versicolor",
        "5.8,2.7,5.1,1.9,I. virginica",
        "5.8,2.8,5.1,2.4,I. virginica",
        "5.8,2.7,5.1,1.9,I. virginica",
        "5.7,4.4,1.5,0.4,I. setosa",
        "5.7,3.8,1.7,0.3,I. setosa",
        "5.7,2.8,4.5,1.3,I. versicolor",
        "5.7,2.6,3.5,1,I. versicolor",
        "5.7,3,4.2,1.2,I. versicolor",
        "5.7,2.9,4.2,1.3,I. versicolor",
        "5.7,2.8,4.1,1.3,I. versicolor",
        "5.7,2.5,5,2,I. virginica",
        "5.6,2.9,3.6,1.3,I. versicolor",
        "5.6,3,4.5,1.5,I. versicolor",
        "5.6,2.5,3.9,1.1,I. versicolor",
        "5.6,3,4.1,1.3,I. versicolor",
        "5.6,2.7,4.2,1.3,I. versicolor",
        "5.6,2.8,4.9,2,I. virginica",
        "5.5,4.2,1.4,0.2,I. setosa",
        "5.5,3.5,1.3,0.2,I. setosa",
        "5.5,2.3,4,1.3,I. versicolor",
        "5.5,2.4,3.8,1.1,I. versicolor",
        "5.5,2.4,3.7,1,I. versicolor",
        "5.5,2.5,4,1.3,I. versicolor",
        "5.5,2.6,4.4,1.2,I. versicolor",
        "5.4,3.9,1.7,0.4,I. setosa",
        "5.4,3.7,1.5,0.2,I. setosa",
        "5.4,3.9,1.3,0.4,I. setosa",
        "5.4,3.4,1.7,0.2,I. setosa",
        "5.4,3.4,1.5,0.4,I. setosa",
        "5.4,3,4.5,1.5,I. versicolor",
        "5.3,3.7,1.5,0.2,I. setosa",
        "5.2,3.5,1.5,0.2,I. setosa",
        "5.2,3.4,1.4,0.2,I. setosa",
        "5.2,4.1,1.5,0.1,I. setosa",
        "5.2,2.7,3.9,1.4,I. versicolor",
        "5.1,3.5,1.4,0.2,I. setosa",
        "5.1,3.5,1.4,0.3,I. setosa",
        "5.1,3.8,1.5,0.3,I. setosa",
        "5.1,3.7,1.5,0.4,I. setosa",
        "5.1,3.3,1.7,0.5,I. setosa",
        "5.1,3.4,1.5,0.2,I. setosa",
        "5.1,3.8,1.9,0.4,I. setosa",
        "5.1,3.8,1.6,0.2,I. setosa",
        "5.1,2.5,3,1.1,I. versicolor",
        "5,3.6,1.4,0.2,I. setosa",
        "5,3.4,1.5,0.2,I. setosa",
        "5,3,1.6,0.2,I. setosa",
        "5,3.4,1.6,0.4,I. setosa",
        "5,3.2,1.2,0.2,I. setosa",
        "5,3.5,1.3,0.3,I. setosa",
        "5,3.5,1.6,0.6,I. setosa",
        "5,3.3,1.4,0.2,I. setosa",
        "5,2,3.5,1,I. versicolor",
        "5,2.3,3.3,1,I. versicolor",
        "4.9,3,1.4,0.2,I. setosa",
        "4.9,3.1,1.5,0.1,I. setosa",
        "4.9,3.1,1.5,0.2,I. setosa",
        "4.9,3.6,1.4,0.1,I. setosa",
        "4.9,2.4,3.3,1,I. versicolor",
        "4.9,2.5,4.5,1.7,I. virginica",
        "4.8,3.4,1.6,0.2,I. setosa",
        "4.8,3,1.4,0.1,I. setosa",
        "4.8,3.4,1.9,0.2,I. setosa",
        "4.8,3.1,1.6,0.2,I. setosa",
        "4.8,3,1.4,0.3,I. setosa",
        "4.7,3.2,1.3,0.2,I. setosa",
        "4.7,3.2,1.6,0.2,I. setosa",
        "4.6,3.1,1.5,0.2,I. setosa",
        "4.6,3.4,1.4,0.3,I. setosa",
        "4.6,3.6,1,0.2,I. setosa",
        "4.6,3.2,1.4,0.2,I. setosa",
        "4.5,2.3,1.3,0.3,I. setosa",
        "4.4,2.9,1.4,0.2,I. setosa",
        "4.4,3,1.3,0.2,I. setosa",
        "4.4,3.2,1.3,0.2,I. setosa",
        "4.3,3,1.1,0.1,I. setosa",
    });
}
此外,你可能还想比较预期结果和实际结果:
// Show the reference rule set first, then the rules learned from the data,
// each under its own heading so the two can be compared by eye.
Console.WriteLine(string.Format("expected = \n{0}", expected));
Console.WriteLine(string.Format("ruleText = \n{0}", ruleText));
这应该会给你类似这样的结果:
expected =
Iris-setosa =: (2 <= 2.45)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 <= 2.85)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 > 2.85)
Iris-versicolor =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 > 3.05)
Iris-virginica =: (2 > 2.45) && (3 <= 1.75) && (0 > 7.05)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 > 5.95)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 <= 3.05)
ruleText =
I. virginica =: (2 > 2.45) && (3 <= 1.75) && (0 > 7.05)
I. virginica =: (2 > 2.45) && (3 > 1.75) && (0 > 5.95)
I. virginica =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 <= 3.05)
I. versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 <= 2.85)
I. versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 > 2.85)
I. versicolor =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 > 3.05)
I. setosa =: (2 <= 2.45)