ML.NET算法在评估乳腺癌实例时始终为假

我正在使用ML.NET的二元分类算法处理威斯康星乳腺癌数据集。训练模型后,我发现每个实例都被预测为假。我的测试文件中有100个实例,其中75个为负例,25个为正例。从评估指标来看,准确率为0.75,负例精确度也为0.75,这说明所有实例都被预测为0(假)。

// Locations of the training and test CSV files under the application's "uploads" folder.
private static string trainingDataPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "uploads", "data.csv");
private static string testDataPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "uploads", "test.csv");

/// <summary>
/// Trains a linear SVM binary classifier on the training CSV, saves the model to
/// "uploads/model.ZIP", reloads it, predicts the diagnosis for <paramref name="input"/>
/// and prints evaluation metrics computed on the test CSV.
/// </summary>
/// <param name="input">The sample to classify.</param>
/// <returns>
/// The predicted label for <paramref name="input"/>, or <c>false</c> when training,
/// saving, loading, prediction or evaluation throws.
/// </returns>
public bool checkDiagnostic(BreastCancerData input)
{
    // Create a new machine learning context.
    var mlContext = new MLContext();

    // Load the training and test data (comma separated, no header row).
    var trainingDataView = mlContext.Data.LoadFromTextFile<BreastCancerData>(trainingDataPath, hasHeader: false, separatorChar: ',');
    var testDataView = mlContext.Data.LoadFromTextFile<BreastCancerData>(testDataPath, hasHeader: false, separatorChar: ',');

    var trainer = mlContext.BinaryClassification.Trainers.LinearSvm("Label", "Features");

    // NOTE(review): Id is a row identifier rather than a measurement; the answer that
    // accompanies this question reports that excluding it from "Features" resolved the
    // all-false predictions. It is kept here so the snippet still reproduces the
    // behavior being asked about.
    // NOTE(review): BreastCancerData.RadiusWorst is declared but not listed below;
    // confirm whether that omission is intentional.
    var trainingPipeline = mlContext.Transforms.Concatenate(outputColumnName: "Features",
            nameof(BreastCancerData.AreaMean), nameof(BreastCancerData.AreaSe), nameof(BreastCancerData.AreaWorst),
            nameof(BreastCancerData.CompactnessMean), nameof(BreastCancerData.CompactnessSe), nameof(BreastCancerData.CompactnessWorst),
            nameof(BreastCancerData.ConcavePointsMean), nameof(BreastCancerData.ConcavePointsSe), nameof(BreastCancerData.ConcavePointsWorst),
            nameof(BreastCancerData.ConcavityMean), nameof(BreastCancerData.ConcavitySe), nameof(BreastCancerData.ConcavityWorst),
            nameof(BreastCancerData.FractalDimensionMean), nameof(BreastCancerData.FractalDimensionSe), nameof(BreastCancerData.FractalDimensionWorst),
            nameof(BreastCancerData.Id),
            nameof(BreastCancerData.PerimeterMean), nameof(BreastCancerData.PerimeterSe), nameof(BreastCancerData.PerimeterWorst),
            nameof(BreastCancerData.RadiusMean), nameof(BreastCancerData.RadiusSe),
            nameof(BreastCancerData.SmoothnessMean), nameof(BreastCancerData.SmoothnessSe), nameof(BreastCancerData.SmoothnessWorst),
            nameof(BreastCancerData.SymmetryMean), nameof(BreastCancerData.SymmetrySe), nameof(BreastCancerData.SymmetryWorst),
            nameof(BreastCancerData.TextureMean), nameof(BreastCancerData.TextureSe), nameof(BreastCancerData.TextureWorst))
        .Append(mlContext.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: nameof(BreastCancerData.Diagnosis)))
        .Append(trainer);

    string modelPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "uploads", "model.ZIP");

    try
    {
        var model = trainingPipeline.Fit(trainingDataView);

        // File.Create truncates an existing file; File.OpenWrite would leave stale
        // trailing bytes behind whenever the new model is shorter than the old one.
        using (var file = File.Create(modelPath))
        {
            mlContext.Model.Save(model, trainingDataView.Schema, file);
        }

        ITransformer trainedModel;
        using (var stream = File.OpenRead(modelPath))
        {
            trainedModel = mlContext.Model.Load(stream, out _);
        }

        var predictionEngine = mlContext.Model.CreatePredictionEngine<BreastCancerData, BreastCancerPrediction>(trainedModel);

        Console.WriteLine("** 测试产品1 **");

        // Predict the diagnosis for the supplied sample.
        BreastCancerPrediction prediction = predictionEngine.Predict(input);
        Console.WriteLine($"产品: {input.Id} - 诊断结果: {prediction.Prediction}");

        // Evaluate the freshly trained model against the test set.
        BinaryClassificationMetrics metrics =
            mlContext.BinaryClassification.EvaluateNonCalibrated(model.Transform(testDataView), "Label");
        Console.WriteLine($"准确率: {metrics.Accuracy:P2}");
        Console.WriteLine($"负例精确度: {metrics.NegativePrecision:P2}");
        Console.WriteLine($"正例精确度: {metrics.PositivePrecision:P2}");
        Console.WriteLine($"正例召回率: {metrics.PositiveRecall:P2}");
        Console.WriteLine($"负例召回率: {metrics.NegativeRecall:P2}");
        Console.WriteLine($"ROC曲线下面积: {metrics.AreaUnderRocCurve:P2}");

        return prediction.Prediction;
    }
    catch (Exception e)
    {
        // Keep the original "return false on failure" contract, but report the failure
        // so it cannot be mistaken for a genuine negative diagnosis.
        Console.Error.WriteLine($"训练或预测失败: {e}");
    }

    return false;
}

编辑1:

/// <summary>
/// One row of the breast cancer data file. Each property is bound to a zero-based
/// column index of the loaded text file through <c>LoadColumn</c>.
/// </summary>
public class BreastCancerData
{
    // Row identifier and the boolean diagnosis column.
    [LoadColumn(0)] public float Id { get; set; }
    [LoadColumn(1)] public bool Diagnosis { get; set; }

    // "Mean" measurements (columns 2-11).
    [LoadColumn(2)] public float RadiusMean { get; set; }
    [LoadColumn(3)] public float TextureMean { get; set; }
    [LoadColumn(4)] public float PerimeterMean { get; set; }
    [LoadColumn(5)] public float AreaMean { get; set; }
    [LoadColumn(6)] public float SmoothnessMean { get; set; }
    [LoadColumn(7)] public float CompactnessMean { get; set; }
    [LoadColumn(8)] public float ConcavityMean { get; set; }
    [LoadColumn(9)] public float ConcavePointsMean { get; set; }
    [LoadColumn(10)] public float SymmetryMean { get; set; }
    [LoadColumn(11)] public float FractalDimensionMean { get; set; }

    // "Se" measurements (columns 12-21); presumably standard errors - confirm against the data set description.
    [LoadColumn(12)] public float RadiusSe { get; set; }
    [LoadColumn(13)] public float TextureSe { get; set; }
    [LoadColumn(14)] public float PerimeterSe { get; set; }
    [LoadColumn(15)] public float AreaSe { get; set; }
    [LoadColumn(16)] public float SmoothnessSe { get; set; }
    [LoadColumn(17)] public float CompactnessSe { get; set; }
    [LoadColumn(18)] public float ConcavitySe { get; set; }
    [LoadColumn(19)] public float ConcavePointsSe { get; set; }
    [LoadColumn(20)] public float SymmetrySe { get; set; }
    [LoadColumn(21)] public float FractalDimensionSe { get; set; }

    // "Worst" measurements (columns 22-31).
    [LoadColumn(22)] public float RadiusWorst { get; set; }
    [LoadColumn(23)] public float TextureWorst { get; set; }
    [LoadColumn(24)] public float PerimeterWorst { get; set; }
    [LoadColumn(25)] public float AreaWorst { get; set; }
    [LoadColumn(26)] public float SmoothnessWorst { get; set; }
    [LoadColumn(27)] public float CompactnessWorst { get; set; }
    [LoadColumn(28)] public float ConcavityWorst { get; set; }
    [LoadColumn(29)] public float ConcavePointsWorst { get; set; }
    [LoadColumn(30)] public float SymmetryWorst { get; set; }
    [LoadColumn(31)] public float FractalDimensionWorst { get; set; }
}

/// <summary>
/// Prediction output: every input column of <see cref="BreastCancerData"/> plus the
/// value of the "PredictedLabel" column.
/// </summary>
public class BreastCancerPrediction : BreastCancerData
{
    [ColumnName("PredictedLabel")] public bool Prediction { get; set; }
}

请不要在意我使用了如此多的属性而不是向量。


回答:

好的,我通过从特征列中删除Id属性解决了这个问题(Id只是样本编号,与诊断结果无关,不应作为特征参与训练)。现在,使用SDCA算法的准确率达到了97.x%。

Related Posts

在使用k近邻算法时,有没有办法获取被使用的“邻居”?

我想找到一种方法来确定在我的knn算法中实际使用了哪些…

Theano在Google Colab上无法启用GPU支持

我在尝试使用Theano库训练一个模型。由于我的电脑内…

准确性评分似乎有误

这里是代码: from sklearn.metrics…

Keras Functional API: “错误检查输入时:期望input_1具有4个维度,但得到形状为(X, Y)的数组”

我在尝试使用Keras的fit_generator来训…

如何使用sklearn.datasets.make_classification在指定范围内生成合成数据?

我想为分类问题创建合成数据。我使用了sklearn.d…

如何处理预测时不在训练集中的标签

已关闭。 此问题与编程或软件开发无关。目前不接受回答。…

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注