From 4ed3ca8539c4de324092a1e0702075c4f4fbd65a Mon Sep 17 00:00:00 2001 From: Yael Dekel Date: Wed, 18 Dec 2019 17:07:28 +0200 Subject: [PATCH 01/10] Add non-calibrated evaluation to PFI --- .../CalibratedBinaryClassificationMetrics.cs | 10 +++ .../PermutationFeatureImportance.cs | 2 +- .../PermutationFeatureImportanceExtensions.cs | 65 ++++++++++++++++--- .../PermutationFeatureImportanceTests.cs | 12 ++++ 4 files changed, 78 insertions(+), 11 deletions(-) diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs index a2d193deed..9e21fe1cf2 100644 --- a/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs +++ b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs @@ -49,5 +49,15 @@ internal CalibratedBinaryClassificationMetrics(IHost host, DataViewRow overallRe LogLossReduction = Fetch(BinaryClassifierEvaluator.LogLossReduction); Entropy = Fetch(BinaryClassifierEvaluator.Entropy); } + + [BestFriend] + internal CalibratedBinaryClassificationMetrics(double auc, double accuracy, double positivePrecision, double positiveRecall, + double negativePrecision, double negativeRecall, double f1Score, double auprc, double logLoss, double logLossReduction, double entropy) + : base(auc, accuracy, positivePrecision, positiveRecall, negativePrecision, negativeRecall, f1Score, auprc) + { + LogLoss = logLoss; + LogLossReduction = logLossReduction; + Entropy = entropy; + } } } diff --git a/src/Microsoft.ML.Transforms/PermutationFeatureImportance.cs b/src/Microsoft.ML.Transforms/PermutationFeatureImportance.cs index 1bd034ff74..648e49f565 100644 --- a/src/Microsoft.ML.Transforms/PermutationFeatureImportance.cs +++ b/src/Microsoft.ML.Transforms/PermutationFeatureImportance.cs @@ -171,7 +171,7 @@ public static ImmutableArray int processedCnt = 0; int nextFeatureIndex = 0; var shuffleRand = RandomUtils.Create(host.Rand.Next()); - using (var pch = host.StartProgressChannel("SDCA preprocessing with lookup")) + using (var pch = host.StartProgressChannel("Calculating Permutation Feature Importance")) { pch.SetHeader(new ProgressHeader("processed slots"), e => e.SetProgress(0, processedCnt)); foreach (var workingIndx in workingFeatureIndices) diff --git a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs index 07b9c8f435..3ffc85cfac 100644 --- a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs +++ b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs @@ -4,7 +4,9 @@ using System.Collections.Generic; using System.Collections.Immutable; +using Microsoft.ML.Calibrators; using Microsoft.ML.Data; +using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Runtime; using Microsoft.ML.Transforms; @@ -144,17 +146,43 @@ public static ImmutableArray int? numberOfExamplesToUse = null, int permutationCount = 1) where TModel : class { + bool isCalibratedModel = false; + var type = predictionTransformer.Model.GetType(); + if (type.IsGenericType) + { + var genArgs = type.GetGenericArguments(); + if (Utils.Size(genArgs) == 2) + { + var calibratedModelType = typeof(CalibratedModelParametersBase<,>).MakeGenericType(genArgs); + if (calibratedModelType.IsAssignableFrom(type)) + isCalibratedModel = true; + } + } + if (isCalibratedModel) + { + return PermutationFeatureImportance.GetImportanceMetricsMatrix( + catalog.GetEnvironment(), + predictionTransformer, + data, + () => new BinaryClassificationMetricsStatistics(), + idv => catalog.Evaluate(idv, labelColumnName), + BinaryClassifierDelta, + predictionTransformer.FeatureColumnName, + permutationCount, + useFeatureWeightFilter, + numberOfExamplesToUse); + } return PermutationFeatureImportance.GetImportanceMetricsMatrix( - catalog.GetEnvironment(), - predictionTransformer, - data, - () => new BinaryClassificationMetricsStatistics(), - idv => catalog.Evaluate(idv, labelColumnName), - BinaryClassifierDelta, - predictionTransformer.FeatureColumnName, - permutationCount, - useFeatureWeightFilter, - numberOfExamplesToUse); + catalog.GetEnvironment(), + predictionTransformer, + data, + () => new BinaryClassificationMetricsStatistics(), + idv => catalog.EvaluateNonCalibrated(idv, labelColumnName), + BinaryClassifierDelta, + predictionTransformer.FeatureColumnName, + permutationCount, + useFeatureWeightFilter, + numberOfExamplesToUse); } private static BinaryClassificationMetrics BinaryClassifierDelta( @@ -171,6 +199,23 @@ private static BinaryClassificationMetrics BinaryClassifierDelta( auprc: a.AreaUnderPrecisionRecallCurve - b.AreaUnderPrecisionRecallCurve); } + private static CalibratedBinaryClassificationMetrics CalibratedBinaryClassifierDelta( + CalibratedBinaryClassificationMetrics a, CalibratedBinaryClassificationMetrics b) + { + return new CalibratedBinaryClassificationMetrics( + auc: a.AreaUnderRocCurve - b.AreaUnderRocCurve, + accuracy: a.Accuracy - b.Accuracy, + positivePrecision: a.PositivePrecision - b.PositivePrecision, + positiveRecall: a.PositiveRecall - b.PositiveRecall, + negativePrecision: a.NegativePrecision - b.NegativePrecision, + negativeRecall: a.NegativeRecall - b.NegativeRecall, + f1Score: a.F1Score - b.F1Score, + auprc: a.AreaUnderPrecisionRecallCurve - b.AreaUnderPrecisionRecallCurve, + logLoss: a.LogLoss - b.LogLoss, + logLossReduction: a.LogLossReduction - b.LogLossReduction, + entropy: a.Entropy - b.Entropy); + } + #endregion Binary Classification #region Multiclass Classification diff --git a/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs b/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs index 3feca9421b..88e301be84 100644 --- a/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs +++ b/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs @@ -305,6 +305,18 @@ public void TestPfiBinaryClassificationOnSparseFeatures(bool saveModel) Done(); } + + [Fact] + public void TestBinaryClassificationWithoutCalibrator() + { + var dataPath = GetDataPath("breast-cancer.txt"); + var ff = ML.BinaryClassification.Trainers.FastForest(); + var data = ML.Data.LoadFromTextFile(dataPath, + new[] { new TextLoader.Column("Label", DataKind.Boolean, 0), + new TextLoader.Column("Features", DataKind.Single, 1, 9) }); + var model = ff.Fit(data); + var pfi = ML.BinaryClassification.PermutationFeatureImportance(model, data); + } #endregion #region Multiclass Classification Tests From fe48872a339987e60d65887dd05034448b644024 Mon Sep 17 00:00:00 2001 From: Yael Dekel Date: Thu, 2 Jan 2020 14:58:57 +0200 Subject: [PATCH 02/10] change to always call EvaluateNonCalibrated --- .../CalibratedBinaryClassificationMetrics.cs | 10 ----- .../PermutationFeatureImportanceExtensions.cs | 43 ------------------- 2 files changed, 53 deletions(-) diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs index 9e21fe1cf2..a2d193deed 100644 --- a/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs +++ b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs @@ -49,15 +49,5 @@ internal CalibratedBinaryClassificationMetrics(IHost host, DataViewRow overallRe LogLossReduction = Fetch(BinaryClassifierEvaluator.LogLossReduction); Entropy = Fetch(BinaryClassifierEvaluator.Entropy); } - - [BestFriend] - internal CalibratedBinaryClassificationMetrics(double auc, double accuracy, double positivePrecision, double positiveRecall, - double negativePrecision, double negativeRecall, double f1Score, double auprc, double logLoss, double logLossReduction, double entropy) - : base(auc, accuracy, positivePrecision, positiveRecall, negativePrecision, negativeRecall, f1Score, auprc) - { - LogLoss = logLoss; - LogLossReduction = logLossReduction; - Entropy = entropy; - } } } diff --git a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs index 3ffc85cfac..284d26ccc6 100644 --- a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs +++ b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs @@ -146,32 +146,6 @@ public static ImmutableArray int? numberOfExamplesToUse = null, int permutationCount = 1) where TModel : class { - bool isCalibratedModel = false; - var type = predictionTransformer.Model.GetType(); - if (type.IsGenericType) - { - var genArgs = type.GetGenericArguments(); - if (Utils.Size(genArgs) == 2) - { - var calibratedModelType = typeof(CalibratedModelParametersBase<,>).MakeGenericType(genArgs); - if (calibratedModelType.IsAssignableFrom(type)) - isCalibratedModel = true; - } - } - if (isCalibratedModel) - { - return PermutationFeatureImportance.GetImportanceMetricsMatrix( - catalog.GetEnvironment(), - predictionTransformer, - data, - () => new BinaryClassificationMetricsStatistics(), - idv => catalog.Evaluate(idv, labelColumnName), - BinaryClassifierDelta, - predictionTransformer.FeatureColumnName, - permutationCount, - useFeatureWeightFilter, - numberOfExamplesToUse); - } return PermutationFeatureImportance.GetImportanceMetricsMatrix( catalog.GetEnvironment(), predictionTransformer, @@ -199,23 +173,6 @@ private static BinaryClassificationMetrics BinaryClassifierDelta( auprc: a.AreaUnderPrecisionRecallCurve - b.AreaUnderPrecisionRecallCurve); } - private static CalibratedBinaryClassificationMetrics CalibratedBinaryClassifierDelta( - CalibratedBinaryClassificationMetrics a, CalibratedBinaryClassificationMetrics b) - { - return new CalibratedBinaryClassificationMetrics( - auc: a.AreaUnderRocCurve - b.AreaUnderRocCurve, - accuracy: a.Accuracy - b.Accuracy, - positivePrecision: a.PositivePrecision - b.PositivePrecision, - positiveRecall: a.PositiveRecall - b.PositiveRecall, - negativePrecision: a.NegativePrecision - b.NegativePrecision, - negativeRecall: a.NegativeRecall - b.NegativeRecall, - f1Score: a.F1Score - b.F1Score, - auprc: a.AreaUnderPrecisionRecallCurve - b.AreaUnderPrecisionRecallCurve, - logLoss: a.LogLoss - b.LogLoss, - logLossReduction: a.LogLossReduction - b.LogLossReduction, - entropy: a.Entropy - b.Entropy); - } - #endregion Binary Classification #region Multiclass Classification From b6ed4b325301613fa739dde620c6df2f3db80019 Mon Sep 17 00:00:00 2001 From: Yael Dekel Date: Wed, 18 Dec 2019 17:07:28 +0200 Subject: [PATCH 03/10] Add non-calibrated evaluation to PFI --- .../CalibratedBinaryClassificationMetrics.cs | 10 +++++ .../PermutationFeatureImportanceExtensions.cs | 43 +++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs index a2d193deed..9e21fe1cf2 100644 --- a/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs +++ b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs @@ -49,5 +49,15 @@ internal CalibratedBinaryClassificationMetrics(IHost host, DataViewRow overallRe LogLossReduction = Fetch(BinaryClassifierEvaluator.LogLossReduction); Entropy = Fetch(BinaryClassifierEvaluator.Entropy); } + + [BestFriend] + internal CalibratedBinaryClassificationMetrics(double auc, double accuracy, double positivePrecision, double positiveRecall, + double negativePrecision, double negativeRecall, double f1Score, double auprc, double logLoss, double logLossReduction, double entropy) + : base(auc, accuracy, positivePrecision, positiveRecall, negativePrecision, negativeRecall, f1Score, auprc) + { + LogLoss = logLoss; + LogLossReduction = logLossReduction; + Entropy = entropy; + } } } diff --git a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs index 284d26ccc6..3ffc85cfac 100644 --- a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs +++ b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs @@ -146,6 +146,32 @@ public static ImmutableArray int? numberOfExamplesToUse = null, int permutationCount = 1) where TModel : class { + bool isCalibratedModel = false; + var type = predictionTransformer.Model.GetType(); + if (type.IsGenericType) + { + var genArgs = type.GetGenericArguments(); + if (Utils.Size(genArgs) == 2) + { + var calibratedModelType = typeof(CalibratedModelParametersBase<,>).MakeGenericType(genArgs); + if (calibratedModelType.IsAssignableFrom(type)) + isCalibratedModel = true; + } + } + if (isCalibratedModel) + { + return PermutationFeatureImportance.GetImportanceMetricsMatrix( + catalog.GetEnvironment(), + predictionTransformer, + data, + () => new BinaryClassificationMetricsStatistics(), + idv => catalog.Evaluate(idv, labelColumnName), + BinaryClassifierDelta, + predictionTransformer.FeatureColumnName, + permutationCount, + useFeatureWeightFilter, + numberOfExamplesToUse); + } return PermutationFeatureImportance.GetImportanceMetricsMatrix( catalog.GetEnvironment(), predictionTransformer, @@ -173,6 +199,23 @@ private static BinaryClassificationMetrics BinaryClassifierDelta( auprc: a.AreaUnderPrecisionRecallCurve - b.AreaUnderPrecisionRecallCurve); } + private static CalibratedBinaryClassificationMetrics CalibratedBinaryClassifierDelta( + CalibratedBinaryClassificationMetrics a, CalibratedBinaryClassificationMetrics b) + { + return new CalibratedBinaryClassificationMetrics( + auc: a.AreaUnderRocCurve - b.AreaUnderRocCurve, + accuracy: a.Accuracy - b.Accuracy, + positivePrecision: a.PositivePrecision - b.PositivePrecision, + positiveRecall: a.PositiveRecall - b.PositiveRecall, + negativePrecision: a.NegativePrecision - b.NegativePrecision, + negativeRecall: a.NegativeRecall - b.NegativeRecall, + f1Score: a.F1Score - b.F1Score, + auprc: a.AreaUnderPrecisionRecallCurve - b.AreaUnderPrecisionRecallCurve, + logLoss: a.LogLoss - b.LogLoss, + logLossReduction: a.LogLossReduction - b.LogLossReduction, + entropy: a.Entropy - b.Entropy); + } + #endregion Binary Classification #region Multiclass Classification From 0559fb7840c59c85c1430171162f5421bc853b4b Mon Sep 17 00:00:00 2001 From: Yael Dekel Date: Thu, 2 Jan 2020 14:58:57 +0200 Subject: [PATCH 04/10] change to always call EvaluateNonCalibrated --- .../CalibratedBinaryClassificationMetrics.cs | 10 ----- .../PermutationFeatureImportanceExtensions.cs | 43 ------------------- 2 files changed, 53 deletions(-) diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs index 9e21fe1cf2..a2d193deed 100644 --- a/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs +++ b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs @@ -49,15 +49,5 @@ internal CalibratedBinaryClassificationMetrics(IHost host, DataViewRow overallRe LogLossReduction = Fetch(BinaryClassifierEvaluator.LogLossReduction); Entropy = Fetch(BinaryClassifierEvaluator.Entropy); } - - [BestFriend] - internal CalibratedBinaryClassificationMetrics(double auc, double accuracy, double positivePrecision, double positiveRecall, - double negativePrecision, double negativeRecall, double f1Score, double auprc, double logLoss, double logLossReduction, double entropy) - : base(auc, accuracy, positivePrecision, positiveRecall, negativePrecision, negativeRecall, f1Score, auprc) - { - LogLoss = logLoss; - LogLossReduction = logLossReduction; - Entropy = entropy; - } } } diff --git a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs index 3ffc85cfac..284d26ccc6 100644 --- a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs +++ b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs @@ -146,32 +146,6 @@ public static ImmutableArray int? numberOfExamplesToUse = null, int permutationCount = 1) where TModel : class { - bool isCalibratedModel = false; - var type = predictionTransformer.Model.GetType(); - if (type.IsGenericType) - { - var genArgs = type.GetGenericArguments(); - if (Utils.Size(genArgs) == 2) - { - var calibratedModelType = typeof(CalibratedModelParametersBase<,>).MakeGenericType(genArgs); - if (calibratedModelType.IsAssignableFrom(type)) - isCalibratedModel = true; - } - } - if (isCalibratedModel) - { - return PermutationFeatureImportance.GetImportanceMetricsMatrix( - catalog.GetEnvironment(), - predictionTransformer, - data, - () => new BinaryClassificationMetricsStatistics(), - idv => catalog.Evaluate(idv, labelColumnName), - BinaryClassifierDelta, - predictionTransformer.FeatureColumnName, - permutationCount, - useFeatureWeightFilter, - numberOfExamplesToUse); - } return PermutationFeatureImportance.GetImportanceMetricsMatrix( catalog.GetEnvironment(), predictionTransformer, @@ -199,23 +173,6 @@ private static BinaryClassificationMetrics BinaryClassifierDelta( auprc: a.AreaUnderPrecisionRecallCurve - b.AreaUnderPrecisionRecallCurve); } - private static CalibratedBinaryClassificationMetrics CalibratedBinaryClassifierDelta( - CalibratedBinaryClassificationMetrics a, CalibratedBinaryClassificationMetrics b) - { - return new CalibratedBinaryClassificationMetrics( - auc: a.AreaUnderRocCurve - b.AreaUnderRocCurve, - accuracy: a.Accuracy - b.Accuracy, - positivePrecision: a.PositivePrecision - b.PositivePrecision, - positiveRecall: a.PositiveRecall - b.PositiveRecall, - negativePrecision: a.NegativePrecision - b.NegativePrecision, - negativeRecall: a.NegativeRecall - b.NegativeRecall, - f1Score: a.F1Score - b.F1Score, - auprc: a.AreaUnderPrecisionRecallCurve - b.AreaUnderPrecisionRecallCurve, - logLoss: a.LogLoss - b.LogLoss, - logLossReduction: a.LogLossReduction - b.LogLossReduction, - entropy: a.Entropy - b.Entropy); - } - #endregion Binary Classification #region Multiclass Classification From 6558551529f6ea1de5c4e84e61d9b82a03ba5ee1 Mon Sep 17 00:00:00 2001 From: Yael Dekel Date: Wed, 18 Dec 2019 17:07:28 +0200 Subject: [PATCH 05/10] Add non-calibrated evaluation to PFI --- .../CalibratedBinaryClassificationMetrics.cs | 10 +++++ .../PermutationFeatureImportanceExtensions.cs | 43 +++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs index a2d193deed..9e21fe1cf2 100644 --- a/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs +++ b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs @@ -49,5 +49,15 @@ internal CalibratedBinaryClassificationMetrics(IHost host, DataViewRow overallRe LogLossReduction = Fetch(BinaryClassifierEvaluator.LogLossReduction); Entropy = Fetch(BinaryClassifierEvaluator.Entropy); } + + [BestFriend] + internal CalibratedBinaryClassificationMetrics(double auc, double accuracy, double positivePrecision, double positiveRecall, + double negativePrecision, double negativeRecall, double f1Score, double auprc, double logLoss, double logLossReduction, double entropy) + : base(auc, accuracy, positivePrecision, positiveRecall, negativePrecision, negativeRecall, f1Score, auprc) + { + LogLoss = logLoss; + LogLossReduction = logLossReduction; + Entropy = entropy; + } } } diff --git a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs index 284d26ccc6..3ffc85cfac 100644 --- a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs +++ b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs @@ -146,6 +146,32 @@ public static ImmutableArray int? numberOfExamplesToUse = null, int permutationCount = 1) where TModel : class { + bool isCalibratedModel = false; + var type = predictionTransformer.Model.GetType(); + if (type.IsGenericType) + { + var genArgs = type.GetGenericArguments(); + if (Utils.Size(genArgs) == 2) + { + var calibratedModelType = typeof(CalibratedModelParametersBase<,>).MakeGenericType(genArgs); + if (calibratedModelType.IsAssignableFrom(type)) + isCalibratedModel = true; + } + } + if (isCalibratedModel) + { + return PermutationFeatureImportance.GetImportanceMetricsMatrix( + catalog.GetEnvironment(), + predictionTransformer, + data, + () => new BinaryClassificationMetricsStatistics(), + idv => catalog.Evaluate(idv, labelColumnName), + BinaryClassifierDelta, + predictionTransformer.FeatureColumnName, + permutationCount, + useFeatureWeightFilter, + numberOfExamplesToUse); + } return PermutationFeatureImportance.GetImportanceMetricsMatrix( catalog.GetEnvironment(), predictionTransformer, @@ -173,6 +199,23 @@ private static BinaryClassificationMetrics BinaryClassifierDelta( auprc: a.AreaUnderPrecisionRecallCurve - b.AreaUnderPrecisionRecallCurve); } + private static CalibratedBinaryClassificationMetrics CalibratedBinaryClassifierDelta( + CalibratedBinaryClassificationMetrics a, CalibratedBinaryClassificationMetrics b) + { + return new CalibratedBinaryClassificationMetrics( + auc: a.AreaUnderRocCurve - b.AreaUnderRocCurve, + accuracy: a.Accuracy - b.Accuracy, + positivePrecision: a.PositivePrecision - b.PositivePrecision, + positiveRecall: a.PositiveRecall - b.PositiveRecall, + negativePrecision: a.NegativePrecision - b.NegativePrecision, + negativeRecall: a.NegativeRecall - b.NegativeRecall, + f1Score: a.F1Score - b.F1Score, + auprc: a.AreaUnderPrecisionRecallCurve - b.AreaUnderPrecisionRecallCurve, + logLoss: a.LogLoss - b.LogLoss, + logLossReduction: a.LogLossReduction - b.LogLossReduction, + entropy: a.Entropy - b.Entropy); + } + #endregion Binary Classification #region Multiclass Classification From 2d53161efc9543f50fcd51da1398cd87472154d6 Mon Sep 17 00:00:00 2001 From: Yael Dekel Date: Thu, 2 Jan 2020 14:58:57 +0200 Subject: [PATCH 06/10] change to always call EvaluateNonCalibrated --- .../CalibratedBinaryClassificationMetrics.cs | 10 ----- .../PermutationFeatureImportanceExtensions.cs | 43 ------------------- 2 files changed, 53 deletions(-) diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs index 9e21fe1cf2..a2d193deed 100644 --- a/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs +++ b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs @@ -49,15 +49,5 @@ internal CalibratedBinaryClassificationMetrics(IHost host, DataViewRow overallRe LogLossReduction = Fetch(BinaryClassifierEvaluator.LogLossReduction); Entropy = Fetch(BinaryClassifierEvaluator.Entropy); } - - [BestFriend] - internal CalibratedBinaryClassificationMetrics(double auc, double accuracy, double positivePrecision, double positiveRecall, - double negativePrecision, double negativeRecall, double f1Score, double auprc, double logLoss, double logLossReduction, double entropy) - : base(auc, accuracy, positivePrecision, positiveRecall, negativePrecision, negativeRecall, f1Score, auprc) - { - LogLoss = logLoss; - LogLossReduction = logLossReduction; - Entropy = entropy; - } } } diff --git a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs index 3ffc85cfac..284d26ccc6 100644 --- a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs +++ b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs @@ -146,32 +146,6 @@ public static ImmutableArray int? numberOfExamplesToUse = null, int permutationCount = 1) where TModel : class { - bool isCalibratedModel = false; - var type = predictionTransformer.Model.GetType(); - if (type.IsGenericType) - { - var genArgs = type.GetGenericArguments(); - if (Utils.Size(genArgs) == 2) - { - var calibratedModelType = typeof(CalibratedModelParametersBase<,>).MakeGenericType(genArgs); - if (calibratedModelType.IsAssignableFrom(type)) - isCalibratedModel = true; - } - } - if (isCalibratedModel) - { - return PermutationFeatureImportance.GetImportanceMetricsMatrix( - catalog.GetEnvironment(), - predictionTransformer, - data, - () => new BinaryClassificationMetricsStatistics(), - idv => catalog.Evaluate(idv, labelColumnName), - BinaryClassifierDelta, - predictionTransformer.FeatureColumnName, - permutationCount, - useFeatureWeightFilter, - numberOfExamplesToUse); - } return PermutationFeatureImportance.GetImportanceMetricsMatrix( catalog.GetEnvironment(), predictionTransformer, @@ -199,23 +173,6 @@ private static BinaryClassificationMetrics BinaryClassifierDelta( auprc: a.AreaUnderPrecisionRecallCurve - b.AreaUnderPrecisionRecallCurve); } - private static CalibratedBinaryClassificationMetrics CalibratedBinaryClassifierDelta( - CalibratedBinaryClassificationMetrics a, CalibratedBinaryClassificationMetrics b) - { - return new CalibratedBinaryClassificationMetrics( - auc: a.AreaUnderRocCurve - b.AreaUnderRocCurve, - accuracy: a.Accuracy - b.Accuracy, - positivePrecision: a.PositivePrecision - b.PositivePrecision, - positiveRecall: a.PositiveRecall - b.PositiveRecall, - negativePrecision: a.NegativePrecision - b.NegativePrecision, - negativeRecall: a.NegativeRecall - b.NegativeRecall, - f1Score: a.F1Score - b.F1Score, - auprc: a.AreaUnderPrecisionRecallCurve - b.AreaUnderPrecisionRecallCurve, - logLoss: a.LogLoss - b.LogLoss, - logLossReduction: a.LogLossReduction - b.LogLossReduction, - entropy: a.Entropy - b.Entropy); - } - #endregion Binary Classification #region Multiclass Classification From bd2af8e09ff186c50e13ff38cb30b0995c90c590 Mon Sep 17 00:00:00 2001 From: Yael Dekel Date: Wed, 18 Dec 2019 17:07:28 +0200 Subject: [PATCH 07/10] Add non-calibrated evaluation to PFI --- .../CalibratedBinaryClassificationMetrics.cs | 10 +++++ .../PermutationFeatureImportanceExtensions.cs | 43 +++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs index a2d193deed..9e21fe1cf2 100644 --- a/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs +++ b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs @@ -49,5 +49,15 @@ internal CalibratedBinaryClassificationMetrics(IHost host, DataViewRow overallRe LogLossReduction = Fetch(BinaryClassifierEvaluator.LogLossReduction); Entropy = Fetch(BinaryClassifierEvaluator.Entropy); } + + [BestFriend] + internal CalibratedBinaryClassificationMetrics(double auc, double accuracy, double positivePrecision, double positiveRecall, + double negativePrecision, double negativeRecall, double f1Score, double auprc, double logLoss, double logLossReduction, double entropy) + : base(auc, accuracy, positivePrecision, positiveRecall, negativePrecision, negativeRecall, f1Score, auprc) + { + LogLoss = logLoss; + LogLossReduction = logLossReduction; + Entropy = entropy; + } } } diff --git a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs index 284d26ccc6..3ffc85cfac 100644 --- a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs +++ b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs @@ -146,6 +146,32 @@ public static ImmutableArray int? numberOfExamplesToUse = null, int permutationCount = 1) where TModel : class { + bool isCalibratedModel = false; + var type = predictionTransformer.Model.GetType(); + if (type.IsGenericType) + { + var genArgs = type.GetGenericArguments(); + if (Utils.Size(genArgs) == 2) + { + var calibratedModelType = typeof(CalibratedModelParametersBase<,>).MakeGenericType(genArgs); + if (calibratedModelType.IsAssignableFrom(type)) + isCalibratedModel = true; + } + } + if (isCalibratedModel) + { + return PermutationFeatureImportance.GetImportanceMetricsMatrix( + catalog.GetEnvironment(), + predictionTransformer, + data, + () => new BinaryClassificationMetricsStatistics(), + idv => catalog.Evaluate(idv, labelColumnName), + BinaryClassifierDelta, + predictionTransformer.FeatureColumnName, + permutationCount, + useFeatureWeightFilter, + numberOfExamplesToUse); + } return PermutationFeatureImportance.GetImportanceMetricsMatrix( catalog.GetEnvironment(), predictionTransformer, @@ -173,6 +199,23 @@ private static BinaryClassificationMetrics BinaryClassifierDelta( auprc: a.AreaUnderPrecisionRecallCurve - b.AreaUnderPrecisionRecallCurve); } + private static CalibratedBinaryClassificationMetrics CalibratedBinaryClassifierDelta( + CalibratedBinaryClassificationMetrics a, CalibratedBinaryClassificationMetrics b) + { + return new CalibratedBinaryClassificationMetrics( + auc: a.AreaUnderRocCurve - b.AreaUnderRocCurve, + accuracy: a.Accuracy - b.Accuracy, + positivePrecision: a.PositivePrecision - b.PositivePrecision, + positiveRecall: a.PositiveRecall - b.PositiveRecall, + negativePrecision: a.NegativePrecision - b.NegativePrecision, + negativeRecall: a.NegativeRecall - b.NegativeRecall, + f1Score: a.F1Score - b.F1Score, + auprc: a.AreaUnderPrecisionRecallCurve - b.AreaUnderPrecisionRecallCurve, + logLoss: a.LogLoss - b.LogLoss, + logLossReduction: a.LogLossReduction - b.LogLossReduction, + entropy: a.Entropy - b.Entropy); + } + #endregion Binary Classification #region Multiclass Classification From 6e53e5bc2ece07e395aae3da33913e8545a23e47 Mon Sep 17 00:00:00 2001 From: Yael Dekel Date: Sun, 5 Jan 2020 14:13:16 +0200 Subject: [PATCH 08/10] Add asserts to unit test --- .../PermutationFeatureImportanceExtensions.cs | 43 ------------------- .../PermutationFeatureImportanceTests.cs | 18 ++++++++ 2 files changed, 18 insertions(+), 43 deletions(-) diff --git a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs index 3ffc85cfac..284d26ccc6 100644 --- a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs +++ b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs @@ -146,32 +146,6 @@ public static ImmutableArray int? numberOfExamplesToUse = null, int permutationCount = 1) where TModel : class { - bool isCalibratedModel = false; - var type = predictionTransformer.Model.GetType(); - if (type.IsGenericType) - { - var genArgs = type.GetGenericArguments(); - if (Utils.Size(genArgs) == 2) - { - var calibratedModelType = typeof(CalibratedModelParametersBase<,>).MakeGenericType(genArgs); - if (calibratedModelType.IsAssignableFrom(type)) - isCalibratedModel = true; - } - } - if (isCalibratedModel) - { - return PermutationFeatureImportance.GetImportanceMetricsMatrix( - catalog.GetEnvironment(), - predictionTransformer, - data, - () => new BinaryClassificationMetricsStatistics(), - idv => catalog.Evaluate(idv, labelColumnName), - BinaryClassifierDelta, - predictionTransformer.FeatureColumnName, - permutationCount, - useFeatureWeightFilter, - numberOfExamplesToUse); - } return PermutationFeatureImportance.GetImportanceMetricsMatrix( catalog.GetEnvironment(), predictionTransformer, @@ -199,23 +173,6 @@ private static BinaryClassificationMetrics BinaryClassifierDelta( auprc: a.AreaUnderPrecisionRecallCurve - b.AreaUnderPrecisionRecallCurve); } - private static CalibratedBinaryClassificationMetrics CalibratedBinaryClassifierDelta( - CalibratedBinaryClassificationMetrics a, CalibratedBinaryClassificationMetrics b) - { - return new CalibratedBinaryClassificationMetrics( - auc: a.AreaUnderRocCurve - b.AreaUnderRocCurve, - accuracy: a.Accuracy - b.Accuracy, - positivePrecision: a.PositivePrecision - b.PositivePrecision, - positiveRecall: a.PositiveRecall - b.PositiveRecall, - negativePrecision: a.NegativePrecision - b.NegativePrecision, - negativeRecall: a.NegativeRecall - b.NegativeRecall, - f1Score: a.F1Score - b.F1Score, - auprc: a.AreaUnderPrecisionRecallCurve - b.AreaUnderPrecisionRecallCurve, - logLoss: a.LogLoss - b.LogLoss, - logLossReduction: a.LogLossReduction - b.LogLossReduction, - entropy: a.Entropy - b.Entropy); - } - #endregion Binary Classification #region Multiclass Classification diff --git a/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs b/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs index 88e301be84..5bd999f21b 100644 --- a/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs +++ b/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs @@ -316,6 +316,24 @@ public void TestBinaryClassificationWithoutCalibrator() new TextLoader.Column("Features", DataKind.Single, 1, 9) }); var model = ff.Fit(data); var pfi = ML.BinaryClassification.PermutationFeatureImportance(model, data); + + // For the following metrics higher is better, so minimum delta means more important feature, and vice versa + Assert.Equal(7, MaxDeltaIndex(pfi, m => m.AreaUnderRocCurve.Mean)); + Assert.Equal(1, MinDeltaIndex(pfi, m => m.AreaUnderRocCurve.Mean)); + Assert.Equal(3, MaxDeltaIndex(pfi, m => m.Accuracy.Mean)); + Assert.Equal(1, MinDeltaIndex(pfi, m => m.Accuracy.Mean)); + Assert.Equal(3, MaxDeltaIndex(pfi, m => m.PositivePrecision.Mean)); + Assert.Equal(1, MinDeltaIndex(pfi, m => m.PositivePrecision.Mean)); + Assert.Equal(3, MaxDeltaIndex(pfi, m => m.PositiveRecall.Mean)); + Assert.Equal(1, MinDeltaIndex(pfi, m => m.PositiveRecall.Mean)); + Assert.Equal(3, MaxDeltaIndex(pfi, m => m.NegativePrecision.Mean)); + Assert.Equal(1, MinDeltaIndex(pfi, m => m.NegativePrecision.Mean)); + Assert.Equal(2, MaxDeltaIndex(pfi, m => m.NegativeRecall.Mean)); + Assert.Equal(1, MinDeltaIndex(pfi, m => m.NegativeRecall.Mean)); + Assert.Equal(3, MaxDeltaIndex(pfi, m => m.F1Score.Mean)); + Assert.Equal(1, MinDeltaIndex(pfi, m => m.F1Score.Mean)); + Assert.Equal(7, MaxDeltaIndex(pfi, m => m.AreaUnderPrecisionRecallCurve.Mean)); + Assert.Equal(1, MinDeltaIndex(pfi, m => m.AreaUnderPrecisionRecallCurve.Mean)); } #endregion From 8bde46ba7a038d14e3d9f1d5805b2b5cafa0855b Mon Sep 17 00:00:00 2001 From: Yael Dekel Date: Sun, 5 Jan 2020 14:18:48 +0200 Subject: [PATCH 09/10] Remove using statements --- .../PermutationFeatureImportanceExtensions.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs index 284d26ccc6..02e8832f0e 100644 --- a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs +++ b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs @@ -4,9 +4,7 @@ using System.Collections.Generic; using System.Collections.Immutable; -using Microsoft.ML.Calibrators; using Microsoft.ML.Data; -using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Runtime; using Microsoft.ML.Transforms; From cd7f27c2f35878e624c0ba2288d59df4bfecda72 Mon Sep 17 00:00:00 2001 From: Yael Dekel Date: Tue, 7 Jan 2020 12:40:27 +0200 Subject: [PATCH 10/10] Remove unused ctor --- .../Metrics/CalibratedBinaryClassificationMetrics.cs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs index 9e21fe1cf2..a2d193deed 100644 --- a/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs +++ b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs @@ -49,15 +49,5 @@ internal CalibratedBinaryClassificationMetrics(IHost host, DataViewRow overallRe LogLossReduction = Fetch(BinaryClassifierEvaluator.LogLossReduction); Entropy = Fetch(BinaryClassifierEvaluator.Entropy); } - - [BestFriend] - internal CalibratedBinaryClassificationMetrics(double auc, double accuracy, double positivePrecision, double positiveRecall, - double negativePrecision, double negativeRecall, double f1Score, double auprc, double logLoss, double logLossReduction, double entropy) - : base(auc, accuracy, positivePrecision, positiveRecall, negativePrecision, negativeRecall, f1Score, auprc) - { - LogLoss = logLoss; - LogLossReduction = logLossReduction; - Entropy = entropy; - } } }