Skip to content

Fixing parameters in ML.NET Public API #2665

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 25, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 24 additions & 24 deletions src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,23 @@ public static class TreeExtensions
/// <param name="catalog">The <see cref="RegressionCatalog"/>.</param>
/// <param name="labelColumn">The label column.</param>
/// <param name="featureColumn">The feature column.</param>
/// <param name="weights">The optional weights column.</param>
/// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
/// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
/// <param name="numLeaves">The maximum number of leaves per decision tree.</param>
/// <param name="minDatapointsInLeaves">The minimal number of datapoints allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="learningRate">The learning rate.</param>
public static FastTreeRegressionTrainer FastTree(this RegressionCatalog.RegressionTrainers catalog,
string labelColumn = DefaultColumnNames.Label,
string featureColumn = DefaultColumnNames.Features,
string weights = null,
string exampleWeightColumnName = null,
int numLeaves = Defaults.NumLeaves,
int numTrees = Defaults.NumTrees,
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
double learningRate = Defaults.LearningRates)
{
Contracts.CheckValue(catalog, nameof(catalog));
var env = CatalogUtils.GetEnvironment(catalog);
return new FastTreeRegressionTrainer(env, labelColumn, featureColumn, weights, numLeaves, numTrees, minDatapointsInLeaves, learningRate);
return new FastTreeRegressionTrainer(env, labelColumn, featureColumn, exampleWeightColumnName, numLeaves, numTrees, minDatapointsInLeaves, learningRate);
}

/// <summary>
Expand All @@ -59,23 +59,23 @@ public static FastTreeRegressionTrainer FastTree(this RegressionCatalog.Regressi
/// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
/// <param name="labelColumn">The labelColumn column.</param>
/// <param name="featureColumn">The featureColumn column.</param>
/// <param name="weights">The optional weights column.</param>
/// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
/// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
/// <param name="numLeaves">The maximum number of leaves per decision tree.</param>
/// <param name="minDatapointsInLeaves">The minimal number of datapoints allowed in a leaf of the tree, out of the subsampled data.</param>
/// <param name="learningRate">The learning rate.</param>
public static FastTreeBinaryClassificationTrainer FastTree(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
string labelColumn = DefaultColumnNames.Label,
string featureColumn = DefaultColumnNames.Features,
string weights = null,
string exampleWeightColumnName = null,
int numLeaves = Defaults.NumLeaves,
int numTrees = Defaults.NumTrees,
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
double learningRate = Defaults.LearningRates)
{
Contracts.CheckValue(catalog, nameof(catalog));
var env = CatalogUtils.GetEnvironment(catalog);
return new FastTreeBinaryClassificationTrainer(env, labelColumn, featureColumn, weights, numLeaves, numTrees, minDatapointsInLeaves, learningRate);
return new FastTreeBinaryClassificationTrainer(env, labelColumn, featureColumn, exampleWeightColumnName, numLeaves, numTrees, minDatapointsInLeaves, learningRate);
}

/// <summary>
Expand All @@ -100,7 +100,7 @@ public static FastTreeBinaryClassificationTrainer FastTree(this BinaryClassifica
/// <param name="labelColumn">The labelColumn column.</param>
/// <param name="featureColumn">The featureColumn column.</param>
/// <param name="groupId">The groupId column.</param>
/// <param name="weights">The optional weights column.</param>
/// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
/// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
/// <param name="numLeaves">The maximum number of leaves per decision tree.</param>
/// <param name="minDatapointsInLeaves">The minimal number of datapoints allowed in a leaf of the tree, out of the subsampled data.</param>
Expand All @@ -109,15 +109,15 @@ public static FastTreeRankingTrainer FastTree(this RankingCatalog.RankingTrainer
string labelColumn = DefaultColumnNames.Label,
string featureColumn = DefaultColumnNames.Features,
string groupId = DefaultColumnNames.GroupId,
string weights = null,
string exampleWeightColumnName = null,
int numLeaves = Defaults.NumLeaves,
int numTrees = Defaults.NumTrees,
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
double learningRate = Defaults.LearningRates)
{
Contracts.CheckValue(catalog, nameof(catalog));
var env = CatalogUtils.GetEnvironment(catalog);
return new FastTreeRankingTrainer(env, labelColumn, featureColumn, groupId, weights, numLeaves, numTrees, minDatapointsInLeaves, learningRate);
return new FastTreeRankingTrainer(env, labelColumn, featureColumn, groupId, exampleWeightColumnName, numLeaves, numTrees, minDatapointsInLeaves, learningRate);
}

/// <summary>
Expand All @@ -141,21 +141,21 @@ public static FastTreeRankingTrainer FastTree(this RankingCatalog.RankingTrainer
/// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
/// <param name="labelColumn">The labelColumn column.</param>
/// <param name="featureColumn">The featureColumn column.</param>
/// <param name="weights">The optional weights column.</param>
/// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
/// <param name="numIterations">The number of iterations to use in learning the features.</param>
/// <param name="learningRate">The learning rate. GAMs work best with a small learning rate.</param>
/// <param name="maxBins">The maximum number of bins to use to approximate features.</param>
public static BinaryClassificationGamTrainer GeneralizedAdditiveModels(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
string labelColumn = DefaultColumnNames.Label,
string featureColumn = DefaultColumnNames.Features,
string weights = null,
string exampleWeightColumnName = null,
int numIterations = GamDefaults.NumIterations,
double learningRate = GamDefaults.LearningRates,
int maxBins = GamDefaults.MaxBins)
{
Contracts.CheckValue(catalog, nameof(catalog));
var env = CatalogUtils.GetEnvironment(catalog);
return new BinaryClassificationGamTrainer(env, labelColumn, featureColumn, weights, numIterations, learningRate, maxBins);
return new BinaryClassificationGamTrainer(env, labelColumn, featureColumn, exampleWeightColumnName, numIterations, learningRate, maxBins);
}

/// <summary>
Expand All @@ -177,21 +177,21 @@ public static BinaryClassificationGamTrainer GeneralizedAdditiveModels(this Bina
/// <param name="catalog">The <see cref="RegressionCatalog"/>.</param>
/// <param name="labelColumn">The labelColumn column.</param>
/// <param name="featureColumn">The featureColumn column.</param>
/// <param name="weights">The optional weights column.</param>
/// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
/// <param name="numIterations">The number of iterations to use in learning the features.</param>
/// <param name="learningRate">The learning rate. GAMs work best with a small learning rate.</param>
/// <param name="maxBins">The maximum number of bins to use to approximate features.</param>
public static RegressionGamTrainer GeneralizedAdditiveModels(this RegressionCatalog.RegressionTrainers catalog,
string labelColumn = DefaultColumnNames.Label,
string featureColumn = DefaultColumnNames.Features,
string weights = null,
string exampleWeightColumnName = null,
int numIterations = GamDefaults.NumIterations,
double learningRate = GamDefaults.LearningRates,
int maxBins = GamDefaults.MaxBins)
{
Contracts.CheckValue(catalog, nameof(catalog));
var env = CatalogUtils.GetEnvironment(catalog);
return new RegressionGamTrainer(env, labelColumn, featureColumn, weights, numIterations, learningRate, maxBins);
return new RegressionGamTrainer(env, labelColumn, featureColumn, exampleWeightColumnName, numIterations, learningRate, maxBins);
}

/// <summary>
Expand All @@ -213,23 +213,23 @@ public static RegressionGamTrainer GeneralizedAdditiveModels(this RegressionCata
/// <param name="catalog">The <see cref="RegressionCatalog"/>.</param>
/// <param name="labelColumn">The labelColumn column.</param>
/// <param name="featureColumn">The featureColumn column.</param>
/// <param name="weights">The optional weights column.</param>
/// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
/// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
/// <param name="numLeaves">The maximum number of leaves per decision tree.</param>
/// <param name="minDatapointsInLeaves">The minimal number of datapoints allowed in a leaf of the tree, out of the subsampled data.</param>
/// <param name="learningRate">The learning rate.</param>
public static FastTreeTweedieTrainer FastTreeTweedie(this RegressionCatalog.RegressionTrainers catalog,
string labelColumn = DefaultColumnNames.Label,
string featureColumn = DefaultColumnNames.Features,
string weights = null,
string exampleWeightColumnName = null,
int numLeaves = Defaults.NumLeaves,
int numTrees = Defaults.NumTrees,
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
double learningRate = Defaults.LearningRates)
{
Contracts.CheckValue(catalog, nameof(catalog));
var env = CatalogUtils.GetEnvironment(catalog);
return new FastTreeTweedieTrainer(env, labelColumn, featureColumn, weights, numLeaves, numTrees, minDatapointsInLeaves, learningRate);
return new FastTreeTweedieTrainer(env, labelColumn, featureColumn, exampleWeightColumnName, numLeaves, numTrees, minDatapointsInLeaves, learningRate);
}

/// <summary>
Expand All @@ -253,23 +253,23 @@ public static FastTreeTweedieTrainer FastTreeTweedie(this RegressionCatalog.Regr
/// <param name="catalog">The <see cref="RegressionCatalog"/>.</param>
/// <param name="labelColumn">The labelColumn column.</param>
/// <param name="featureColumn">The featureColumn column.</param>
/// <param name="weights">The optional weights column.</param>
/// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
/// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
/// <param name="numLeaves">The maximum number of leaves per decision tree.</param>
/// <param name="minDatapointsInLeaves">The minimal number of datapoints allowed in a leaf of the tree, out of the subsampled data.</param>
/// <param name="learningRate">The learning rate.</param>
public static FastForestRegression FastForest(this RegressionCatalog.RegressionTrainers catalog,
string labelColumn = DefaultColumnNames.Label,
string featureColumn = DefaultColumnNames.Features,
string weights = null,
string exampleWeightColumnName = null,
int numLeaves = Defaults.NumLeaves,
int numTrees = Defaults.NumTrees,
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
double learningRate = Defaults.LearningRates)
{
Contracts.CheckValue(catalog, nameof(catalog));
var env = CatalogUtils.GetEnvironment(catalog);
return new FastForestRegression(env, labelColumn, featureColumn, weights, numLeaves, numTrees, minDatapointsInLeaves, learningRate);
return new FastForestRegression(env, labelColumn, featureColumn, exampleWeightColumnName, numLeaves, numTrees, minDatapointsInLeaves, learningRate);
}

/// <summary>
Expand All @@ -293,23 +293,23 @@ public static FastForestRegression FastForest(this RegressionCatalog.RegressionT
/// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
/// <param name="labelColumn">The labelColumn column.</param>
/// <param name="featureColumn">The featureColumn column.</param>
/// <param name="weights">The optional weights column.</param>
/// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
/// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
/// <param name="numLeaves">The maximum number of leaves per decision tree.</param>
/// <param name="minDatapointsInLeaves">The minimal number of datapoints allowed in a leaf of the tree, out of the subsampled data.</param>
/// <param name="learningRate">The learning rate.</param>
public static FastForestClassification FastForest(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
string labelColumn = DefaultColumnNames.Label,
string featureColumn = DefaultColumnNames.Features,
string weights = null,
string exampleWeightColumnName = null,
int numLeaves = Defaults.NumLeaves,
int numTrees = Defaults.NumTrees,
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
double learningRate = Defaults.LearningRates)
{
Contracts.CheckValue(catalog, nameof(catalog));
var env = CatalogUtils.GetEnvironment(catalog);
return new FastForestClassification(env, labelColumn, featureColumn, weights,numLeaves, numTrees, minDatapointsInLeaves, learningRate);
return new FastForestClassification(env, labelColumn, featureColumn, exampleWeightColumnName,numLeaves, numTrees, minDatapointsInLeaves, learningRate);
}

/// <summary>
Expand Down
6 changes: 3 additions & 3 deletions src/Microsoft.ML.HalLearners/HalLearnersCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public static class HalLearnersCatalog
/// <param name="catalog">The <see cref="RegressionCatalog"/>.</param>
/// <param name="labelColumnName">The name of the label column.</param>
/// <param name="featureColumnName">The name of the feature column.</param>
/// <param name="weightColumn">The name of optional weight column.</param>
/// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
Expand All @@ -31,15 +31,15 @@ public static class HalLearnersCatalog
public static OlsLinearRegressionTrainer OrdinaryLeastSquares(this RegressionCatalog.RegressionTrainers catalog,
string labelColumnName = DefaultColumnNames.Label,
string featureColumnName = DefaultColumnNames.Features,
string weightColumn = null)
string exampleWeightColumnName = null)
{
Contracts.CheckValue(catalog, nameof(catalog));
var env = CatalogUtils.GetEnvironment(catalog);
var options = new OlsLinearRegressionTrainer.Options
{
LabelColumn = labelColumnName,
FeatureColumn = featureColumnName,
WeightColumn = weightColumn != null ? Optional<string>.Explicit(weightColumn) : Optional<string>.Implicit(DefaultColumnNames.Weight)
WeightColumn = exampleWeightColumnName != null ? Optional<string>.Explicit(exampleWeightColumnName) : Optional<string>.Implicit(DefaultColumnNames.Weight)
Copy link
Contributor

@TomFinley TomFinley Feb 21, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Uh oh. Am I wrong or is there literally no way to have non-weighted training if you happen to have a column named Weight????

It looks like there's an existing issue #1065 that is capturing this. Not your code, so don't have to fix, I just notice this way. #Resolved

Copy link
Contributor

@TomFinley TomFinley Feb 21, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although I see it's incidentally assigned to you @abgoswam . 😄 Maybe after this PR we can think about how to address this. #Resolved

Copy link
Member Author

@abgoswam abgoswam Feb 21, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good. We should fix this behavior as part of #1065 (in a separate PR). #Closed

};

return new OlsLinearRegressionTrainer(env, options);
Expand Down
6 changes: 3 additions & 3 deletions src/Microsoft.ML.KMeansClustering/KMeansCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public static class KMeansClusteringExtensions
/// </summary>
/// <param name="catalog">The clustering catalog trainer object.</param>
/// <param name="featureColumn">The features, or independent variables.</param>
/// <param name="weights">The optional example weights.</param>
/// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
/// <param name="clustersCount">The number of clusters to use for KMeans.</param>
/// <example>
/// <format type="text/markdown">
Expand All @@ -29,7 +29,7 @@ public static class KMeansClusteringExtensions
/// </example>
public static KMeansPlusPlusTrainer KMeans(this ClusteringCatalog.ClusteringTrainers catalog,
string featureColumn = DefaultColumnNames.Features,
Copy link
Contributor

@TomFinley TomFinley Feb 21, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

featureColumn [](start = 18, length = 13)

So we're calling this exampleWeightColumnName, but calling the above featureColumn. I'm not terribly insistent one way or the other, but we should at least be consistent I think.

In the original issue you proposed calling this * exampleWeightColumn, which seems to me to be consistent with the existing behavior here. Yet elsewhere I see that we are calling things labelColumn and labelColumnName here and there.

Is this on someone's radar? If we are not going to fix it (we should!) a second best alternative would be to be at least locally consistent. Not sure what @rogancarr and @eerhardt and @sfilipi think? #Resolved

Copy link
Member Author

@abgoswam abgoswam Feb 21, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Issue #2177 proposes we use suffix ColumnName consistently

In this PR I am making that change for weight, groupid parameters. In a follow up PR I will do it for label and features.

So once both PRs go in, we will mark #2177 as complete, and the following convention will be used

  • labelColumnName
  • featureColumnName
  • exampleWeightColumnName
  • rowGroupColumnName

This is certainly on my radar, as we drive towards consistency in the APIs . I have also updated the description of the issue so it has reference to issue #2177 . Hope that sounds good ? #Closed

Copy link
Contributor

@TomFinley TomFinley Feb 21, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very good, thank you @abgoswam ! Sorry missed that discussion. #Resolved

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TomFinley. I have consolidated all the changes in this PR. So this PR addresses {features, labels, groupid, weight} column names in a consistent manner.


In reply to: 258779314 [](ancestors = 258779314)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TomFinley. I have consolidated all the changes in this PR. So this PR addresses {features, labels, groupid, weight} column names in a consistent manner.


In reply to: 258776812 [](ancestors = 258776812)

string weights = null,
string exampleWeightColumnName = null,
int clustersCount = KMeansPlusPlusTrainer.Defaults.ClustersCount)
{
Contracts.CheckValue(catalog, nameof(catalog));
Expand All @@ -38,7 +38,7 @@ public static KMeansPlusPlusTrainer KMeans(this ClusteringCatalog.ClusteringTrai
var options = new KMeansPlusPlusTrainer.Options
{
FeatureColumn = featureColumn,
WeightColumn = weights != null ? Optional<string>.Explicit(weights) : Optional<string>.Implicit(DefaultColumnNames.Weight),
WeightColumn = exampleWeightColumnName != null ? Optional<string>.Explicit(exampleWeightColumnName) : Optional<string>.Implicit(DefaultColumnNames.Weight),
ClustersCount = clustersCount
};
return new KMeansPlusPlusTrainer(env, options);
Expand Down
Loading