Skip to content

Reformatted Ranking samples to width 85 #3930

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jul 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,22 @@ namespace Samples.Dynamic.Trainers.Ranking
{
public static class FastTree
{
// This example requires installation of additional NuGet package
// <a href="https://www.nuget.org/packages/Microsoft.ML.FastTree/">Microsoft.ML.FastTree</a>.
// This example requires installation of additional NuGet package for
// Microsoft.ML.FastTree at
// https://www.nuget.org/packages/Microsoft.ML.FastTree/
public static void Example()
{
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
// Setting the seed to a fixed number in this example to make outputs deterministic.
// Create a new context for ML.NET operations. It can be used for
// exception tracking and logging, as a catalog of available operations
// and as the source of randomness. Setting the seed to a fixed number
// in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);

// Create a list of training data points.
var dataPoints = GenerateRandomDataPoints(1000);

// Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
// Convert the list of data points to an IDataView object, which is
// consumable by ML.NET API.
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);

// Define the trainer.
Expand All @@ -29,17 +32,21 @@ public static void Example()
// Train the model.
var model = pipeline.Fit(trainingData);

// Create testing data. Use different random seed to make it different from training data.
var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
// Create testing data. Use different random seed to make it different
// from training data.
var testData = mlContext.Data.LoadFromEnumerable(
GenerateRandomDataPoints(500, seed:123));

// Run the model on test data set.
var transformedTestData = model.Transform(testData);

// Take the top 5 rows.
var topTransformedTestData = mlContext.Data.TakeRows(transformedTestData, 5);
var topTransformedTestData = mlContext.Data.TakeRows(
transformedTestData, 5);

// Convert IDataView object to a list.
var predictions = mlContext.Data.CreateEnumerable<Prediction>(topTransformedTestData, reuseRowObject: false).ToList();
var predictions = mlContext.Data.CreateEnumerable<Prediction>(
topTransformedTestData, reuseRowObject: false).ToList();

// Print 5 predictions.
foreach (var p in predictions)
Expand All @@ -61,7 +68,8 @@ public static void Example()
// NDCG: @1:0.99, @2:0.98, @3:0.99
}

private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0, int groupSize = 10)
private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
int seed = 0, int groupSize = 10)
{
var random = new Random(seed);
float randomFloat() => (float)random.NextDouble();
Expand All @@ -73,13 +81,16 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int se
Label = (uint)label,
GroupId = (uint)(i / groupSize),
// Create random features that are correlated with the label.
// For data points with larger labels, the feature values are slightly increased by adding a constant.
Features = Enumerable.Repeat(label, 50).Select(x => randomFloat() + x * 0.1f).ToArray()
// For data points with larger labels, the feature values are
// slightly increased by adding a constant.
Features = Enumerable.Repeat(label, 50).Select(
x => randomFloat() + x * 0.1f).ToArray()
};
}
}

// Example with label, groupId, and 50 feature values. A data set is a collection of such examples.
// Example with label, groupId, and 50 feature values. A data set is a
// collection of such examples.
private class DataPoint
{
[KeyType(5)]
Expand All @@ -102,8 +113,13 @@ private class Prediction
// Pretty-print RankingMetrics objects.
//
// Writes two lines to the console: one for the discounted cumulative gains
// (DCG) and one for the normalized discounted cumulative gains (NDCG). Each
// entry is rendered as "@<position>:<value>", where the position is the
// 1-based index of the entry and the value is formatted with two decimals,
// e.g. "DCG: @1:41.95, @2:63.33, @3:75.65".
public static void PrintMetrics(RankingMetrics metrics)
{
    // Interpolation is used so that the F2 format specifier is actually
    // applied to the value; plain concatenation would print ":F2" verbatim.
    Console.WriteLine("DCG: " + string.Join(", ",
        metrics.DiscountedCumulativeGains.Select(
            (d, i) => $"@{i + 1}:{d:F2}")));

    Console.WriteLine("NDCG: " + string.Join(", ",
        metrics.NormalizedDiscountedCumulativeGains.Select(
            (d, i) => $"@{i + 1}:{d:F2}")));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ string TrainerOptions = null;

string OptionsInclude = "";
string Comments= @"
// This example requires installation of additional NuGet package
// <a href=""https://www.nuget.org/packages/Microsoft.ML.FastTree/"">Microsoft.ML.FastTree</a>.";
// This example requires installation of additional NuGet package for
// Microsoft.ML.FastTree at
// https://www.nuget.org/packages/Microsoft.ML.FastTree/";

string ExpectedOutputPerInstance = @"// Expected output:
// Label: 5, Score: 13.0154
Expand All @@ -19,4 +20,4 @@ string ExpectedOutputPerInstance = @"// Expected output:
string ExpectedOutput = @"// Expected output:
// DCG: @1:41.95, @2:63.33, @3:75.65
// NDCG: @1:0.99, @2:0.98, @3:0.99";
#>
#>
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,22 @@ namespace Samples.Dynamic.Trainers.Ranking
{
public static class FastTreeWithOptions
{
// This example requires installation of additional NuGet package
// <a href="https://www.nuget.org/packages/Microsoft.ML.FastTree/">Microsoft.ML.FastTree</a>.
// This example requires installation of additional NuGet package for
// Microsoft.ML.FastTree at
// https://www.nuget.org/packages/Microsoft.ML.FastTree/
public static void Example()
{
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
// Setting the seed to a fixed number in this example to make outputs deterministic.
// Create a new context for ML.NET operations. It can be used for
// exception tracking and logging, as a catalog of available operations
// and as the source of randomness. Setting the seed to a fixed number
// in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);

// Create a list of training data points.
var dataPoints = GenerateRandomDataPoints(1000);

// Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
// Convert the list of data points to an IDataView object, which is
// consumable by ML.NET API.
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);

// Define trainer options.
Expand All @@ -43,17 +46,21 @@ public static void Example()
// Train the model.
var model = pipeline.Fit(trainingData);

// Create testing data. Use different random seed to make it different from training data.
var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
// Create testing data. Use different random seed to make it different
// from training data.
var testData = mlContext.Data.LoadFromEnumerable(
GenerateRandomDataPoints(500, seed:123));

// Run the model on test data set.
var transformedTestData = model.Transform(testData);

// Take the top 5 rows.
var topTransformedTestData = mlContext.Data.TakeRows(transformedTestData, 5);
var topTransformedTestData = mlContext.Data.TakeRows(
transformedTestData, 5);

// Convert IDataView object to a list.
var predictions = mlContext.Data.CreateEnumerable<Prediction>(topTransformedTestData, reuseRowObject: false).ToList();
var predictions = mlContext.Data.CreateEnumerable<Prediction>(
topTransformedTestData, reuseRowObject: false).ToList();

// Print 5 predictions.
foreach (var p in predictions)
Expand All @@ -75,7 +82,8 @@ public static void Example()
// NDCG: @1:0.96, @2:0.95, @3:0.97
}

private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0, int groupSize = 10)
private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
int seed = 0, int groupSize = 10)
{
var random = new Random(seed);
float randomFloat() => (float)random.NextDouble();
Expand All @@ -87,13 +95,16 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int se
Label = (uint)label,
GroupId = (uint)(i / groupSize),
// Create random features that are correlated with the label.
// For data points with larger labels, the feature values are slightly increased by adding a constant.
Features = Enumerable.Repeat(label, 50).Select(x => randomFloat() + x * 0.1f).ToArray()
// For data points with larger labels, the feature values are
// slightly increased by adding a constant.
Features = Enumerable.Repeat(label, 50).Select(
x => randomFloat() + x * 0.1f).ToArray()
};
}
}

// Example with label, groupId, and 50 feature values. A data set is a collection of such examples.
// Example with label, groupId, and 50 feature values. A data set is a
// collection of such examples.
private class DataPoint
{
[KeyType(5)]
Expand All @@ -116,8 +127,12 @@ private class Prediction
// Pretty-print RankingMetrics objects.
//
// Writes two lines to the console: one for the discounted cumulative gains
// (DCG) and one for the normalized discounted cumulative gains (NDCG). Each
// entry is rendered as "@<position>:<value>", where the position is the
// 1-based index of the entry and the value is formatted with two decimals,
// e.g. "DCG: @1:41.95, @2:63.33, @3:75.65".
public static void PrintMetrics(RankingMetrics metrics)
{
    // Interpolation is used so that the F2 format specifier is actually
    // applied to the value; plain concatenation would print ":F2" verbatim.
    Console.WriteLine("DCG: " + string.Join(", ",
        metrics.DiscountedCumulativeGains.Select(
            (d, i) => $"@{i + 1}:{d:F2}")));

    Console.WriteLine("NDCG: " + string.Join(", ",
        metrics.NormalizedDiscountedCumulativeGains.Select(
            (d, i) => $"@{i + 1}:{d:F2}")));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ string TrainerOptions = @"FastTreeRankingTrainer.Options

string OptionsInclude = "using Microsoft.ML.Trainers.FastTree;";
string Comments= @"
// This example requires installation of additional NuGet package
// <a href=""https://www.nuget.org/packages/Microsoft.ML.FastTree/"">Microsoft.ML.FastTree</a>.";
// This example requires installation of additional NuGet package for
// Microsoft.ML.FastTree at
// https://www.nuget.org/packages/Microsoft.ML.FastTree/";

string ExpectedOutputPerInstance = @"// Expected output:
// Label: 5, Score: 8.807633
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,22 @@ namespace Samples.Dynamic.Trainers.Ranking
{
public static class LightGbm
{
// This example requires installation of additional NuGet package for
// Microsoft.ML.LightGbm at
// https://www.nuget.org/packages/Microsoft.ML.LightGbm/
public static void Example()
{
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
// Setting the seed to a fixed number in this example to make outputs deterministic.
// Create a new context for ML.NET operations. It can be used for
// exception tracking and logging, as a catalog of available operations
// and as the source of randomness. Setting the seed to a fixed number
// in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);

// Create a list of training data points.
var dataPoints = GenerateRandomDataPoints(1000);

// Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
// Convert the list of data points to an IDataView object, which is
// consumable by ML.NET API.
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);

// Define the trainer.
Expand All @@ -27,39 +32,44 @@ public static void Example()
// Train the model.
var model = pipeline.Fit(trainingData);

// Create testing data. Use different random seed to make it different from training data.
var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
// Create testing data. Use different random seed to make it different
// from training data.
var testData = mlContext.Data.LoadFromEnumerable(
GenerateRandomDataPoints(500, seed:123));

// Run the model on test data set.
var transformedTestData = model.Transform(testData);

// Take the top 5 rows.
var topTransformedTestData = mlContext.Data.TakeRows(transformedTestData, 5);
var topTransformedTestData = mlContext.Data.TakeRows(
transformedTestData, 5);

// Convert IDataView object to a list.
var predictions = mlContext.Data.CreateEnumerable<Prediction>(topTransformedTestData, reuseRowObject: false).ToList();
var predictions = mlContext.Data.CreateEnumerable<Prediction>(
topTransformedTestData, reuseRowObject: false).ToList();

// Print 5 predictions.
foreach (var p in predictions)
Console.WriteLine($"Label: {p.Label}, Score: {p.Score}");

// Expected output:
// Label: 5, Score: 2.195333
// Label: 4, Score: 0.2596574
// Label: 4, Score: -2.168355
// Label: 1, Score: -3.074823
// Label: 1, Score: -1.523607
// Label: 5, Score: 2.493263
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did these numbers change for the sample?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah they must have changed from a more recent pull.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I see that the sample is using random data points. Thanks

// Label: 1, Score: -4.528436
// Label: 3, Score: -3.002865
// Label: 3, Score: -2.151812
// Label: 1, Score: -4.089102

// Evaluate the overall metrics.
var metrics = mlContext.Ranking.Evaluate(transformedTestData);
PrintMetrics(metrics);

// Expected output:
// DCG: @1:26.03, @2:37.57, @3:45.83
// NDCG: @1:0.61, @2:0.57, @3:0.59
// DCG: @1:41.95, @2:63.76, @3:75.97
// NDCG: @1:0.99, @2:0.99, @3:0.99
}

private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0, int groupSize = 10)
private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
int seed = 0, int groupSize = 10)
{
var random = new Random(seed);
float randomFloat() => (float)random.NextDouble();
Expand All @@ -71,13 +81,16 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int se
Label = (uint)label,
GroupId = (uint)(i / groupSize),
// Create random features that are correlated with the label.
// For data points with larger labels, the feature values are slightly increased by adding a constant.
Features = Enumerable.Repeat(label, 50).Select(x => randomFloat() + x * 0.1f).ToArray()
// For data points with larger labels, the feature values are
// slightly increased by adding a constant.
Features = Enumerable.Repeat(label, 50).Select(
x => randomFloat() + x * 0.1f).ToArray()
};
}
}

// Example with label, groupId, and 50 feature values. A data set is a collection of such examples.
// Example with label, groupId, and 50 feature values. A data set is a
// collection of such examples.
private class DataPoint
{
[KeyType(5)]
Expand All @@ -100,8 +113,12 @@ private class Prediction
// Pretty-print RankingMetrics objects.
//
// Writes two lines to the console: one for the discounted cumulative gains
// (DCG) and one for the normalized discounted cumulative gains (NDCG). Each
// entry is rendered as "@<position>:<value>", where the position is the
// 1-based index of the entry and the value is formatted with two decimals,
// e.g. "DCG: @1:41.95, @2:63.76, @3:75.97".
public static void PrintMetrics(RankingMetrics metrics)
{
    // Interpolation is used so that the F2 format specifier is actually
    // applied to the value; plain concatenation would print ":F2" verbatim.
    Console.WriteLine("DCG: " + string.Join(", ",
        metrics.DiscountedCumulativeGains.Select(
            (d, i) => $"@{i + 1}:{d:F2}")));

    Console.WriteLine("NDCG: " + string.Join(", ",
        metrics.NormalizedDiscountedCumulativeGains.Select(
            (d, i) => $"@{i + 1}:{d:F2}")));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ string TrainerOptions = null;

string OptionsInclude = "";
string Comments= @"
// This example requires installation of additional NuGet package
// <a href=""https://www.nuget.org/packages/Microsoft.ML.LightGbm/"">Microsoft.ML.LightGbm</a>.";
// This example requires installation of additional NuGet package for
// Microsoft.ML.LightGbm at
// https://www.nuget.org/packages/Microsoft.ML.LightGbm/";

string ExpectedOutputPerInstance = @"// Expected output:
// Label: 5, Score: 2.493263
Expand Down
Loading