Skip to content

Add Code Gen piece for Recommendation task #4360

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
AutoML Add Recommendation Task (#4246)
Trains Recommendation models able to predict rating for existing users
 Conflicts:
	pkg/Microsoft.ML.AutoML/Microsoft.ML.AutoML.nupkgproj
	src/Microsoft.ML.AutoML/Microsoft.ML.AutoML.csproj
	test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs
	test/Microsoft.ML.AutoML.Tests/ColumnInferenceTests.cs
	test/Microsoft.ML.AutoML.Tests/ColumnInformationUtilTests.cs
	test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj
	test/Microsoft.ML.AutoML.Tests/TrainerExtensionsTests.cs
	test/Microsoft.ML.AutoML.Tests/TransformInferenceTests.cs
	test/Microsoft.ML.AutoML.Tests/UserInputValidationTests.cs
  • Loading branch information
LittleLittleCloud authored and maryamariyan committed Oct 21, 2019
commit 2263dca4aad0d830a31cf84b66b4866a2a5d6f40
20 changes: 20 additions & 0 deletions docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/Movie.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples.DataStructures
{
/// <summary>
/// One row of the movie-rating dataset consumed by the AutoML recommendation sample.
/// Each field is bound to an input-file column through <see cref="LoadColumnAttribute"/>.
/// </summary>
public class Movie
{
/// <summary>
/// User identifier, loaded from column index 0.
/// </summary>
[LoadColumn(0)]
public string UserId;

/// <summary>
/// Movie (item) identifier, loaded from column index 1.
/// </summary>
[LoadColumn(1)]
public string MovieId;

/// <summary>
/// Rating given by the user to the movie, loaded from column index 2.
/// </summary>
[LoadColumn(2)]
public float Rating;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples
{
/// <summary>
/// Prediction output type used with the prediction engine in the recommendation sample.
/// </summary>
public class MovieRatingPrediction
{
/// <summary>
/// Predicted rating; mapped to the "Score" column via <see cref="ColumnNameAttribute"/>.
/// </summary>
[ColumnName("Score")]
public float Rating;
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<OutputType>Exe</OutputType>
Expand All @@ -7,6 +7,8 @@

<ItemGroup>
<ProjectReference Include="..\..\..\src\Microsoft.ML.AutoML\Microsoft.ML.AutoML.csproj" />
<NativeAssemblyReference Include="MatrixFactorizationNative" />
<NativeAssemblyReference Include="FastTreeNative" />
</ItemGroup>

</Project>
3 changes: 3 additions & 0 deletions docs/samples/Microsoft.ML.AutoML.Samples/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ public static void Main(string[] args)
{
try
{
RecommendationExperiment.Run();
Console.Clear();

RegressionExperiment.Run();
Console.Clear();

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.IO;
using System.Linq;
using Microsoft.ML.AutoML.Samples.DataStructures;
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples
{
/// <summary>
/// Sample showing how to run an AutoML recommendation experiment end to end:
/// load data, run the experiment, inspect the best run, evaluate on test data,
/// save the model and make single predictions.
/// </summary>
public static class RecommendationExperiment
{
    // Sample configuration. Replace the placeholder paths before running.
    // These values are never reassigned, so they are declared readonly.
    private static readonly string TrainDataPath = "<Path to your train dataset goes here>";
    private static readonly string TestDataPath = "<Path to your test dataset goes here>";
    private static readonly string ModelPath = @"<Desired model output directory goes here>\Model.zip";
    private static readonly string LabelColumnName = "Rating";
    private static readonly string UserColumnName = "UserId";
    private static readonly string ItemColumnName = "MovieId";

    // Value passed as MaxExperimentTimeInSeconds to the experiment settings.
    private static readonly uint ExperimentTime = 60;

    /// <summary>
    /// Runs the sample: trains with AutoML, prints validation and test metrics,
    /// saves the best model to <c>ModelPath</c> and prints two example predictions.
    /// Blocks on a key press before returning.
    /// </summary>
    public static void Run()
    {
        MLContext mlContext = new MLContext();

        // STEP 1: Load data (comma-separated files with a header row)
        IDataView trainDataView = mlContext.Data.LoadFromTextFile<Movie>(TrainDataPath, hasHeader: true, separatorChar: ',');
        IDataView testDataView = mlContext.Data.LoadFromTextFile<Movie>(TestDataPath, hasHeader: true, separatorChar: ',');

        // STEP 2: Run AutoML experiment
        Console.WriteLine($"Running AutoML recommendation experiment for {ExperimentTime} seconds...");
        ExperimentResult<RegressionMetrics> experimentResult = mlContext.Auto()
            .CreateRecommendationExperiment(new RecommendationExperimentSettings() { MaxExperimentTimeInSeconds = ExperimentTime })
            .Execute(trainDataView, testDataView,
                new ColumnInformation()
                {
                    LabelColumnName = LabelColumnName,
                    UserIdColumnName = UserColumnName,
                    ItemIdColumnName = ItemColumnName
                });

        // STEP 3: Print metric from best model
        RunDetail<RegressionMetrics> bestRun = experimentResult.BestRun;
        Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
        Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
        Console.WriteLine("Metrics of best model from validation data --");
        PrintMetrics(bestRun.ValidationMetrics);

        // STEP 4: Evaluate test data
        IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
        RegressionMetrics testMetrics = mlContext.Recommendation().Evaluate(testDataViewWithBestScore, labelColumnName: LabelColumnName);
        Console.WriteLine("Metrics of best model on test data --");
        PrintMetrics(testMetrics);

        // STEP 5: Save the best model for later deployment and inferencing
        mlContext.Model.Save(bestRun.Model, trainDataView.Schema, ModelPath);

        // STEP 6: Create prediction engine from the best trained model
        var predictionEngine = mlContext.Model.CreatePredictionEngine<Movie, MovieRatingPrediction>(bestRun.Model);

        // STEP 7: Initialize a new test, and get the prediction
        var testMovie = new Movie
        {
            UserId = "1",
            MovieId = "1097",
        };
        var prediction = predictionEngine.Predict(testMovie);
        Console.WriteLine($"Predicted rating for: {prediction.Rating}");

        // Predictions are only meaningful for existing users; this sample
        // expects NaN for a user that is not known to the model.
        testMovie = new Movie
        {
            UserId = "612", // new user
            MovieId = "2940"
        };
        prediction = predictionEngine.Predict(testMovie);
        Console.WriteLine($"Expected Rating NaN for unknown user, Predicted: {prediction.Rating}");

        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
    }

    /// <summary>
    /// Writes the regression metrics of a run to the console, one metric per line.
    /// </summary>
    /// <param name="metrics">Metrics to print.</param>
    private static void PrintMetrics(RegressionMetrics metrics)
    {
        Console.WriteLine($"MeanAbsoluteError: {metrics.MeanAbsoluteError}");
        Console.WriteLine($"MeanSquaredError: {metrics.MeanSquaredError}");
        Console.WriteLine($"RootMeanSquaredError: {metrics.RootMeanSquaredError}");
        Console.WriteLine($"RSquared: {metrics.RSquared}");
    }
}
}
1 change: 1 addition & 0 deletions pkg/Microsoft.ML.AutoML/Microsoft.ML.AutoML.nupkgproj
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
<PackageReference Include="Microsoft.ML" Version="1.3.1" />
<PackageReference Include="Microsoft.ML.LightGBM" Version="1.3.1" />
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't it be LightGbm ?

<PackageReference Include="Microsoft.ML.Mkl.Components" Version="1.3.1" />
<PackageReference Include="Microsoft.ML.Recommender" Version="1.3.1" />
</ItemGroup>

</Project>
35 changes: 35 additions & 0 deletions src/Microsoft.ML.AutoML/API/AutoCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,41 @@ public MulticlassClassificationExperiment CreateMulticlassClassificationExperime
return new MulticlassClassificationExperiment(_context, experimentSettings);
}

/// <summary>
/// Creates a new AutoML experiment to run on a recommendation dataset.
/// </summary>
/// <param name="maxExperimentTimeInSeconds">Maximum number of seconds that experiment will run.</param>
/// <returns>A new AutoML recommendation experiment.</returns>
/// <remarks>
/// <para>See <see cref="RecommendationExperiment"/> for a more detailed code example of an AutoML recommendation experiment.</para>
/// <para>An experiment may run for longer than <paramref name="maxExperimentTimeInSeconds"/>.
/// This is because once AutoML starts training an ML.NET model, AutoML lets the
/// model train to completion. For instance, if the first model
/// AutoML trains takes 4 hours, and the second model trained takes 5 hours,
/// but <paramref name="maxExperimentTimeInSeconds"/> was the number of seconds in 6 hours,
/// the experiment will run for 4 + 5 = 9 hours (not 6 hours).</para>
/// </remarks>
public RecommendationExperiment CreateRecommendationExperiment(uint maxExperimentTimeInSeconds)
{
return new RecommendationExperiment(_context, new RecommendationExperimentSettings()
{
MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds
});
}

/// <summary>
/// Creates a new AutoML experiment to run on a recommendation dataset,
/// configured by the supplied settings.
/// </summary>
/// <param name="experimentSettings">Settings for the AutoML experiment.</param>
/// <returns>A new AutoML recommendation experiment.</returns>
/// <remarks>
/// See <see cref="RecommendationExperiment"/> for a more detailed code example of an AutoML recommendation experiment.
/// </remarks>
public RecommendationExperiment CreateRecommendationExperiment(RecommendationExperimentSettings experimentSettings)
    => new RecommendationExperiment(_context, experimentSettings);

/// <summary>
/// Infers information about the columns of a dataset in a file located at <paramref name="path"/>.
/// </summary>
Expand Down
10 changes: 10 additions & 0 deletions src/Microsoft.ML.AutoML/API/ColumnInference.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,16 @@ public sealed class ColumnInformation
/// <value>The default value is "Label".</value>
public string LabelColumnName { get; set; }

/// <summary>
/// The dataset column to use as the user ID for computation.
/// </summary>
public string UserIdColumnName { get; set; }

/// <summary>
/// The dataset column to use as the item ID for computation.
/// </summary>
public string ItemIdColumnName { get; set; }

/// <summary>
/// The dataset column to use for example weight.
/// </summary>
Expand Down
78 changes: 78 additions & 0 deletions src/Microsoft.ML.AutoML/API/RecommendationExperiment.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML
{
/// <summary>
/// Options controlling an AutoML experiment that runs on a recommendation dataset.
/// </summary>
public sealed class RecommendationExperimentSettings : ExperimentSettings
{
    /// <summary>
    /// The metric AutoML attempts to optimize while the experiment runs.
    /// </summary>
    /// <value>Defaults to <see cref="RegressionMetric.RSquared"/>.</value>
    public RegressionMetric OptimizingMetric { get; set; }

    /// <summary>
    /// The set of trainers the AutoML experiment is allowed to use.
    /// </summary>
    /// <value>Defaults to a collection holding every value of <see cref="RecommendationTrainer" />.</value>
    public ICollection<RecommendationTrainer> Trainers { get; }

    /// <summary>
    /// Creates a new <see cref="RecommendationExperimentSettings"/> populated with default values.
    /// </summary>
    public RecommendationExperimentSettings()
    {
        // Start from every trainer declared by the enum.
        Array declaredTrainers = Enum.GetValues(typeof(RecommendationTrainer));
        List<RecommendationTrainer> allTrainers = declaredTrainers.OfType<RecommendationTrainer>().ToList();

        Trainers = allTrainers;
        OptimizingMetric = RegressionMetric.RSquared;
    }
}

/// <summary>
/// Enumeration of ML.NET recommendation trainers used by AutoML.
/// </summary>
public enum RecommendationTrainer
{
/// <summary>
/// Matrix factorization trainer. Currently the only member of this enumeration.
/// </summary>
MatrixFactorization
}

/// <summary>
/// AutoML experiment on recommendation datasets.
/// </summary>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[RecommendationExperiment](~/../docs/samples/docs/samples/Microsoft.ML.AutoML.Samples/RecommendationExperiment.cs)]
/// ]]></format>
/// </example>
public sealed class RecommendationExperiment : ExperimentBase<RegressionMetrics, RecommendationExperimentSettings>
{
    // Wires the base experiment with a regression metrics agent and metric info
    // built from the optimizing metric in the settings, the Recommendation task
    // kind, and the trainer names derived from the settings' trainer collection.
    internal RecommendationExperiment(MLContext context, RecommendationExperimentSettings settings)
        : base(
            context,
            new RegressionMetricsAgent(context, settings.OptimizingMetric),
            new OptimizingMetricInfo(settings.OptimizingMetric),
            settings,
            TaskKind.Recommendation,
            TrainerExtensionUtil.GetTrainerNames(settings.Trainers))
    {
    }

    // Picks the best cross-validation run through BestResultUtil, passing along
    // whether the optimizing metric is one to maximize.
    private protected override CrossValidationRunDetail<RegressionMetrics> GetBestCrossValRun(IEnumerable<CrossValidationRunDetail<RegressionMetrics>> results)
        => BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing);

    // Picks the best train/validate run through BestResultUtil, passing along
    // whether the optimizing metric is one to maximize.
    private protected override RunDetail<RegressionMetrics> GetBestRun(IEnumerable<RunDetail<RegressionMetrics>> results)
        => BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing);
}
}
19 changes: 18 additions & 1 deletion src/Microsoft.ML.AutoML/ColumnInference/ColumnInformationUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML
{
Expand Down Expand Up @@ -47,6 +46,16 @@ internal static class ColumnInformationUtil
return ColumnPurpose.Ignore;
}

if (columnName == columnInfo.UserIdColumnName)
{
return ColumnPurpose.UserId;
}

if (columnName == columnInfo.ItemIdColumnName)
{
return ColumnPurpose.ItemId;
}

return null;
}

Expand Down Expand Up @@ -76,6 +85,12 @@ internal static ColumnInformation BuildColumnInfo(IEnumerable<(string name, Colu
case ColumnPurpose.NumericFeature:
columnInfo.NumericColumnNames.Add(column.name);
break;
case ColumnPurpose.UserId:
columnInfo.UserIdColumnName = column.name;
break;
case ColumnPurpose.ItemId:
columnInfo.ItemIdColumnName = column.name;
break;
case ColumnPurpose.TextFeature:
columnInfo.TextColumnNames.Add(column.name);
break;
Expand All @@ -98,6 +113,8 @@ public static IEnumerable<string> GetColumnNames(ColumnInformation columnInforma
{
var columnNames = new List<string>();
AddStringToListIfNotNull(columnNames, columnInformation.LabelColumnName);
AddStringToListIfNotNull(columnNames, columnInformation.UserIdColumnName);
AddStringToListIfNotNull(columnNames, columnInformation.ItemIdColumnName);
AddStringToListIfNotNull(columnNames, columnInformation.ExampleWeightColumnName);
AddStringToListIfNotNull(columnNames, columnInformation.SamplingKeyColumnName);
AddStringsToListIfNotNull(columnNames, columnInformation.CategoricalColumnNames);
Expand Down
4 changes: 3 additions & 1 deletion src/Microsoft.ML.AutoML/ColumnInference/ColumnPurpose.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ internal enum ColumnPurpose
TextFeature = 4,
Weight = 5,
ImagePath = 6,
SamplingKey = 7
SamplingKey = 7,
UserId = 8,
ItemId = 9
}
}
2 changes: 1 addition & 1 deletion src/Microsoft.ML.AutoML/Experiment/SuggestedTrainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public SuggestedTrainer Clone()
return new SuggestedTrainer(_mlContext, _trainerExtension, _columnInfo, HyperParamSet?.Clone());
}

public ITrainerEstimator<ISingleFeaturePredictionTransformer<object>, object> BuildTrainer()
public ITrainerEstimator<IPredictionTransformer<object>, object> BuildTrainer()
{
IEnumerable<SweepableParam> sweepParams = null;
if (HyperParamSet != null)
Expand Down
1 change: 1 addition & 0 deletions src/Microsoft.ML.AutoML/Microsoft.ML.AutoML.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
<PackageReference Include="Microsoft.ML" Version="$(MlDotNetPackageVersion)" />
<PackageReference Include="Microsoft.ML.LightGBM" Version="$(MlDotNetPackageVersion)" />
<PackageReference Include="Microsoft.ML.Mkl.Components" Version="$(MlDotNetPackageVersion)" />
<PackageReference Include="Microsoft.ML.Recommender" Version="$(MlDotNetPackageVersion)" />
</ItemGroup>

<PropertyGroup>
Expand Down
1 change: 1 addition & 0 deletions src/Microsoft.ML.AutoML/TaskKind.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@ internal enum TaskKind
BinaryClassification,
MulticlassClassification,
Regression,
Recommendation
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add trail comma :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To keep the code style in format

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note: I could change but this is cherry pick though. it is without trail comma in master branch as well.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is code in master. We shouldn't be changing it here. If we want a change like that, make it in the master branch.

Honestly, the only thing this branch should be used for is Code Gen and mlnet.csproj going forward. Until they are merged into master. Then this branch should go away.

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

namespace Microsoft.ML.AutoML
{
using ITrainerEstimator = ITrainerEstimator<ISingleFeaturePredictionTransformer<object>, object>;
using ITrainerEstimator = ITrainerEstimator<IPredictionTransformer<object>, object>;

internal class AveragedPerceptronBinaryExtension : ITrainerExtension
{
Expand Down
Loading