Skip to content

AutoML Add Recommendation Task #4246

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 33 commits into from
Oct 17, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
6f3d26c
[AutoML] Pull out Code Gen as separate library plus some changes in C…
LittleLittleCloud Aug 25, 2019
7e0f6d0
pack codegen into mlnet
LittleLittleCloud Sep 5, 2019
22edabb
pack codegen into mlnet (#4179)
LittleLittleCloud Sep 9, 2019
50e0dcd
Merge branch 'features/automl' of https://github.com/dotnet/machinele…
LittleLittleCloud Sep 23, 2019
09c56f7
add MatrixFactorization Trainer
LittleLittleCloud Sep 23, 2019
15c58f1
add RecommendationExperiment and other functions
LittleLittleCloud Sep 23, 2019
ac57d9a
some refactor in MatrixFactorization, plus fix small bugs
LittleLittleCloud Sep 24, 2019
c07948f
add LabelFeautre ColumnPurpose and some update
LittleLittleCloud Sep 25, 2019
f182a20
Merge branch 'u/xiaoyun/recommendation'
LittleLittleCloud Sep 25, 2019
9695ffe
add missing Native dll
LittleLittleCloud Sep 25, 2019
b54de14
remove mlnet project
LittleLittleCloud Sep 25, 2019
913b4af
update based on comment
LittleLittleCloud Sep 25, 2019
3fc520c
update example
LittleLittleCloud Sep 26, 2019
2f47c02
Merge branch 'master' into u/xiaoyun/recommendation
maryamariyan Sep 26, 2019
c78efbf
nit: code style
maryamariyan Sep 26, 2019
5864b78
- Rename RecommendationExperimentScenario.MF to RecommendationExperim…
maryamariyan Sep 26, 2019
4010d90
nit: code style/ add space between if and (
maryamariyan Sep 26, 2019
fef926e
Fix compile error
maryamariyan Sep 26, 2019
9c4852c
minor fixes
maryamariyan Oct 7, 2019
74cbc5c
First stage changes
maryamariyan Oct 14, 2019
7e7c272
change signature for ITrainerEstimator
maryamariyan Oct 15, 2019
17500cf
Adding tests, checking code coverage
maryamariyan Oct 16, 2019
b882ee1
cleanup + improve SweepParams, taken from MatrixFactorizationTrainer
maryamariyan Oct 16, 2019
d7a272d
Address PR feedback - part1
maryamariyan Oct 16, 2019
b69d9c3
Apply PR feedbacks - Part 2
maryamariyan Oct 16, 2019
f9c6abb
Update test to reflect change made to sweep params
maryamariyan Oct 16, 2019
7d856c8
Apply PR feedbacks: Part 3
maryamariyan Oct 16, 2019
7852c5e
Adds more sweepable params and test
maryamariyan Oct 16, 2019
f889fa5
Rename to UserId/ItemId
maryamariyan Oct 16, 2019
2ec0649
Rename User/Item ID: part 2
maryamariyan Oct 16, 2019
c39ae94
- Removing SamplingKey for first iteration
maryamariyan Oct 16, 2019
7186280
Apply review comments
maryamariyan Oct 17, 2019
d3d6b4a
Minor rename
maryamariyan Oct 17, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add RecommendationExperiment and other functions
  • Loading branch information
LittleLittleCloud committed Sep 23, 2019
commit 15c58f1c466936d8824aaf77696c0f61eb2a416e
3 changes: 3 additions & 0 deletions docs/samples/Microsoft.ML.AutoML.Samples/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ public static void Main(string[] args)
{
try
{
RecommendationExperiment.Run();
Console.Clear();

RegressionExperiment.Run();
Console.Clear();

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
using System;
using System.IO;
using System.Linq;
using Microsoft.ML.AutoML;
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples
{
/// <summary>
/// Sample that runs an AutoML recommendation experiment, prints the metrics of the best run,
/// evaluates the best model on a test set, saves it to disk and makes a single prediction.
/// </summary>
public static class RecommendationExperiment
{
    // NOTE(review): these are machine-specific absolute paths; point them at local copies
    // of the dataset / an output location before running the sample.
    private static readonly string TrainDataPath = @"C:\Users\xiaoyuz\Desktop\machinelearning-samples\datasets\recommendation-ratings-train.csv";
    private static readonly string TestDataPath = @"C:\Users\xiaoyuz\Desktop\machinelearning-samples\datasets\recommendation-ratings-test.csv";
    private static readonly string ModelPath = @"C:\Users\xiaoyuz\source\test\recommendation.zip";
    private static readonly string LabelColumnName = "rating";

    // Time budget, in seconds, handed to the experiment settings below.
    private static readonly uint ExperimentTime = 60;

    /// <summary>
    /// Runs the sample end to end and waits for a key press before returning.
    /// </summary>
    public static void Run()
    {
        MLContext mlContext = new MLContext();

        // STEP 1: Load data
        // NOTE(review): TaxiTrip is not defined in this file and its members (VendorId, RateCode, ...)
        // do not mention the "userId" / "movieId" / "rating" columns used below - it looks copied from
        // the regression sample. Confirm and replace with a rating input type.
        IDataView trainDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TrainDataPath, hasHeader: true, separatorChar: ',');
        IDataView testDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TestDataPath, hasHeader: true, separatorChar: ',');

        // STEP 2: Configure and run the AutoML experiment
        var settings = new RecommendationExperimentSettings(RecommendationExperimentScenario.MF, "userId", "movieId");

        // Apply the time budget; without this the message below would not describe the actual run.
        settings.MaxExperimentTimeInSeconds = ExperimentTime;

        Console.WriteLine($"Running AutoML recommendation experiment for {ExperimentTime} seconds...");
        ExperimentResult<RegressionMetrics> experimentResult = mlContext.Auto()
            .CreateRecommendationExperiment(settings)
            .Execute(trainDataView, LabelColumnName);

        // STEP 3: Print metric from best model
        RunDetail<RegressionMetrics> bestRun = experimentResult.BestRun;
        Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
        Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
        Console.WriteLine($"Metrics of best model from validation data --");
        PrintMetrics(bestRun.ValidationMetrics);

        // STEP 4: Evaluate test data
        IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
        RegressionMetrics testMetrics = mlContext.Regression.Evaluate(testDataViewWithBestScore, labelColumnName: LabelColumnName);
        Console.WriteLine($"Metrics of best model on test data --");
        PrintMetrics(testMetrics);

        // STEP 5: Save the best model for later deployment and inferencing
        using (FileStream fs = File.Create(ModelPath))
        {
            mlContext.Model.Save(bestRun.Model, trainDataView.Schema, fs);
        }

        // STEP 6: Create prediction engine from the best trained model
        // NOTE(review): TaxiTrip / TaxiTripFarePrediction presumably belong to the regression sample;
        // verify and swap in recommendation-specific input/output types.
        var predictionEngine = mlContext.Model.CreatePredictionEngine<TaxiTrip, TaxiTripFarePrediction>(bestRun.Model);

        // STEP 7: Initialize a new test example and get its prediction
        var testTaxiTrip = new TaxiTrip
        {
            VendorId = "VTS",
            RateCode = 1,
            PassengerCount = 1,
            TripTimeInSeconds = 1140,
            TripDistance = 3.75f,
            PaymentType = "CRD"
        };
        var prediction = predictionEngine.Predict(testTaxiTrip);
        Console.WriteLine($"Predicted fare for test taxi trip: {prediction.FareAmount}");

        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
    }

    /// <summary>
    /// Writes the regression metrics (MAE, MSE, RMSE, R-squared) to the console.
    /// </summary>
    /// <param name="metrics">Metrics to print.</param>
    private static void PrintMetrics(RegressionMetrics metrics)
    {
        Console.WriteLine($"MeanAbsoluteError: {metrics.MeanAbsoluteError}");
        Console.WriteLine($"MeanSquaredError: {metrics.MeanSquaredError}");
        Console.WriteLine($"RootMeanSquaredError: {metrics.RootMeanSquaredError}");
        Console.WriteLine($"RSquared: {metrics.RSquared}");
    }
}
}
7 changes: 6 additions & 1 deletion src/Microsoft.ML.AutoML/API/AutoCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public sealed class AutoCatalog
/// <summary>
/// Stores intermediate values, keyed by name, that are shared between AutoML components.
/// </summary>
public static Dictionary<string, object> ValuePairs { get; set; };
public static Dictionary<string, object> ValuePairs { get; set; }

internal AutoCatalog(MLContext context)
{
Expand Down Expand Up @@ -129,6 +129,11 @@ public MulticlassClassificationExperiment CreateMulticlassClassificationExperime
return new MulticlassClassificationExperiment(_context, experimentSettings);
}

/// <summary>
/// Creates a new AutoML experiment to run on a recommendation dataset.
/// </summary>
/// <param name="experimentSettings">Settings for the AutoML experiment.</param>
/// <returns>A new <see cref="RecommendationExperiment"/> built from this catalog's context and the given settings.</returns>
public RecommendationExperiment CreateRecommendationExperiment(RecommendationExperimentSettings experimentSettings)
    => new RecommendationExperiment(_context, experimentSettings);

/// <summary>
/// Infers information about the columns of a dataset in a file located at <paramref name="path"/>.
/// </summary>
Expand Down
74 changes: 74 additions & 0 deletions src/Microsoft.ML.AutoML/API/RecommendationExperiment.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;

namespace Microsoft.ML.AutoML
{
/// <summary>
/// Scenarios that a recommendation experiment can be configured for.
/// </summary>
public enum RecommendationExperimentScenario
{
    /// <summary>
    /// Scenario that uses the matrix column/row index column names
    /// (see <see cref="RecommendationExperimentSettings"/>).
    /// </summary>
    MF = 0,
}

/// <summary>
/// Settings for an AutoML recommendation experiment.
/// </summary>
public sealed class RecommendationExperimentSettings : ExperimentSettings
{
    /// <summary>
    /// Scenario this experiment was configured for.
    /// </summary>
    public RecommendationExperimentScenario Scenerio { get; set; }

    /// <summary>
    /// Name of the column holding the matrix column index.
    /// </summary>
    /// <remarks>
    /// The constructor also publishes this name through <see cref="AutoCatalog.ValuePairs"/>;
    /// changing the property afterwards does not update that shared entry.
    /// </remarks>
    public string MatrixColumnIndexColumnName { get; set; }

    /// <summary>
    /// Name of the column holding the matrix row index.
    /// </summary>
    /// <remarks>
    /// The constructor also publishes this name through <see cref="AutoCatalog.ValuePairs"/>;
    /// changing the property afterwards does not update that shared entry.
    /// </remarks>
    public string MatrixRowIndexColumnName { get; set; }

    /// <summary>
    /// Metric the experiment optimizes. Regression metrics are reused for recommendation;
    /// defaults to <see cref="RegressionMetric.RSquared"/>.
    /// </summary>
    public RegressionMetric OptimizingMetric { get; set; }

    /// <summary>
    /// Trainers the experiment may use. Initialized with every <see cref="RecommendationTrainer"/> value.
    /// </summary>
    public ICollection<RecommendationTrainer> Trainers { get; }

    /// <summary>
    /// Creates settings for the given scenario and index columns.
    /// </summary>
    /// <param name="scenario">Recommendation scenario; only <see cref="RecommendationExperimentScenario.MF"/> is handled.</param>
    /// <param name="columnIndexName">Name of the matrix column index column.</param>
    /// <param name="rowIndexName">Name of the matrix row index column.</param>
    /// <exception cref="NotImplementedException">Thrown for any scenario other than <see cref="RecommendationExperimentScenario.MF"/>.</exception>
    public RecommendationExperimentSettings(RecommendationExperimentScenario scenario, string columnIndexName, string rowIndexName)
        : this()
    {
        if (scenario != RecommendationExperimentScenario.MF)
        {
            throw new NotImplementedException();
        }

        // Keep the instance's own properties in sync with what the caller asked for;
        // previously they were left at their defaults.
        Scenerio = scenario;
        MatrixColumnIndexColumnName = columnIndexName;
        MatrixRowIndexColumnName = rowIndexName;

        // The shared dictionary has a public setter and may not have been initialized yet.
        if (AutoCatalog.ValuePairs == null)
        {
            AutoCatalog.ValuePairs = new Dictionary<string, object>();
        }

        // Publish the column names under the matching MatrixFactorizationTrainer.Options member names.
        AutoCatalog.ValuePairs[nameof(MatrixFactorizationTrainer.Options.MatrixColumnIndexColumnName)] = columnIndexName;
        AutoCatalog.ValuePairs[nameof(MatrixFactorizationTrainer.Options.MatrixRowIndexColumnName)] = rowIndexName;
    }

    /// <summary>
    /// Applies the defaults shared by all constructors.
    /// </summary>
    private RecommendationExperimentSettings()
    {
        OptimizingMetric = RegressionMetric.RSquared;
        Trainers = Enum.GetValues(typeof(RecommendationTrainer)).OfType<RecommendationTrainer>().ToList();
    }
}

/// <summary>
/// Trainers that can be selected for a recommendation experiment.
/// </summary>
public enum RecommendationTrainer
{
    /// <summary>
    /// Matrix factorization trainer.
    /// </summary>
    MatrixFactorization = 0,
}

/// <summary>
/// AutoML experiment for recommendation tasks. Runs are scored with <see cref="RegressionMetrics"/>.
/// </summary>
public sealed class RecommendationExperiment : ExperimentBase<RegressionMetrics, RecommendationExperimentSettings>
{
    /// <summary>
    /// Wires the base experiment with a regression metrics agent and the trainers named in <paramref name="settings"/>.
    /// </summary>
    /// <param name="context">ML context the experiment runs in.</param>
    /// <param name="settings">Experiment settings; supplies the optimizing metric and the trainer list.</param>
    internal RecommendationExperiment(MLContext context, RecommendationExperimentSettings settings)
        : base(
            context,
            new RegressionMetricsAgent(context, settings.OptimizingMetric),
            new OptimizingMetricInfo(settings.OptimizingMetric),
            settings,
            TaskKind.Recommendation,
            TrainerExtensionUtil.GetTrainerNames(settings.Trainers))
    {
    }

    /// <summary>
    /// Selects the best cross-validation run via <c>BestResultUtil.GetBestRun</c>.
    /// </summary>
    private protected override CrossValidationRunDetail<RegressionMetrics> GetBestCrossValRun(IEnumerable<CrossValidationRunDetail<RegressionMetrics>> results)
        => BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing);

    /// <summary>
    /// Selects the best train/validate run via <c>BestResultUtil.GetBestRun</c>.
    /// </summary>
    private protected override RunDetail<RegressionMetrics> GetBestRun(IEnumerable<RunDetail<RegressionMetrics>> results)
        => BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing);
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
using System;
using System.Collections.Generic;
using System.Text;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
using Microsoft.ML.Trainers.Recommender;

namespace Microsoft.ML.AutoML
{
Expand All @@ -12,9 +14,8 @@ internal class MatrixFactorizationExtension : ITrainerExtension
public ITrainerEsitmator CreateInstance(MLContext mlContext, IEnumerable<SweepableParam> sweepParams, ColumnInformation columnInfo)
{
// TODO
// MatrixFactorizationTrainer.Options should inherit from ABC TrainerInputBaseWithGroupId
var options = TrainerExtensionUtil.CreateOptions<MatrixFactorizationTrainer.Options>(sweepParams);
options.LabelColumnName = columnInfo.LabelColumnName;
// MatrixFactorizationTrainer.Options should inherit from ABC TrainerInputBaseWithGroupId
var options = TrainerExtensionUtil.CreateOptions<MatrixFactorizationTrainer.Options>(sweepParams, columnInfo.LabelColumnName);
options.MatrixColumnIndexColumnName = (string)AutoCatalog.ValuePairs[nameof(options.MatrixColumnIndexColumnName)];
options.MatrixRowIndexColumnName = (string)AutoCatalog.ValuePairs[nameof(options.MatrixRowIndexColumnName)];
return mlContext.Recommendation().Trainers.MatrixFactorization(options);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ public static IEnumerable<SweepableParam> BuildMatrixFactorizationParmas()
new SweepableDiscreteParam(nameof(MatrixFactorizationTrainer.Options.ApproximationRank), new object[] { 10, 20, 50, 100, 150, 200 }),
};
}

public static IEnumerable<SweepableParam> BuildLinearSvmParams()
{
return new SweepableParam[] {
Expand Down
30 changes: 19 additions & 11 deletions src/Microsoft.ML.AutoML/TrainerExtensions/TrainerExtensionUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ internal enum TrainerName
SgdCalibratedBinary,
SgdCalibratedOva,
SymbolicSgdLogisticRegressionBinary,
SymbolicSgdLogisticRegressionOva
SymbolicSgdLogisticRegressionOva,
MatrixFactorization,
}

internal static class TrainerExtensionUtil
Expand All @@ -60,16 +61,6 @@ public static T CreateOptions<T>(IEnumerable<SweepableParam> sweepParams, string
return options;
}

/// <summary>
/// Creates a default instance of <typeparamref name="T"/> and, when sweep parameters are supplied,
/// passes both to <c>UpdateFields</c>.
/// </summary>
/// <typeparam name="T">Options type to instantiate.</typeparam>
/// <param name="sweepParams">Sweep parameters to apply; may be <c>null</c>, in which case the instance is returned unchanged.</param>
/// <returns>The newly created options instance.</returns>
public static T CreateOptions<T>(IEnumerable<SweepableParam> sweepParams) where T : class
{
    T instance = Activator.CreateInstance<T>();

    // Nothing to apply: hand back the default-constructed options.
    if (sweepParams == null)
    {
        return instance;
    }

    UpdateFields(instance, sweepParams);
    return instance;
}

private static string[] _lightGbmBoosterParamNames = new[] { "L2Regularization", "L1Regularization" };
private const string LightGbmBoosterPropName = "Booster";

Expand Down Expand Up @@ -373,6 +364,18 @@ public static TrainerName GetTrainerName(RegressionTrainer regressionTrainer)
throw new NotSupportedException($"{regressionTrainer} not supported");
}

/// <summary>
/// Maps a public <see cref="RecommendationTrainer"/> value to its internal <see cref="TrainerName"/>.
/// </summary>
/// <param name="recommendationTrainer">Trainer to map.</param>
/// <returns>The matching <see cref="TrainerName"/>.</returns>
/// <exception cref="NotSupportedException">Thrown when the value has no mapping.</exception>
public static TrainerName GetTrainerName(RecommendationTrainer recommendationTrainer)
{
    if (recommendationTrainer == RecommendationTrainer.MatrixFactorization)
    {
        return TrainerName.MatrixFactorization;
    }

    // never expected to reach here
    throw new NotSupportedException($"{recommendationTrainer} not supported");
}

public static IEnumerable<TrainerName> GetTrainerNames(IEnumerable<BinaryClassificationTrainer> binaryTrainers)
{
return binaryTrainers?.Select(t => GetTrainerName(t));
Expand All @@ -387,5 +390,10 @@ public static IEnumerable<TrainerName> GetTrainerNames(IEnumerable<RegressionTra
{
return regressionTrainers?.Select(t => GetTrainerName(t));
}

/// <summary>
/// Maps each <see cref="RecommendationTrainer"/> to its internal <see cref="TrainerName"/>.
/// </summary>
/// <param name="recommendationTrainers">Trainers to map; may be <c>null</c>.</param>
/// <returns>A lazily evaluated sequence of trainer names, or <c>null</c> when the input is <c>null</c>.</returns>
public static IEnumerable<TrainerName> GetTrainerNames(IEnumerable<RecommendationTrainer> recommendationTrainers)
{
    if (recommendationTrainers == null)
    {
        return null;
    }

    return recommendationTrainers.Select(trainer => GetTrainerName(trainer));
}
}
}