Skip to content

AutoML Add Recommendation Task #4246

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 33 commits into from
Oct 17, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
6f3d26c
[AutoML] Pull out Code Gen as separate library plus some changes in C…
LittleLittleCloud Aug 25, 2019
7e0f6d0
pack codegen into mlnet
LittleLittleCloud Sep 5, 2019
22edabb
pack codegen into mlnet (#4179)
LittleLittleCloud Sep 9, 2019
50e0dcd
Merge branch 'features/automl' of https://github.com/dotnet/machinele…
LittleLittleCloud Sep 23, 2019
09c56f7
add MatrixFactorization Trainer
LittleLittleCloud Sep 23, 2019
15c58f1
add RecommendationExperiment and other functions
LittleLittleCloud Sep 23, 2019
ac57d9a
some refactor in MatrixFactorization, plus fix small bugs
LittleLittleCloud Sep 24, 2019
c07948f
add LabelFeautre ColumnPurpose and some update
LittleLittleCloud Sep 25, 2019
f182a20
Merge branch 'u/xiaoyun/recommendation'
LittleLittleCloud Sep 25, 2019
9695ffe
add missing Native dll
LittleLittleCloud Sep 25, 2019
b54de14
remove mlnet project
LittleLittleCloud Sep 25, 2019
913b4af
update based on comment
LittleLittleCloud Sep 25, 2019
3fc520c
update example
LittleLittleCloud Sep 26, 2019
2f47c02
Merge branch 'master' into u/xiaoyun/recommendation
maryamariyan Sep 26, 2019
c78efbf
nit: code style
maryamariyan Sep 26, 2019
5864b78
- Rename RecommendationExperimentScenario.MF to RecommendationExperim…
maryamariyan Sep 26, 2019
4010d90
nit: code style/ add space between if and (
maryamariyan Sep 26, 2019
fef926e
Fix compile error
maryamariyan Sep 26, 2019
9c4852c
minor fixes
maryamariyan Oct 7, 2019
74cbc5c
First stage changes
maryamariyan Oct 14, 2019
7e7c272
change signature for ITrainerEstimator
maryamariyan Oct 15, 2019
17500cf
Adding tests, checking code coverage
maryamariyan Oct 16, 2019
b882ee1
cleanup + improve SweepParams, taken from MatrixFactorizationTrainer
maryamariyan Oct 16, 2019
d7a272d
Address PR feedback - part1
maryamariyan Oct 16, 2019
b69d9c3
Apply PR feedbacks - Part 2
maryamariyan Oct 16, 2019
f9c6abb
Update test to reflect change made to sweep params
maryamariyan Oct 16, 2019
7d856c8
Apply PR feedbacks: Part 3
maryamariyan Oct 16, 2019
7852c5e
Adds more sweepable params and test
maryamariyan Oct 16, 2019
f889fa5
Rename to UserId/ItemId
maryamariyan Oct 16, 2019
2ec0649
Rename User/Item ID: part 2
maryamariyan Oct 16, 2019
c39ae94
- Removing SamplingKey for first iteration
maryamariyan Oct 16, 2019
7186280
Apply review comments
maryamariyan Oct 17, 2019
d3d6b4a
Minor rename
maryamariyan Oct 17, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
some refactor in MatrixFactorization, plus fix small bugs
  • Loading branch information
LittleLittleCloud committed Sep 24, 2019
commit ac57d9a8d8cfb9c87a6df1c43feb96eca7b078e2
1 change: 1 addition & 0 deletions Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
https://dotnetfeed.blob.core.windows.net/dotnet-core/index.json;
https://dotnet.myget.org/F/dotnet-core/api/v3/index.json;
https://dotnet.myget.org/F/roslyn-analyzers/api/v3/index.json;
C:\Users\xiaoyuz\source\repos\image\machinelearning\bin\packages;
</RestoreSources>
</PropertyGroup>

Expand Down
22 changes: 22 additions & 0 deletions docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/Movie.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
using System;
using System.Collections.Generic;
using System.Text;
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples.DataStructures
{
/// <summary>
/// Row type for the movie-rating sample data. Each public field is tagged with a
/// <see cref="LoadColumnAttribute"/> giving the index of the source column it is bound to.
/// </summary>
/// <remarks>
/// NOTE(review): the field names presumably become the column names of the loaded data,
/// so renaming a field would change the schema seen by consumers - confirm before renaming.
/// </remarks>
public class Movie
{
/// <summary>Identifier of the user who gave the rating; bound to source column 0.</summary>
[LoadColumn(0)]
public string userId;

/// <summary>Identifier of the rated movie; bound to source column 1.</summary>
[LoadColumn(1)]
public string movieId;

/// <summary>Rating value; bound to source column 2.</summary>
[LoadColumn(2)]
public float rating;

/// <summary>Timestamp associated with the rating; bound to source column 3.</summary>
// NOTE(review): if this holds Unix epoch seconds, single precision cannot represent
// such values exactly - confirm whether the precision loss matters for this sample.
[LoadColumn(3)]
public float timestamp;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.IO;
using System.Linq;
using Microsoft.ML.AutoML;
using Microsoft.ML.AutoML.Samples.DataStructures;
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples
Expand All @@ -19,8 +20,8 @@ public static void Run()
MLContext mlContext = new MLContext();

// STEP 1: Load data
IDataView trainDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TrainDataPath, hasHeader: true, separatorChar: ',');
IDataView testDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TestDataPath, hasHeader: true, separatorChar: ',');
IDataView trainDataView = mlContext.Data.LoadFromTextFile<Movie>(TrainDataPath, hasHeader: true, separatorChar: ',');
IDataView testDataView = mlContext.Data.LoadFromTextFile<Movie>(TestDataPath, hasHeader: true, separatorChar: ',');

var settings = new RecommendationExperimentSettings(RecommendationExperimentScenario.MF, "userId", "movieId");
// STEP 2: Run AutoML experiment
Expand All @@ -47,17 +48,13 @@ public static void Run()
mlContext.Model.Save(bestRun.Model, trainDataView.Schema, fs);

// STEP 7: Create prediction engine from the best trained model
var predictionEngine = mlContext.Model.CreatePredictionEngine<TaxiTrip, TaxiTripFarePrediction>(bestRun.Model);
var predictionEngine = mlContext.Model.CreatePredictionEngine<Movie, TaxiTripFarePrediction>(bestRun.Model);

// STEP 8: Initialize a new test taxi trip, and get the predicted fare
var testTaxiTrip = new TaxiTrip
var testTaxiTrip = new Movie
{
VendorId = "VTS",
RateCode = 1,
PassengerCount = 1,
TripTimeInSeconds = 1140,
TripDistance = 3.75f,
PaymentType = "CRD"
userId="1",
movieId = "1097",
};
var prediction = predictionEngine.Predict(testTaxiTrip);
Console.WriteLine($"Predicted fare for test taxi trip: {prediction.FareAmount}");
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.AutoML/API/AutoCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public sealed class AutoCatalog
/// <summary>
/// save some intermediate value
/// </summary>
public static Dictionary<string, object> ValuePairs { get; set; }
public static Dictionary<string, object> ValuePairs { get; set; } = new Dictionary<string, object>();

internal AutoCatalog(MLContext context)
{
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.AutoML/Microsoft.ML.AutoML.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Microsoft.ML" Version="$(MlDotNetPackageVersion)" />
<PackageReference Include="Microsoft.ML.LightGBM" Version="$(MlDotNetPackageVersion)" />
<PackageReference Include="Microsoft.ML.Mkl.Components" Version="$(MlDotNetPackageVersion)" />
<PackageReference Include="Microsoft.ML.Recommender" Version="0.16.0-preview2-28122-0" />
</ItemGroup>

<PropertyGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ internal class TrainerExtensionCatalog
{ TrainerName.SgdCalibratedBinary, typeof(SgdCalibratedBinaryExtension) },
{ TrainerName.SgdCalibratedOva, typeof(SgdCalibratedOvaExtension) },
{ TrainerName.SymbolicSgdLogisticRegressionBinary, typeof(SymbolicSgdLogisticRegressionBinaryExtension) },
{ TrainerName.SymbolicSgdLogisticRegressionOva, typeof(SymbolicSgdLogisticRegressionOvaExtension) }
{ TrainerName.SymbolicSgdLogisticRegressionOva, typeof(SymbolicSgdLogisticRegressionOvaExtension) },
{ TrainerName.MatrixFactorization, typeof(MatrixFactorizationExtension) },
};

private static readonly IDictionary<Type, TrainerName> _extensionTypesToTrainerNames =
Expand Down Expand Up @@ -72,6 +73,10 @@ public static IEnumerable<ITrainerExtension> GetTrainers(TaskKind task,
{
trainers = GetRegressionLearners();
}
else if (task == TaskKind.Recommendation)
{
trainers = GetRecommendationLearners();
}
else
{
// should not be possible to reach here
Expand Down Expand Up @@ -134,5 +139,13 @@ private static IEnumerable<ITrainerExtension> GetRegressionLearners()
new OlsRegressionExtension(),
};
}

/// <summary>
/// Builds the collection of trainer extensions offered for recommendation tasks.
/// </summary>
/// <returns>
/// A newly allocated array containing a single <see cref="MatrixFactorizationExtension"/>.
/// </returns>
private static IEnumerable<ITrainerExtension> GetRecommendationLearners()
{
    ITrainerExtension[] recommendationLearners = { new MatrixFactorizationExtension() };
    return recommendationLearners;
}
}
}
2 changes: 1 addition & 1 deletion src/Microsoft.ML.AutoML/Utils/UserInputValidationUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ private static IEnumerable<DataViewType> GetAllowedLabelTypes(TaskKind task)
// (if input label is not already a key) before invoking the trainer.
case TaskKind.MulticlassClassification:
return null;
case TaskKind.Regression:
case TaskKind.Regression | TaskKind.Recommendation:
return new DataViewType[] { NumberDataViewType.Single };
default:
throw new NotSupportedException($"Unsupported task type: {task}");
Expand Down
6 changes: 5 additions & 1 deletion src/Microsoft.ML.Recommender/MatrixFactorizationPredictor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -422,14 +422,18 @@ DataViewRow ISchemaBoundRowMapper.GetRow(DataViewRow input, IEnumerable<DataView
/// <summary>
/// Trains a <see cref="MatrixFactorizationModelParameters"/>. It factorizes the training matrix into the product of two low-rank matrices.
/// </summary>
public sealed class MatrixFactorizationPredictionTransformer : PredictionTransformerBase<MatrixFactorizationModelParameters>
public sealed class MatrixFactorizationPredictionTransformer : PredictionTransformerBase<MatrixFactorizationModelParameters>, ISingleFeaturePredictionTransformer<MatrixFactorizationModelParameters>
{
internal const string LoaderSignature = "MaFactPredXf";
internal string MatrixColumnIndexColumnName { get; }
internal string MatrixRowIndexColumnName { get; }
internal DataViewType MatrixColumnIndexColumnType { get; }
internal DataViewType MatrixRowIndexColumnType { get; }

public string FeatureColumnName => throw new NotImplementedException();

public DataViewType FeatureColumnType => throw new NotImplementedException();

/// <summary>
/// Build a transformer based on matrix factorization predictor (model) and the input schema (trainSchema). The created
/// transformer can only transform IDataView objects compatible to the input schema; that is, that IDataView must contain
Expand Down
16 changes: 5 additions & 11 deletions src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ namespace Microsoft.ML.Trainers
/// </format>
/// </example>
public sealed class MatrixFactorizationTrainer : ITrainer<MatrixFactorizationModelParameters>,
IEstimator<MatrixFactorizationPredictionTransformer>
IEstimator<MatrixFactorizationPredictionTransformer>, ITrainerEstimator<MatrixFactorizationPredictionTransformer, MatrixFactorizationModelParameters>
{
/// <summary>
/// Type of loss function.
Expand All @@ -111,7 +111,7 @@ public enum LossFunctionType
/// <summary>
/// Advanced options for the <see cref="MatrixFactorizationTrainer"/>.
/// </summary>
public sealed class Options
public sealed class Options : TrainerInputBaseWithLabel
{
/// <summary>
/// The name of variable (i.e., Column in a <see cref="IDataView"/> type system) used as matrix's column index.
Expand All @@ -123,11 +123,6 @@ public sealed class Options
/// </summary>
public string MatrixRowIndexColumnName;

/// <summary>
/// The name variable (i.e., column in a <see cref="IDataView"/> type system) used as matrix's element value.
/// </summary>
public string LabelColumnName;

/// <summary>
/// Loss function minimized for finding factor matrices.
/// </summary>
Expand Down Expand Up @@ -302,9 +297,8 @@ internal static class Defaults
/// <summary>
/// The <see cref="TrainerInfo"/> contains general parameters for this trainer.
/// </summary>
TrainerInfo ITrainer.Info => _info;

private readonly TrainerInfo _info;
public TrainerInfo Info { get; set; }

/// <summary>
/// Initializes a new instance of <see cref="MatrixFactorizationTrainer"/> through the <see cref="Options"/> class.
Expand Down Expand Up @@ -337,7 +331,7 @@ internal MatrixFactorizationTrainer(IHostEnvironment env, Options options)
_quiet = options.Quiet;
_doNmf = options.NonNegative;

_info = new TrainerInfo(normalization: false, caching: false);
Info = new TrainerInfo(normalization: false, caching: false);

LabelName = options.LabelColumnName;
MatrixColumnIndexName = options.MatrixColumnIndexColumnName;
Expand Down Expand Up @@ -379,7 +373,7 @@ internal MatrixFactorizationTrainer(IHostEnvironment env,
_quiet = args.Quiet;
_doNmf = args.NonNegative;

_info = new TrainerInfo(normalization: false, caching: false);
Info = new TrainerInfo(normalization: false, caching: false);

LabelName = labelColumnName;
MatrixColumnIndexName = matrixColumnIndexColumnName;
Expand Down
1 change: 0 additions & 1 deletion src/Microsoft.ML.TensorFlow/TensorFlow/TensorGeneric.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

using System;
using System.Text;
using Microsoft.ML.Runtime;

namespace Microsoft.ML.Transforms.TensorFlow
{
Expand Down