Skip to content

Fix NullReferenceException when it comes to Recommendation in AutoML and CodeGenerator #4774

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -329,18 +329,11 @@ private string GenerateConsumeModelCSFileContent(string namespaceValue)
{
if (_pipeline == null)
throw new ArgumentNullException(nameof(_pipeline));
try
{
var node = _pipeline.Nodes.Where(t => t.NodeType == PipelineNodeType.Trainer).First();
ITrainerGenerator generator = TrainerGeneratorFactory.GetInstance(node);
var trainerString = generator.GenerateTrainer();
var trainerUsings = generator.GenerateUsings();
return (trainerString, trainerUsings);
}
catch (Exception)
{
return (string.Empty, new string[0]);
}
var node = _pipeline.Nodes.Where(t => t.NodeType == PipelineNodeType.Trainer).First();
ITrainerGenerator generator = TrainerGeneratorFactory.GetInstance(node);
var trainerString = generator.GenerateTrainer();
var trainerUsings = generator.GenerateUsings();
return (trainerString, trainerUsings);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure why the try/catch was needed. If no one else knows, we may want to drill down into the code, or run a variety of datasets through the CLI to show that nothing is expected to throw.

The old catch seems to eat the error without a hint of what went wrong.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's why I removed that part of the code.

}

internal IList<string> GenerateClassLabels(IDictionary<string, CodeGeneratorSettings.ColumnMapping> columnMapping = default)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,12 @@ private void Initialize(PipelineNode node)
value = "\"" + val + "\"";
}

if (type.IsEnum)
{
//example: "MatrixFactorizationTrainer.LossFunctionType.SquareLossRegression"
value = $"{type.ReflectedType.Name}.{type.Name}.{kv.Value.ToString()}";
}

if (type == typeof(CustomProperty))
{
value = kv.Value;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using TestNamespace.Model;
using Microsoft.ML.Trainers;

namespace TestNamespace.ConsoleApp
{
Expand Down Expand Up @@ -55,9 +56,10 @@ namespace TestNamespace.ConsoleApp
public static IEstimator<ITransformer> BuildTrainingPipeline(MLContext mlContext)
{
// Data process configuration with pipeline data transformations
var dataProcessPipeline = mlContext.Transforms.Concatenate("Out", new[] { "In" });
var dataProcessPipeline = mlContext.Transforms.Conversion.MapValueToKey("userId", "userId")
.Append(mlContext.Transforms.Conversion.MapValueToKey("movieId", "movieId"));
// Set the training algorithm
var trainer = mlContext.Recommendation().Trainers.MatrixFactorization(labelColumnName: "Label", matrixColumnIndexColumnName: "userId", matrixRowIndexColumnName: "movieId");
var trainer = mlContext.Recommendation().Trainers.MatrixFactorization(new MatrixFactorizationTrainer.Options() { MatrixColumnIndexColumnName = "userId", MatrixRowIndexColumnName = "movieId", LabelColumnName = "Label", NumberOfIterations = 10, LearningRate = 0.01f, ApproximationRank = 8, Lambda = 0.01f, LossFunction = MatrixFactorizationTrainer.LossFunctionType.SquareLossRegression, Alpha = 1f, C = 1E-05f });

var trainingPipeline = dataProcessPipeline.Append(trainer);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
using Microsoft.ML.CodeGenerator.Templates.Console;
using Microsoft.ML.CodeGenerator.Utilities;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
using Xunit;
using CodeGenerator = Microsoft.ML.CodeGenerator.CSharp.CodeGenerator;

Expand Down Expand Up @@ -466,16 +467,30 @@ private CodeGenerator PrepareForRecommendationTask()
if (mockedPipeline == null)
{
MLContext context = new MLContext();
var hyperParam = new Dictionary<string, object>()
{
{"MatrixColumnIndexColumnName","userId" },
{"MatrixRowIndexColumnName","movieId" },
{"LabelColumnName","Label" },
{nameof(MatrixFactorizationTrainer.Options.NumberOfIterations), 10 },
{nameof(MatrixFactorizationTrainer.Options.LearningRate), 0.01f },
{nameof(MatrixFactorizationTrainer.Options.ApproximationRank), 8 },
{nameof(MatrixFactorizationTrainer.Options.Lambda), 0.01f },
{nameof(MatrixFactorizationTrainer.Options.LossFunction), MatrixFactorizationTrainer.LossFunctionType.SquareLossRegression },
{nameof(MatrixFactorizationTrainer.Options.Alpha), 1f },
{nameof(MatrixFactorizationTrainer.Options.C), 0.00001f },
};
var valueToKeyPipelineNode1 = new PipelineNode(nameof(EstimatorName.ValueToKeyMapping), PipelineNodeType.Transform, "userId", "userId");
var valueToKeyPipelineNode2 = new PipelineNode(nameof(EstimatorName.ValueToKeyMapping), PipelineNodeType.Transform, "movieId", "movieId");
var matrixPipelineNode = new PipelineNode(nameof(TrainerName.MatrixFactorization), PipelineNodeType.Trainer, "Features", "Score", hyperParam);
var pipeline = new Pipeline(new PipelineNode[]
{
valueToKeyPipelineNode1,
valueToKeyPipelineNode2,
matrixPipelineNode
});

var trainer1 = new SuggestedTrainer(context, new MatrixFactorizationExtension(), new ColumnInformation() {
LabelColumnName = "Label",
UserIdColumnName = "userId",
ItemIdColumnName = "movieId",
}, hyperParamSet: null);
var transforms1 = new List<SuggestedTransform>() { ColumnConcatenatingExtension.CreateSuggestedTransform(context, new[] { "In" }, "Out") };
var inferredPipeline1 = new SuggestedPipeline(transforms1, new List<SuggestedTransform>(), trainer1, context, false);

mockedPipeline = inferredPipeline1.ToPipeline();
mockedPipeline = pipeline;
var textLoaderArgs = new TextLoader.Options()
{
Columns = new[] {
Expand Down
12 changes: 10 additions & 2 deletions test/Microsoft.ML.CodeGenerator.Tests/TrainerGeneratorTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using Microsoft.ML;
using Microsoft.ML.AutoML;
using Microsoft.ML.CodeGenerator.CSharp;
using Microsoft.ML.Trainers;
using Xunit;

namespace mlnet.Tests
Expand Down Expand Up @@ -295,14 +296,21 @@ public void MatrixFactorizationAdvancedTest()
{"MatrixColumnIndexColumnName","userId" },
{"MatrixRowIndexColumnName","movieId" },
{"LabelColumnName","rating" },
{nameof(MatrixFactorizationTrainer.Options.NumberOfIterations), 10 },
{nameof(MatrixFactorizationTrainer.Options.LearningRate), 0.01f },
{nameof(MatrixFactorizationTrainer.Options.ApproximationRank), 8 },
{nameof(MatrixFactorizationTrainer.Options.Lambda), 0.01f },
{nameof(MatrixFactorizationTrainer.Options.LossFunction), MatrixFactorizationTrainer.LossFunctionType.SquareLossRegression },
{nameof(MatrixFactorizationTrainer.Options.Alpha), 1f },
{nameof(MatrixFactorizationTrainer.Options.C), 0.00001f },
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for adding the MF options to a test.

};
PipelineNode node = new PipelineNode("MatrixFactorization", PipelineNodeType.Trainer, default(string[]), default(string), elementProperties);
Pipeline pipeline = new Pipeline(new PipelineNode[] { node });
CodeGenerator codeGenerator = new CodeGenerator(pipeline, null, null);
var actual = codeGenerator.GenerateTrainerAndUsings();
string expectedTrainerString = "MatrixFactorization(matrixColumnIndexColumnName:\"userId\",matrixRowIndexColumnName:\"movieId\",labelColumnName:\"rating\")";
string expectedTrainerString = "MatrixFactorization(new MatrixFactorizationTrainer.Options(){MatrixColumnIndexColumnName=\"userId\",MatrixRowIndexColumnName=\"movieId\",LabelColumnName=\"rating\",NumberOfIterations=10,LearningRate=0.01f,ApproximationRank=8,Lambda=0.01f,LossFunction=MatrixFactorizationTrainer.LossFunctionType.SquareLossRegression,Alpha=1f,C=1E-05f})";
Assert.Equal(expectedTrainerString, actual.Item1);
Assert.Null(actual.Item2);
Assert.Equal(new string[] { "using Microsoft.ML.Trainers;\r\n" },actual.Item2);
}

[Fact]
Expand Down