Skip to content

Simplify CodeGen - phase 2 #4972

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,6 @@ public AzureAttachConsoleAppCodeGenerator(Pipeline pipeline, ColumnInferenceResu
Name = $"{_settings.OutputName}.ConsoleApp.csproj",
};

var columns = _columnInferenceResult.TextLoaderOptions.Columns;
var featuresList = columns.Where((str) => str.Name != _settings.LabelName).Select((str) => str.Name).ToList();
var sampleResult = Utils.GenerateSampleData(_settings.TrainDataset, _columnInferenceResult);
PredictProgram = new CSharpCodeFile()
{
Expand All @@ -94,7 +92,6 @@ public AzureAttachConsoleAppCodeGenerator(Pipeline pipeline, ColumnInferenceResu
HasHeader = _columnInferenceResult.TextLoaderOptions.HasHeader,
Separator = _columnInferenceResult.TextLoaderOptions.Separators.FirstOrDefault(),
Target = _settings.Target,
Features = featuresList,
SampleData = sampleResult,
}.TransformText(),
Name = "Program.cs",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -432,8 +432,6 @@ private static string GeneratPredictProjectFileContent(string namespaceValue, bo

private string GeneratePredictProgramCSFileContent(string namespaceValue)
{
var columns = _columnInferenceResult.TextLoaderOptions.Columns;
var featuresList = columns.Where((str) => str.Name != _settings.LabelName).Select((str) => str.Name).ToList();
var sampleData = Utils.GenerateSampleData(_settings.TrainDataset, _columnInferenceResult);
PredictProgram predictProgram = new PredictProgram()
{
Expand All @@ -444,7 +442,6 @@ private string GeneratePredictProgramCSFileContent(string namespaceValue)
Separator = _columnInferenceResult.TextLoaderOptions.Separators.FirstOrDefault(),
AllowQuoting = _columnInferenceResult.TextLoaderOptions.AllowQuoting,
AllowSparse = _columnInferenceResult.TextLoaderOptions.AllowSparse,
Features = featuresList,
Target = _settings.Target,
SampleData = sampleData,
};
Expand Down
21 changes: 12 additions & 9 deletions src/Microsoft.ML.CodeGenerator/Templates/Console/ModelBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -87,21 +87,24 @@ public static void CreateModel()
this.Write(this.ToStringHelper.ToStringWithCulture(AllowSparse.ToString().ToLowerInvariant()));
this.Write(");\r\n");
}
this.Write(" // Build training pipeline\r\n IEstimator<ITransformer> trai" +
"ningPipeline = BuildTrainingPipeline(mlContext);\r\n\r\n");
this.Write(@" // Build training pipeline
IEstimator<ITransformer> trainingPipeline = BuildTrainingPipeline(mlContext);

// Train Model
ITransformer mlModel = TrainModel(mlContext, trainingDataView, trainingPipeline);

");
if(string.IsNullOrEmpty(TestPath) && !HasOnnxModel){
this.Write(" // Evaluate quality of Model\r\n Evaluate(mlContext, trainin" +
"gDataView, trainingPipeline);\r\n\r\n");
}
this.Write(" // Train Model\r\n ITransformer mlModel = TrainModel(mlConte" +
"xt, trainingDataView, trainingPipeline);\r\n");
if(!string.IsNullOrEmpty(TestPath) && !HasOnnxModel){
this.Write("\r\n // Evaluate quality of Model\r\n EvaluateModel(mlContext, " +
"mlModel, testDataView);\r\n");
this.Write(" // Evaluate quality of Model\r\n EvaluateModel(mlContext, ml" +
"Model, testDataView);\r\n\r\n");
}
this.Write("\r\n // Save model\r\n SaveModel(mlContext, mlModel, MODEL_FILE" +
"PATH, trainingDataView.Schema);\r\n }\r\n\r\n public static IEstimator<I" +
"Transformer> BuildTrainingPipeline(MLContext mlContext)\r\n {\r\n");
this.Write(" // Save model\r\n SaveModel(mlContext, mlModel, MODEL_FILEPA" +
"TH, trainingDataView.Schema);\r\n }\r\n\r\n public static IEstimator<ITr" +
"ansformer> BuildTrainingPipeline(MLContext mlContext)\r\n {\r\n");
if(PreTrainerTransforms.Count >0 ) {
this.Write(" // Data process configuration with pipeline data transformations \r\n " +
" var dataProcessPipeline = ");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,19 +56,19 @@ namespace <#= Namespace #>.ConsoleApp
// Build training pipeline
IEstimator<ITransformer> trainingPipeline = BuildTrainingPipeline(mlContext);

// Train Model
ITransformer mlModel = TrainModel(mlContext, trainingDataView, trainingPipeline);

<# if(string.IsNullOrEmpty(TestPath) && !HasOnnxModel){ #>
// Evaluate quality of Model
Evaluate(mlContext, trainingDataView, trainingPipeline);

<#}#>
// Train Model
ITransformer mlModel = TrainModel(mlContext, trainingDataView, trainingPipeline);
<# if(!string.IsNullOrEmpty(TestPath) && !HasOnnxModel){ #>

// Evaluate quality of Model
EvaluateModel(mlContext, mlModel, testDataView);
<#}#>

<#}#>
// Save model
SaveModel(mlContext, mlModel, MODEL_FILEPATH, trainingDataView.Schema);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,15 @@ public virtual string TransformText()
this.Write(" with predicted ");
this.Write(this.ToStringHelper.ToStringWithCulture(Utils.Normalize(LabelName)));
this.Write(" from sample data...\\n\\n\");\r\n");
foreach(var label in Features){
if(SampleData != null) {
foreach(var kv in SampleData){
this.Write("\t\t\tConsole.WriteLine($\"");
this.Write(this.ToStringHelper.ToStringWithCulture(label));
this.Write(this.ToStringHelper.ToStringWithCulture(kv.Key));
this.Write(": {sampleData.");
this.Write(this.ToStringHelper.ToStringWithCulture(Utils.Normalize(label)));
this.Write(this.ToStringHelper.ToStringWithCulture(kv.Key));
this.Write("}\");\r\n");
}
}
if("BinaryClassification".Equals(TaskType) ){
this.Write("\t\t\tConsole.WriteLine($\"\\n\\nPredicted ");
this.Write(this.ToStringHelper.ToStringWithCulture(Utils.Normalize(LabelName)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,10 @@ namespace <#= Namespace #>.ConsoleApp
var predictionResult = ConsumeModel.Predict(sampleData);

Console.WriteLine("Using model to make single prediction -- Comparing actual <#= Utils.Normalize(LabelName) #> with predicted <#= Utils.Normalize(LabelName) #> from sample data...\n\n");
<#foreach(var label in Features){#>
Console.WriteLine($"<#= label #>: {sampleData.<#= Utils.Normalize(label) #>}");
<# if(SampleData != null) {#>
<#foreach(var kv in SampleData){#>
Console.WriteLine($"<#= kv.Key #>: {sampleData.<#= kv.Key #>}");
<#}#>
<#}#>
<#if("BinaryClassification".Equals(TaskType) ){ #>
Console.WriteLine($"\n\nPredicted <#= Utils.Normalize(LabelName) #>: {predictionResult.Prediction}\n\n");
Expand Down
6 changes: 6 additions & 0 deletions src/Microsoft.ML.CodeGenerator/Utils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ internal static string Sanitize(string name)
return string.Join("", name.Select(x => Char.IsLetterOrDigit(x) ? x : '_'));
}

/// <summary>
/// Take the first line of data from inputFile and parse it as a dictionary using schema from columnInference.
/// </summary>
/// <param name="inputFile">path to input file.</param>
/// <param name="columnInference">Column inference result.</param>
/// <returns>A dictionary whose keys are sanitized column names and whose values are the corresponding values from the first line of data.</returns>
internal static IDictionary<string, string> GenerateSampleData(string inputFile, ColumnInferenceResults columnInference)
{
try
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,6 @@ namespace Test.ConsoleApp
var predictionResult = ConsumeModel.Predict(sampleData);

Console.WriteLine("Using model to make single prediction -- Comparing actual Label with predicted Label from sample data...\n\n");
Console.WriteLine($"Age: {sampleData.Age}");
Console.WriteLine($"Workclass: {sampleData.Workclass}");
Console.WriteLine($"Fnlwgt: {sampleData.Fnlwgt}");
Console.WriteLine($"Education: {sampleData.Education}");
Console.WriteLine($"Education_num: {sampleData.Education_num}");
Console.WriteLine($"Marital_status: {sampleData.Marital_status}");
Console.WriteLine($"Occupation: {sampleData.Occupation}");
Console.WriteLine($"Relationship: {sampleData.Relationship}");
Console.WriteLine($"Race: {sampleData.Race}");
Console.WriteLine($"Sex: {sampleData.Sex}");
Console.WriteLine($"Capital_gain: {sampleData.Capital_gain}");
Console.WriteLine($"Capital_loss: {sampleData.Capital_loss}");
Console.WriteLine($"Hours_per_week: {sampleData.Hours_per_week}");
Console.WriteLine($"Native_country: {sampleData.Native_country}");
Console.WriteLine($"\n\nPredicted Label value {predictionResult.Prediction} \nPredicted Label scores: [{String.Join(",", predictionResult.Score)}]\n\n");
Console.WriteLine("=============== End of process, hit any key to finish ===============");
Console.ReadKey();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ namespace CodeGenTest.ConsoleApp
var predictionResult = ConsumeModel.Predict(sampleData);

Console.WriteLine("Using model to make single prediction -- Comparing actual Label with predicted Label from sample data...\n\n");
Console.WriteLine($"ImageSource: {sampleData.ImageSource}");
Console.WriteLine($"\n\nPredicted Label value {predictionResult.Prediction} \nPredicted Label scores: [{String.Join(",", predictionResult.Score)}]\n\n");
Console.WriteLine("=============== End of process, hit any key to finish ===============");
Console.ReadKey();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,6 @@ namespace TestNamespace.ConsoleApp
var predictionResult = ConsumeModel.Predict(sampleData);

Console.WriteLine("Using model to make single prediction -- Comparing actual Label with predicted Label from sample data...\n\n");
Console.WriteLine($"col1: {sampleData.Col1}");
Console.WriteLine($"col2: {sampleData.Col2}");
Console.WriteLine($"col3: {sampleData.Col3}");
Console.WriteLine($"col4: {sampleData.Col4}");
Console.WriteLine($"col5: {sampleData.Col5}");
Console.WriteLine($"\n\nPredicted Label: {predictionResult.Prediction}\n\n");
Console.WriteLine("=============== End of process, hit any key to finish ===============");
Console.ReadKey();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,6 @@ namespace TestNamespace.ConsoleApp
var predictionResult = ConsumeModel.Predict(sampleData);

Console.WriteLine("Using model to make single prediction -- Comparing actual Label with predicted Label from sample data...\n\n");
Console.WriteLine($"col1: {sampleData.Col1}");
Console.WriteLine($"col2: {sampleData.Col2}");
Console.WriteLine($"col3: {sampleData.Col3}");
Console.WriteLine($"col4: {sampleData.Col4}");
Console.WriteLine($"col5: {sampleData.Col5}");
Console.WriteLine($"\n\nPredicted Label: {predictionResult.Prediction}\n\n");
Console.WriteLine("=============== End of process, hit any key to finish ===============");
Console.ReadKey();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ namespace TestNamespace.ConsoleApp
var predictionResult = ConsumeModel.Predict(sampleData);

Console.WriteLine("Using model to make single prediction -- Comparing actual Label with predicted Label from sample data...\n\n");
Console.WriteLine($"userId: {sampleData.UserId}");
Console.WriteLine($"movieId: {sampleData.MovieId}");
Console.WriteLine($"\n\nPredicted Label: {predictionResult.Score}\n\n");
Console.WriteLine("=============== End of process, hit any key to finish ===============");
Console.ReadKey();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// This file was auto-generated by ML.NET Model Builder.

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Namespace.Model;
Using package1
namespace Namespace.ConsoleApp
{
    /// <summary>
    /// Console entry points that load the training data, assemble and fit the
    /// training pipeline, run the evaluation step, and save the resulting model.
    /// </summary>
    public static class ModelBuilder
    {
        private static string TRAIN_DATA_FILEPATH = @"Path";
        private static string MODEL_FILEPATH = @"/path/to/model";

        // One MLContext instance is reused by CreateModel; it is constructed with a
        // fixed seed (1), which is intended to make repeated trainings reproducible.
        private static MLContext mlContext = new MLContext(seed: 1);

        /// <summary>
        /// Runs the whole workflow in order: load data, build the pipeline, train,
        /// evaluate, and save the model to <c>MODEL_FILEPATH</c>.
        /// </summary>
        public static void CreateModel()
        {
            // Read the training file into a data view using the text-loader options below.
            var trainingDataView = mlContext.Data.LoadFromTextFile<ModelInput>(
                                            path: TRAIN_DATA_FILEPATH,
                                            hasHeader: false,
                                            separatorChar: ',',
                                            allowQuoting: true,
                                            allowSparse: true);

            // Assemble the estimator chain.
            var trainingPipeline = BuildTrainingPipeline(mlContext);

            // Fit the chain on the training data.
            var mlModel = TrainModel(mlContext, trainingDataView, trainingPipeline);

            // Evaluation runs against the training data view.
            Evaluate(mlContext, trainingDataView, trainingPipeline);

            // Persist the fitted model together with the input schema.
            SaveModel(mlContext, mlModel, MODEL_FILEPATH, trainingDataView.Schema);
        }

        /// <summary>
        /// Builds the training pipeline: the pre-trainer transform followed by a cache
        /// checkpoint, then the trainer with its post-trainer transform appended.
        /// </summary>
        /// <param name="mlContext">Context used to reach the transform and trainer catalogs.</param>
        /// <returns>The data-processing pipeline with the trainer appended.</returns>
        public static IEstimator<ITransformer> BuildTrainingPipeline(MLContext mlContext)
        {
            // Data transformations, followed by a cache checkpoint.
            var transforms = mlContext.Transforms.PreTrainerTransformer1
                                      .AppendCacheCheckpoint(mlContext);

            // Training algorithm plus its post-trainer transform.
            var learner = mlContext.Task1.Trainers.Trainer
                                   .Append(mlContext.Transforms.PostTrainerTransformer1);

            return transforms.Append(learner);
        }

        /// <summary>
        /// Fits <paramref name="trainingPipeline"/> on <paramref name="trainingDataView"/>,
        /// printing a banner before and after.
        /// </summary>
        /// <param name="mlContext">Not used by this body; kept as part of the signature.</param>
        /// <param name="trainingDataView">Data the pipeline is fitted on.</param>
        /// <param name="trainingPipeline">Estimator chain to fit.</param>
        /// <returns>The transformer returned by <c>Fit</c>.</returns>
        public static ITransformer TrainModel(MLContext mlContext, IDataView trainingDataView, IEstimator<ITransformer> trainingPipeline)
        {
            Console.WriteLine("=============== Training model ===============");

            var fittedModel = trainingPipeline.Fit(trainingDataView);

            Console.WriteLine("=============== End of training process ===============");
            return fittedModel;
        }

        /// <summary>
        /// Prints the cross-validation banner. This body computes no metrics; it only
        /// writes the banner line.
        /// </summary>
        private static void Evaluate(MLContext mlContext, IDataView trainingDataView, IEstimator<ITransformer> trainingPipeline)
        {
            // Only one dataset is available here, so evaluation is framed as
            // cross-validation on the training data.
            Console.WriteLine("=============== Cross-validating to get model's accuracy metrics ===============");
        }

        /// <summary>
        /// Saves <paramref name="mlModel"/> with <paramref name="modelInputSchema"/> at the
        /// absolute path resolved from <paramref name="modelRelativePath"/>, then prints
        /// where it was written.
        /// </summary>
        private static void SaveModel(MLContext mlContext, ITransformer mlModel, string modelRelativePath, DataViewSchema modelInputSchema)
        {
            Console.WriteLine($"=============== Saving the model ===============");

            mlContext.Model.Save(
                mlModel,
                modelInputSchema,
                GetAbsolutePath(modelRelativePath));

            Console.WriteLine(
                "The model is saved to {0}",
                GetAbsolutePath(modelRelativePath));
        }

        /// <summary>
        /// Combines <paramref name="relativePath"/> with the directory that contains the
        /// assembly defining <c>Program</c>.
        /// </summary>
        /// <param name="relativePath">Path to combine with the assembly directory.</param>
        /// <returns>The combined path.</returns>
        public static string GetAbsolutePath(string relativePath)
        {
            var assemblyFile = new FileInfo(typeof(Program).Assembly.Location);
            return Path.Combine(assemblyFile.Directory.FullName, relativePath);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ namespace Namespace.ConsoleApp
var predictionResult = ConsumeModel.Predict(sampleData);

Console.WriteLine("Using model to make single prediction -- Comparing actual LabelName with predicted LabelName from sample data...\n\n");
Console.WriteLine($"key1: {sampleData.key1}");
Console.WriteLine($"key2: {sampleData.key2}");
Console.WriteLine($"key3: {sampleData.key3}");
Console.WriteLine("=============== End of process, hit any key to finish ===============");
Console.ReadKey();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,5 +55,31 @@ public void TestConsumeModel()

Approvals.Verify(consumeModel.TransformText());
}

/// <summary>
/// Approval test for the text produced by the <c>ModelBuilder</c> template when
/// <c>HasOnnxModel</c> is false and no <c>TestPath</c> is assigned.
/// </summary>
[Fact]
[UseReporter(typeof(DiffReporter))]
[MethodImpl(MethodImplOptions.NoInlining)]
public void TestModelBuilder_noOnnx_noTestData()
{
    // Configure the template with placeholder values.
    ModelBuilder template = new ModelBuilder
    {
        Namespace = "Namespace",
        HasOnnxModel = false,
        Path = "Path",
        Separator = ',',
        PreTrainerTransforms = new[] { "PreTrainerTransformer1" },
        Trainer = "Trainer",
        TaskType = "Task1",
        GeneratedUsings = "Using package1",
        AllowQuoting = true,
        AllowSparse = true,
        LabelName = "Label",
        CacheBeforeTrainer = true,
        PostTrainerTransforms = new[] { "PostTrainerTransformer1" },
        MLNetModelpath = "/path/to/model",
    };

    // Render the template and compare the output with the approved baseline.
    var generatedCode = template.TransformText();
    Approvals.Verify(generatedCode);
}
}
}