Skip to content

Commit e14db27

Browse files
authored
Scrubbing task: rest of transforms (#2876)
1 parent 4cb4fea commit e14db27

File tree

8 files changed

+44
-56
lines changed

8 files changed

+44
-56
lines changed

docs/code/MlNetCookBook.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -944,9 +944,9 @@ public static IDataView PrepareData(MLContext mlContext, IDataView data)
944944
{
945945
// Define the operation code.
946946
Action<InputRow, OutputRow> mapping = (input, output) => output.Label = input.Income > 50000;
947-
// Make a custom transformer and transform the data.
948-
var transformer = mlContext.Transforms.CustomMappingTransformer(mapping, null);
949-
return transformer.Transform(data);
947+
// Make a custom estimator and transform the data.
948+
var estimator = mlContext.Transforms.CustomMapping(mapping, null);
949+
return estimator.Fit(data).Transform(data);
950950
}
951951
```
952952

docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIHelper.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public static IDataView GetHousingRegressionIDataView(MLContext mlContext, out s
1919
if (binaryPrediction)
2020
{
2121
labelColumn = nameof(BinaryOutputRow.AboveAverage);
22-
data = mlContext.Transforms.CustomMappingTransformer(GreaterThanAverage, null).Transform(data);
22+
data = mlContext.Transforms.CustomMapping(GreaterThanAverage, null).Fit(data).Transform(data);
2323
data = mlContext.Transforms.DropColumns("MedianHomeValue").Fit(data).Transform(data);
2424
}
2525

docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingSample.cs

+31-19
Original file line numberDiff line numberDiff line change
@@ -16,30 +16,35 @@ public static void Example()
1616
var trainData = mlContext.Data.LoadFromEnumerable(data);
1717

1818
// Preview of the data.
19-
//
20-
// Age Case Education Induced Parity PooledStratum RowNum ...
21-
// 26 1 0-5yrs 1 6 3 1 ...
22-
// 42 1 0-5yrs 1 1 1 2 ...
23-
// 39 1 0-5yrs 2 6 4 3 ...
24-
// 34 1 0-5yrs 2 4 2 4 ...
25-
// 35 1 6-11yrs 1 3 32 5 ...
19+
// Age RowNum Education ...
20+
// 26 0 0-5yrs ...
21+
// 42 1 0-5yrs ...
22+
// 39 2 12+yrs ...
23+
// 34 3 0-5yrs ...
24+
// 35 4 6-11yrs ...
2625

2726
// We define the custom mapping between input and output rows that will be applied by the transformation.
28-
Action<SamplesUtils.DatasetUtils.SampleInfertData, SampleInfertDataTransformed> mapping =
27+
Action<SamplesUtils.DatasetUtils.SampleInfertData, OutputRow> mapping =
2928
(input, output) => output.IsUnderThirty = input.Age < 30;
3029

3130
// Custom transformations can be used to transform data directly, or as part of a pipeline. Below we transform data directly.
32-
var transformer = mlContext.Transforms.CustomMappingTransformer(mapping, null);
33-
var transformedData = transformer.Transform(trainData);
31+
var estimator = mlContext.Transforms.CustomMapping(mapping, null);
32+
var transformedData = estimator.Fit(trainData).Transform(trainData);
33+
34+
// Preview 5 lines of the transformed data.
35+
transformedData = mlContext.Data.TakeRows(transformedData, 5);
36+
var dataEnumerable = mlContext.Data.CreateEnumerable<SampleInfertDataTransformed>(transformedData, reuseRowObject: true);
37+
Console.WriteLine("IsUnderThirty\t Age\t RowNum\t Education\t ...");
38+
foreach (var row in dataEnumerable)
39+
Console.WriteLine($"{row.IsUnderThirty}\t {row.Age}\t {row.RowNum}\t {row.Education}\t ...");
40+
// Expected output:
41+
// IsUnderThirty Age RowNum Education ...
42+
// True 26 0 0-5yrs ...
43+
// False 42 1 0-5yrs ...
44+
// False 39 2 12+yrs ...
45+
// False 34 3 0-5yrs ...
46+
// False 35 4 6-11yrs ...
3447

35-
// Preview of the data.
36-
//
37-
// IsUnderThirty Age Case Education Induced Parity PooledStratum RowNum ...
38-
// true 26 1 0-5yrs 1 6 3 1 ...
39-
// false 42 1 0-5yrs 1 1 1 2 ...
40-
// false 39 1 0-5yrs 2 6 4 3 ...
41-
// false 34 1 0-5yrs 2 4 2 4 ...
42-
// false 35 1 6-11yrs 1 3 32 5 ...
4348

4449
// Here instead we use it as part of a pipeline of estimators.
4550
var pipeline = mlContext.Transforms.CustomMapping(mapping, null)
@@ -53,12 +58,19 @@ public static void Example()
5358
transformedData = pipeline.Fit(trainData).Transform(trainData);
5459
}
5560

61+
// This defines only the column to be generated by the transformation in addition to the columns already present.
62+
public class OutputRow
63+
{
64+
public bool IsUnderThirty { get; set; }
65+
}
66+
5667
// Represents the transformed infertility dataset.
5768
public class SampleInfertDataTransformed
5869
{
70+
public bool IsUnderThirty { get; set; }
71+
public float Age { get; set; }
5972
public int RowNum { get; set; }
6073
public string Education { get; set; }
61-
public bool IsUnderThirty { get; set; }
6274
public float Parity { get; set; }
6375
public float Induced { get; set; }
6476
public float Case { get; set; }

docs/samples/Microsoft.ML.Samples/Program.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ internal static class Program
66
{
77
static void Main(string[] args)
88
{
9-
TakeRows.Example();
9+
CustomMapping.Example();
1010
}
1111
}
1212
}

src/Microsoft.ML.Transforms/CustomMappingCatalog.cs

-24
Original file line numberDiff line numberDiff line change
@@ -34,29 +34,5 @@ public static CustomMappingEstimator<TSrc, TDst> CustomMapping<TSrc, TDst>(this
3434
where TSrc : class, new()
3535
where TDst : class, new()
3636
=> new CustomMappingEstimator<TSrc, TDst>(catalog.GetEnvironment(), mapAction, contractName, inputSchemaDefinition, outputSchemaDefinition);
37-
38-
/// <summary>
39-
/// Create a custom mapping of input columns to output columns. Most likely, you should call this method when you are loading the model:
40-
/// use <see cref="CustomMapping{TSrc, TDst}(TransformsCatalog, Action{TSrc, TDst}, string, SchemaDefinition, SchemaDefinition)"/> when you are
41-
/// training the model.
42-
/// </summary>
43-
/// <typeparam name="TSrc">The class defining which columns to take from the incoming data.</typeparam>
44-
/// <typeparam name="TDst">The class defining which new columns are added to the data.</typeparam>
45-
/// <param name="catalog">The transform catalog</param>
46-
/// <param name="mapAction">The mapping action. This must be thread-safe and free from side effects.</param>
47-
/// <param name="contractName">The contract name, used by ML.NET for loading the model. If <c>null</c> is specified, such a trained model would not be save-able.</param>
48-
/// <param name="inputSchemaDefinition">Additional parameters for schema mapping between <typeparamref name="TSrc"/> and input data.</param>
49-
/// <param name="outputSchemaDefinition">Additional parameters for schema mapping between <typeparamref name="TDst"/> and output data.</param>
50-
/// <example>
51-
/// <format type="text/markdown">
52-
/// <![CDATA[
53-
/// [!code-csharp[FastTree](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMapping.cs)]
54-
/// ]]></format>
55-
/// </example>
56-
public static CustomMappingTransformer<TSrc, TDst> CustomMappingTransformer<TSrc, TDst>(this TransformsCatalog catalog, Action<TSrc, TDst> mapAction, string contractName,
57-
SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
58-
where TSrc : class, new()
59-
where TDst : class, new()
60-
=> new CustomMappingTransformer<TSrc, TDst>(catalog.GetEnvironment(), mapAction, contractName, inputSchemaDefinition, outputSchemaDefinition);
6137
}
6238
}

src/Microsoft.ML.Transforms/ExtensionsCatalog.cs

+4-4
Original file line numberDiff line numberDiff line change
@@ -38,19 +38,19 @@ public static MissingValueIndicatorEstimator IndicateMissingValues(this Transfor
3838
/// Creates a new output column, or replaces the source with a new column
3939
/// (depending on whether the <paramref name="outputColumnName"/> is given a value, or left to null)
4040
/// identical to the input column for everything but the missing values. The missing values of the input column, in this new column are replaced with
41-
/// one of the values specifid in the <paramref name="replacementKind"/>. The default for the <paramref name="replacementKind"/> is
41+
/// one of the values specified in the <paramref name="replacementMode"/>. The default for the <paramref name="replacementMode"/> is
4242
/// <see cref="MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.DefaultValue"/>.
4343
/// </summary>
4444
/// <param name="catalog">The transform extensions' catalog.</param>
4545
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
4646
/// <param name="inputColumnName">Name of column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.
4747
/// If not provided, the <paramref name="inputColumnName"/> will be replaced with the results of the transforms.</param>
48-
/// <param name="replacementKind">The type of replacement to use as specified in <see cref="MissingValueReplacingEstimator.ColumnOptions.ReplacementMode"/></param>
48+
/// <param name="replacementMode">The type of replacement to use as specified in <see cref="MissingValueReplacingEstimator.ColumnOptions.ReplacementMode"/>.</param>
4949
public static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog,
5050
string outputColumnName,
5151
string inputColumnName = null,
52-
MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementKind = MissingValueReplacingEstimator.Defaults.ReplacementMode)
53-
=> new MissingValueReplacingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, replacementKind);
52+
MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode)
53+
=> new MissingValueReplacingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, replacementMode);
5454

5555
/// <summary>
5656
/// Creates a new output column, identical to the input column for everything but the missing values.

src/Microsoft.ML.Transforms/MissingValueIndicatorTransformer.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ private static VersionInfo GetVersionInfo()
8080
/// <summary>
8181
/// The names of the output and input column pairs for the transformation.
8282
/// </summary>
83-
public IReadOnlyList<(string outputColumnName, string inputColumnName)> Columns => ColumnPairs.AsReadOnly();
83+
internal IReadOnlyList<(string outputColumnName, string inputColumnName)> Columns => ColumnPairs.AsReadOnly();
8484

8585
/// <summary>
8686
/// Initializes a new instance of <see cref="MissingValueIndicatorTransformer"/>

test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs

+3-3
Original file line numberDiff line numberDiff line change
@@ -543,9 +543,9 @@ public static IDataView PrepareData(MLContext mlContext, IDataView data)
543543
{
544544
// Define the operation code.
545545
Action<InputRow, OutputRow> mapping = (input, output) => output.Label = input.Income > 50000;
546-
// Make a custom transformer and transform the data.
547-
var transformer = mlContext.Transforms.CustomMappingTransformer(mapping, null);
548-
return transformer.Transform(data);
546+
// Make a custom estimator and transform the data.
547+
var estimator = mlContext.Transforms.CustomMapping(mapping, null);
548+
return estimator.Fit(data).Transform(data);
549549
}
550550

551551
public static ITransformer TrainModel(MLContext mlContext, IDataView trainData)

0 commit comments

Comments
 (0)