Skip to content

Add V1 Scenario tests for data transformation #2803

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Addressing PR Comments.
  • Loading branch information
Rogan Carr committed Mar 4, 2019
commit 3e68c5deb021c1f8b4590b92e42e7e2f27992679
73 changes: 14 additions & 59 deletions test/Microsoft.ML.Functional.Tests/DataTransformation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ public DataTransformation(ITestOutputHelper output) : base(output)
}

/// <summary>
/// Extensibility: Add a new column.
/// Extensibility: Add a new column that is a function of other columns.
/// </summary>
[Fact]
void ExtensibilityAddingOneColumn()
void ExtensibilityAddAColumnAsAFunctionOfMultipleColumns()
{
// Concurrency must be 1 to assure that the mapping is done sequentially.
var mlContext = new MLContext(seed: 1, conc: 1);
Expand All @@ -39,11 +39,19 @@ void ExtensibilityAddingOneColumn()
int numSamples = 10;
data = mlContext.Data.TakeRows(data, numSamples);

// Local helper: cosine of the angle between the petal vector (petalWidth, petalLength)
// and the sepal vector (sepalWidth, sepalLength), i.e. their dot product divided by
// the product of their magnitudes.
float angiospermCosine(float petalWidth, float petalLength, float sepalWidth, float sepalLength)
{
    // Sums of squares are formed in float; Math.Sqrt widens the result to double.
    var petalNorm = Math.Sqrt(petalWidth * petalWidth + petalLength * petalLength);
    var sepalNorm = Math.Sqrt(sepalWidth * sepalWidth + sepalLength * sepalLength);

    // Float dot product divided by the double-precision norm product, narrowed back to float.
    var dotProduct = petalWidth * sepalWidth + petalLength * sepalLength;
    var cosine = dotProduct / (petalNorm * sepalNorm);
    return (float)cosine;
}

// Create a function that generates a column.
Action<Iris, IrisWithOneExtraColumn> generateGroupId = (input, output) =>
{
output.Label = input.Label;
output.Float1 = GetRandomNumber(input.Label + input.PetalLength + input.PetalWidth + input.SepalLength + input.SepalWidth);
output.Float1 = angiospermCosine(input.PetalLength, input.PetalWidth, input.SepalLength, input.SepalWidth);
output.PetalLength = input.PetalLength;
output.PetalWidth = input.PetalWidth;
output.SepalLength = input.SepalLength;
Expand All @@ -60,8 +68,8 @@ void ExtensibilityAddingOneColumn()
var transformedRows = mlContext.Data.CreateEnumerable<IrisWithOneExtraColumn>(transformedData, reuseRowObject: true);
foreach (var row in transformedRows)
{
var randomNumber = GetRandomNumber(row.Label + row.PetalLength + row.PetalWidth + row.SepalLength + row.SepalWidth);
Assert.Equal(randomNumber, row.Float1);
var cosineDistance = angiospermCosine(row.PetalLength, row.PetalWidth, row.SepalLength, row.SepalWidth);
Assert.Equal(cosineDistance, row.Float1);
}
}

Expand Down Expand Up @@ -113,59 +121,6 @@ void ExtensibilityAddingTwoColumns()
}
}

/// <summary>
/// Extensibility: Add a new column that is a function of other columns.
/// </summary>
/// <remarks>
/// Takes the first ten rows of the Iris dataset, uses a custom mapping to fill <c>Float1</c>
/// with the cosine of the angle between the petal and sepal measurement vectors, and then
/// verifies every transformed row by recomputing that value from the row's own columns.
/// </remarks>
[Fact]
void ExtensibilityAddAColumnAsAFunctionOfMultipleColumns()
{
    // Concurrency must be 1 to assure that the mapping is done sequentially.
    var mlContext = new MLContext(seed: 1, conc: 1);

    // Load the Iris dataset
    var data = mlContext.Data.LoadFromTextFile<Iris>(
        GetDataPath(TestDatasets.iris.trainFilename),
        hasHeader: TestDatasets.iris.fileHasHeader,
        separatorChar: TestDatasets.iris.fileSeparator);

    // Subsample it down to the first 10 rows.
    int numSamples = 10;
    data = mlContext.Data.TakeRows(data, numSamples);

    // Create a stand-alone function that combines several columns into one value:
    // the cosine of the angle between (petalWidth, petalLength) and (sepalWidth, sepalLength).
    float angiospermCosine(float petalWidth, float petalLength, float sepalWidth, float sepalLength)
    {
        var petalMagnitude = Math.Sqrt(petalWidth * petalWidth + petalLength * petalLength);
        var sepalMagnitude = Math.Sqrt(sepalWidth * sepalWidth + sepalLength * sepalLength);
        return (float)((petalWidth * sepalWidth + petalLength * sepalLength) / (petalMagnitude * sepalMagnitude));
    }

    // Create a function that copies the input columns and generates the extra column.
    // Arguments are passed in the order the helper declares them (width before length).
    Action<Iris, IrisWithOneExtraColumn> addCosineColumn = (input, output) =>
    {
        output.Label = input.Label;
        output.Float1 = angiospermCosine(input.PetalWidth, input.PetalLength, input.SepalWidth, input.SepalLength);
        output.PetalLength = input.PetalLength;
        output.PetalWidth = input.PetalWidth;
        output.SepalLength = input.SepalLength;
        output.SepalWidth = input.SepalWidth;
    };

    // Create a pipeline to execute the custom function.
    var pipeline = mlContext.Transforms.CustomMapping(addCosineColumn, null);

    // Transform the data.
    var transformedData = pipeline.Fit(data).Transform(data);

    // Verify that the column has the correct data by recomputing it from each row's inputs.
    var transformedRows = mlContext.Data.CreateEnumerable<IrisWithOneExtraColumn>(transformedData, reuseRowObject: true);
    foreach (var row in transformedRows)
    {
        var expectedCosine = angiospermCosine(row.PetalWidth, row.PetalLength, row.SepalWidth, row.SepalLength);
        Assert.Equal(expectedCosine, row.Float1);
    }
}

/// <summary>
/// Extensibility: Featurize text using custom word-grams, char-grams, and normalization.
/// </summary>
Expand Down Expand Up @@ -204,7 +159,7 @@ void ExtensibilityModifyTextFeaturization()
}

/// <summary>
/// Extensibility: Featurize text using custom word-grams, char-grams, and normalization.
/// Extensibility: Apply a normalizer to columns in the dataset.
/// </summary>
Copy link
Contributor

@artidoro artidoro Mar 4, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you change summary tag to match the test? #Resolved

[Fact]
void ExtensibilityNormalizeColumns()
Expand Down