-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Add option to execute only the last transform in TransformWrapper and have WordBagEstimator return transformer chain #3700
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 1 commit
Commits
Show all changes
13 commits
Select commit
Hold shift + click to select a range
0879374
Add option to execute only the last transform in TransformWrapper.
codemzs 75757c4
PR feedback.
codemzs 1b2be0f
cleanup.
codemzs e74e52a
cleanup.
codemzs 0e3dc6a
PR feedback.
codemzs 91c6a13
PR feedback.
codemzs 06cd394
PR feedback.
codemzs 1b5ede1
PR feedback.
codemzs c9ef723
PR feedback.
codemzs dcaee20
PR feedback.
codemzs e29a75e
fix paths.
codemzs 93411f1
PR feedback.
codemzs 55243df
PR feedback.
codemzs File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
PR feedback.
- Loading branch information
commit 75757c4221a55b23ad5bec5ae4e3dc98989f82b5
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
|
||
using Xunit; | ||
using System.Collections.Generic; | ||
using Microsoft.ML.Transforms.Text; | ||
|
||
namespace Microsoft.ML.Scenarios | ||
{ | ||
/// <summary>
/// Scenario test for a chain of two word-bag estimators applied to different input columns.
/// </summary>
public partial class ScenariosTests
{
    /// <summary>
    /// Fits two chained <c>ProduceWordBags</c> estimators (on "Text" and "Text2"), builds a
    /// prediction engine from the fitted chain, and verifies the "Text" feature vector
    /// produced for the first sample.
    /// </summary>
    [Fact]
    public static void WordBags()
    {
        var mlContext = new MLContext();
        var samples = new List<TextData>()
        {
            new TextData(){ Text = "This is an example to compute bag-of-word features." },
            new TextData(){ Text = "ML.NET's ProduceWordBags API produces bag-of-word features from input text." },
            new TextData(){ Text = "It does so by first tokenizing text/string into words/tokens then " },
            new TextData(){ Text = "computing n-grams and their neumeric values." },
            new TextData(){ Text = "Each position in the output vector corresponds to a particular n-gram." },
            new TextData(){ Text = "The value at each position corresponds to," },
            new TextData(){ Text = "the number of times n-gram occured in the data (Tf), or" },
            new TextData(){ Text = "the inverse of the number of documents contain the n-gram (Idf)," },
            new TextData(){ Text = "or compute both and multipy together (Tf-Idf)." },
        };

        var dataview = mlContext.Data.LoadFromEnumerable(samples);

        // Two word-bag estimators chained together; each one overwrites its own input
        // column ("Text" and "Text2") with term-frequency counts of 3-grams only.
        var textPipeline =
            mlContext.Transforms.Text.ProduceWordBags("Text", "Text",
                ngramLength: 3, useAllLengths: false, weighting: NgramExtractingEstimator.WeightingCriteria.Tf).Append(
            mlContext.Transforms.Text.ProduceWordBags("Text2", "Text2",
                ngramLength: 3, useAllLengths: false, weighting: NgramExtractingEstimator.WeightingCriteria.Tf));

        var textTransformer = textPipeline.Fit(dataview);

        // The transformed view is not inspected; the call is kept so that Transform is
        // still invoked on the fitted chain, as in the original test.
        textTransformer.Transform(dataview);

        var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer);
        var prediction = predictionEngine.Predict(samples[0]);

        // 61 slots in total, with the first six set to 1.
        // NOTE(review): presumably one slot per distinct 3-gram seen during Fit, with the
        // first sample's 3-grams occupying the leading slots - confirm against the estimator.
        var expected = new float[] {
            1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

        // xUnit's contract is Assert.Equal(expected, actual); keeping that order makes a
        // failure message label the two vectors correctly.
        Assert.Equal(expected, prediction.Text);
    }

    /// <summary>
    /// Input row: a free-text column plus a second text column fixed to "ABC".
    /// </summary>
    private class TextData
    {
        public string Text { get; set; }
#pragma warning disable 414
        // Feeds the second ProduceWordBags stage; never read directly by the test code.
        public string Text2 = "ABC";
#pragma warning restore 414
    }

    /// <summary>
    /// Output row: the word-bag feature vector that replaces the "Text" column.
    /// </summary>
    private class TransformedTextData
    {
        public float[] Text { get; set; }
    }
}
|
||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.