@@ -9,15 +9,21 @@ public static partial class TransformSamples
9
9
{
10
10
public static void Example ( )
11
11
{
12
- // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
13
- // as well as the source of randomness.
12
+ // Create a new ML context, for ML.NET operations. It can be used for
13
+ // exception tracking and logging, as well as the source of randomness.
14
14
var ml = new MLContext ( ) ;
15
15
16
16
// Get a small dataset as an IEnumerable and convert to IDataView.
17
17
var data = new List < SampleSentimentData > ( ) {
18
- new SampleSentimentData { Sentiment = true , SentimentText = "Best game I've ever played." } ,
19
- new SampleSentimentData { Sentiment = false , SentimentText = "==RUDE== Dude, 2" } ,
20
- new SampleSentimentData { Sentiment = true , SentimentText = "Until the next game, this is the best Xbox game!" } } ;
18
+ new SampleSentimentData { Sentiment = true ,
19
+ SentimentText = "Best game I've ever played." } ,
20
+
21
+ new SampleSentimentData { Sentiment = false ,
22
+ SentimentText = "==RUDE== Dude, 2" } ,
23
+
24
+ new SampleSentimentData { Sentiment = true ,
25
+ SentimentText = "Until the next game," +
26
+ "this is the best Xbox game!" } } ;
21
27
22
28
// Convert IEnumerable to IDataView.
23
29
var trainData = ml . Data . LoadFromEnumerable ( data ) ;
@@ -29,23 +35,42 @@ public static void Example()
29
35
// false ==RUDE== Dude, 2
30
36
// true Until the next game, this is the best Xbox game!
31
37
32
- // A pipeline to tokenize text as characters and then combine them together into n-grams
33
- // The pipeline uses the default settings to featurize.
38
+ // A pipeline to tokenize text as characters and then combine them
39
+ // together into n-grams. The pipeline uses the default settings to
40
+ // featurize.
41
+
42
+ var charsPipeline = ml . Transforms . Text
43
+ . TokenizeIntoCharactersAsKeys ( "Chars" , "SentimentText" ,
44
+ useMarkerCharacters : false ) ;
45
+
46
+ var ngramOnePipeline = ml . Transforms . Text
47
+ . ProduceNgrams ( "CharsUnigrams" , "Chars" , ngramLength : 1 ) ;
34
48
35
- var charsPipeline = ml . Transforms . Text . TokenizeIntoCharactersAsKeys ( "Chars" , "SentimentText" , useMarkerCharacters : false ) ;
36
- var ngramOnePipeline = ml . Transforms . Text . ProduceNgrams ( "CharsUnigrams" , "Chars" , ngramLength : 1 ) ;
37
- var ngramTwpPipeline = ml . Transforms . Text . ProduceNgrams ( "CharsTwograms" , "Chars" ) ;
38
- var oneCharsPipeline = charsPipeline . Append ( ngramOnePipeline ) ;
39
- var twoCharsPipeline = charsPipeline . Append ( ngramTwpPipeline ) ;
49
+ var ngramTwpPipeline = ml . Transforms . Text
50
+ . ProduceNgrams ( "CharsTwograms" , "Chars" ) ;
51
+
52
+ var oneCharsPipeline = charsPipeline
53
+ . Append ( ngramOnePipeline ) ;
54
+
55
+ var twoCharsPipeline = charsPipeline
56
+ . Append ( ngramTwpPipeline ) ;
40
57
41
58
// The transformed data for pipelines.
42
- var transformedData_onechars = oneCharsPipeline . Fit ( trainData ) . Transform ( trainData ) ;
43
- var transformedData_twochars = twoCharsPipeline . Fit ( trainData ) . Transform ( trainData ) ;
59
+ var transformedData_onechars = oneCharsPipeline . Fit ( trainData )
60
+ . Transform ( trainData ) ;
61
+
62
+ var transformedData_twochars = twoCharsPipeline . Fit ( trainData )
63
+ . Transform ( trainData ) ;
44
64
45
65
// Small helper to print the text inside the columns, in the console.
46
- Action < string , IEnumerable < VBuffer < float > > , VBuffer < ReadOnlyMemory < char > > > printHelper = ( columnName , column , names ) =>
66
+ Action < string , IEnumerable < VBuffer < float > > ,
67
+ VBuffer < ReadOnlyMemory < char > > >
68
+ printHelper = ( columnName , column , names ) =>
69
+
47
70
{
48
- Console . WriteLine ( $ "{ columnName } column obtained post-transformation.") ;
71
+ Console . WriteLine (
72
+ $ "{ columnName } column obtained post-transformation.") ;
73
+
49
74
var slots = names . GetValues ( ) ;
50
75
foreach ( var featureRow in column )
51
76
{
@@ -54,21 +79,33 @@ public static void Example()
54
79
Console . WriteLine ( "" ) ;
55
80
}
56
81
57
- Console . WriteLine ( "===================================================" ) ;
82
+ Console . WriteLine (
83
+ "===================================================" ) ;
58
84
} ;
59
- // Preview of the CharsUnigrams column obtained after processing the input.
85
+ // Preview of the CharsUnigrams column obtained after processing the
86
+ // input.
60
87
VBuffer < ReadOnlyMemory < char > > slotNames = default ;
61
- transformedData_onechars . Schema [ "CharsUnigrams" ] . GetSlotNames ( ref slotNames ) ;
62
- var charsOneGramColumn = transformedData_onechars . GetColumn < VBuffer < float > > ( transformedData_onechars . Schema [ "CharsUnigrams" ] ) ;
88
+ transformedData_onechars . Schema [ "CharsUnigrams" ]
89
+ . GetSlotNames ( ref slotNames ) ;
90
+
91
+ var charsOneGramColumn = transformedData_onechars
92
+ . GetColumn < VBuffer < float > > ( transformedData_onechars
93
+ . Schema [ "CharsUnigrams" ] ) ;
94
+
63
95
printHelper ( "CharsUnigrams" , charsOneGramColumn , slotNames ) ;
64
96
65
97
// CharsUnigrams column obtained post-transformation.
66
98
// 'B' - 1 'e' - 6 's' - 1 't' - 1 '<?>' - 4 'g' - 1 'a' - 2 'm' - 1 'I' - 1 ''' - 1 'v' - 2 ...
67
99
// 'e' - 1 '<?>' - 2 'd' - 1 '=' - 4 'R' - 1 'U' - 1 'D' - 2 'E' - 1 'u' - 1 ',' - 1 '2' - 1
68
100
// 'B' - 0 'e' - 6 's' - 3 't' - 6 '<?>' - 9 'g' - 2 'a' - 2 'm' - 2 'I' - 0 ''' - 0 'v' - 0 ...
69
101
// Preview of the CharsTwograms column obtained after processing the input.
70
- var charsTwoGramColumn = transformedData_twochars . GetColumn < VBuffer < float > > ( transformedData_twochars . Schema [ "CharsTwograms" ] ) ;
71
- transformedData_twochars . Schema [ "CharsTwograms" ] . GetSlotNames ( ref slotNames ) ;
102
+ var charsTwoGramColumn = transformedData_twochars
103
+ . GetColumn < VBuffer < float > > ( transformedData_twochars
104
+ . Schema [ "CharsTwograms" ] ) ;
105
+
106
+ transformedData_twochars . Schema [ "CharsTwograms" ]
107
+ . GetSlotNames ( ref slotNames ) ;
108
+
72
109
printHelper ( "CharsTwograms" , charsTwoGramColumn , slotNames ) ;
73
110
74
111
// CharsTwograms column obtained post-transformation.
@@ -78,7 +115,8 @@ public static void Example()
78
115
}
79
116
80
117
/// <summary>
81
- /// A dataset that contains a tweet and the sentiment assigned to that tweet: 0 - negative and 1 - positive sentiment.
118
+ /// A dataset that contains a tweet and the sentiment assigned to that
119
+ /// tweet: false - negative and true - positive sentiment.
82
120
/// </summary>
83
121
public class SampleSentimentData
84
122
{
0 commit comments