@@ -119,4 +119,148 @@ public final void testNGram() {
119
119
assertEquals (ngram .get (3 ), null );
120
120
121
121
}
122
+
123
+ /**
124
+ * Test method for {@link NGram#normalize(char)} with Romanian characters
125
+ */
126
+ @ Test
127
+ public final void testNormalizeForRomanian () {
128
+ assertEquals (NGram .normalize ('\u015f' ), '\u015f' );
129
+ assertEquals (NGram .normalize ('\u0163' ), '\u0163' );
130
+ assertEquals (NGram .normalize ('\u0219' ), '\u015f' );
131
+ assertEquals (NGram .normalize ('\u021b' ), '\u0163' );
132
+ }
133
+
134
+ @ Test
135
+ public final void testNormalizeVietnamese () {
136
+ assertEquals (NGram .normalizeVietnamese ("" ), "" );
137
+ assertEquals (NGram .normalizeVietnamese ("ABC" ), "ABC" );
138
+ assertEquals (NGram .normalizeVietnamese ("012" ), "012" );
139
+ assertEquals (NGram .normalizeVietnamese ("\u00c0 " ), "\u00c0 " );
140
+
141
+ assertEquals (NGram .normalizeVietnamese ("\u0041 \u0300 " ), "\u00C0 " );
142
+ assertEquals (NGram .normalizeVietnamese ("\u0045 \u0300 " ), "\u00C8 " );
143
+ assertEquals (NGram .normalizeVietnamese ("\u0049 \u0300 " ), "\u00CC " );
144
+ assertEquals (NGram .normalizeVietnamese ("\u004F \u0300 " ), "\u00D2 " );
145
+ assertEquals (NGram .normalizeVietnamese ("\u0055 \u0300 " ), "\u00D9 " );
146
+ assertEquals (NGram .normalizeVietnamese ("\u0059 \u0300 " ), "\u1EF2 " );
147
+ assertEquals (NGram .normalizeVietnamese ("\u0061 \u0300 " ), "\u00E0 " );
148
+ assertEquals (NGram .normalizeVietnamese ("\u0065 \u0300 " ), "\u00E8 " );
149
+ assertEquals (NGram .normalizeVietnamese ("\u0069 \u0300 " ), "\u00EC " );
150
+ assertEquals (NGram .normalizeVietnamese ("\u006F \u0300 " ), "\u00F2 " );
151
+ assertEquals (NGram .normalizeVietnamese ("\u0075 \u0300 " ), "\u00F9 " );
152
+ assertEquals (NGram .normalizeVietnamese ("\u0079 \u0300 " ), "\u1EF3 " );
153
+ assertEquals (NGram .normalizeVietnamese ("\u00C2 \u0300 " ), "\u1EA6 " );
154
+ assertEquals (NGram .normalizeVietnamese ("\u00CA \u0300 " ), "\u1EC0 " );
155
+ assertEquals (NGram .normalizeVietnamese ("\u00D4 \u0300 " ), "\u1ED2 " );
156
+ assertEquals (NGram .normalizeVietnamese ("\u00E2 \u0300 " ), "\u1EA7 " );
157
+ assertEquals (NGram .normalizeVietnamese ("\u00EA \u0300 " ), "\u1EC1 " );
158
+ assertEquals (NGram .normalizeVietnamese ("\u00F4 \u0300 " ), "\u1ED3 " );
159
+ assertEquals (NGram .normalizeVietnamese ("\u0102 \u0300 " ), "\u1EB0 " );
160
+ assertEquals (NGram .normalizeVietnamese ("\u0103 \u0300 " ), "\u1EB1 " );
161
+ assertEquals (NGram .normalizeVietnamese ("\u01A0 \u0300 " ), "\u1EDC " );
162
+ assertEquals (NGram .normalizeVietnamese ("\u01A1 \u0300 " ), "\u1EDD " );
163
+ assertEquals (NGram .normalizeVietnamese ("\u01AF \u0300 " ), "\u1EEA " );
164
+ assertEquals (NGram .normalizeVietnamese ("\u01B0 \u0300 " ), "\u1EEB " );
165
+
166
+ assertEquals (NGram .normalizeVietnamese ("\u0041 \u0301 " ), "\u00C1 " );
167
+ assertEquals (NGram .normalizeVietnamese ("\u0045 \u0301 " ), "\u00C9 " );
168
+ assertEquals (NGram .normalizeVietnamese ("\u0049 \u0301 " ), "\u00CD " );
169
+ assertEquals (NGram .normalizeVietnamese ("\u004F \u0301 " ), "\u00D3 " );
170
+ assertEquals (NGram .normalizeVietnamese ("\u0055 \u0301 " ), "\u00DA " );
171
+ assertEquals (NGram .normalizeVietnamese ("\u0059 \u0301 " ), "\u00DD " );
172
+ assertEquals (NGram .normalizeVietnamese ("\u0061 \u0301 " ), "\u00E1 " );
173
+ assertEquals (NGram .normalizeVietnamese ("\u0065 \u0301 " ), "\u00E9 " );
174
+ assertEquals (NGram .normalizeVietnamese ("\u0069 \u0301 " ), "\u00ED " );
175
+ assertEquals (NGram .normalizeVietnamese ("\u006F \u0301 " ), "\u00F3 " );
176
+ assertEquals (NGram .normalizeVietnamese ("\u0075 \u0301 " ), "\u00FA " );
177
+ assertEquals (NGram .normalizeVietnamese ("\u0079 \u0301 " ), "\u00FD " );
178
+ assertEquals (NGram .normalizeVietnamese ("\u00C2 \u0301 " ), "\u1EA4 " );
179
+ assertEquals (NGram .normalizeVietnamese ("\u00CA \u0301 " ), "\u1EBE " );
180
+ assertEquals (NGram .normalizeVietnamese ("\u00D4 \u0301 " ), "\u1ED0 " );
181
+ assertEquals (NGram .normalizeVietnamese ("\u00E2 \u0301 " ), "\u1EA5 " );
182
+ assertEquals (NGram .normalizeVietnamese ("\u00EA \u0301 " ), "\u1EBF " );
183
+ assertEquals (NGram .normalizeVietnamese ("\u00F4 \u0301 " ), "\u1ED1 " );
184
+ assertEquals (NGram .normalizeVietnamese ("\u0102 \u0301 " ), "\u1EAE " );
185
+ assertEquals (NGram .normalizeVietnamese ("\u0103 \u0301 " ), "\u1EAF " );
186
+ assertEquals (NGram .normalizeVietnamese ("\u01A0 \u0301 " ), "\u1EDA " );
187
+ assertEquals (NGram .normalizeVietnamese ("\u01A1 \u0301 " ), "\u1EDB " );
188
+ assertEquals (NGram .normalizeVietnamese ("\u01AF \u0301 " ), "\u1EE8 " );
189
+ assertEquals (NGram .normalizeVietnamese ("\u01B0 \u0301 " ), "\u1EE9 " );
190
+
191
+ assertEquals (NGram .normalizeVietnamese ("\u0041 \u0303 " ), "\u00C3 " );
192
+ assertEquals (NGram .normalizeVietnamese ("\u0045 \u0303 " ), "\u1EBC " );
193
+ assertEquals (NGram .normalizeVietnamese ("\u0049 \u0303 " ), "\u0128 " );
194
+ assertEquals (NGram .normalizeVietnamese ("\u004F \u0303 " ), "\u00D5 " );
195
+ assertEquals (NGram .normalizeVietnamese ("\u0055 \u0303 " ), "\u0168 " );
196
+ assertEquals (NGram .normalizeVietnamese ("\u0059 \u0303 " ), "\u1EF8 " );
197
+ assertEquals (NGram .normalizeVietnamese ("\u0061 \u0303 " ), "\u00E3 " );
198
+ assertEquals (NGram .normalizeVietnamese ("\u0065 \u0303 " ), "\u1EBD " );
199
+ assertEquals (NGram .normalizeVietnamese ("\u0069 \u0303 " ), "\u0129 " );
200
+ assertEquals (NGram .normalizeVietnamese ("\u006F \u0303 " ), "\u00F5 " );
201
+ assertEquals (NGram .normalizeVietnamese ("\u0075 \u0303 " ), "\u0169 " );
202
+ assertEquals (NGram .normalizeVietnamese ("\u0079 \u0303 " ), "\u1EF9 " );
203
+ assertEquals (NGram .normalizeVietnamese ("\u00C2 \u0303 " ), "\u1EAA " );
204
+ assertEquals (NGram .normalizeVietnamese ("\u00CA \u0303 " ), "\u1EC4 " );
205
+ assertEquals (NGram .normalizeVietnamese ("\u00D4 \u0303 " ), "\u1ED6 " );
206
+ assertEquals (NGram .normalizeVietnamese ("\u00E2 \u0303 " ), "\u1EAB " );
207
+ assertEquals (NGram .normalizeVietnamese ("\u00EA \u0303 " ), "\u1EC5 " );
208
+ assertEquals (NGram .normalizeVietnamese ("\u00F4 \u0303 " ), "\u1ED7 " );
209
+ assertEquals (NGram .normalizeVietnamese ("\u0102 \u0303 " ), "\u1EB4 " );
210
+ assertEquals (NGram .normalizeVietnamese ("\u0103 \u0303 " ), "\u1EB5 " );
211
+ assertEquals (NGram .normalizeVietnamese ("\u01A0 \u0303 " ), "\u1EE0 " );
212
+ assertEquals (NGram .normalizeVietnamese ("\u01A1 \u0303 " ), "\u1EE1 " );
213
+ assertEquals (NGram .normalizeVietnamese ("\u01AF \u0303 " ), "\u1EEE " );
214
+ assertEquals (NGram .normalizeVietnamese ("\u01B0 \u0303 " ), "\u1EEF " );
215
+
216
+ assertEquals (NGram .normalizeVietnamese ("\u0041 \u0309 " ), "\u1EA2 " );
217
+ assertEquals (NGram .normalizeVietnamese ("\u0045 \u0309 " ), "\u1EBA " );
218
+ assertEquals (NGram .normalizeVietnamese ("\u0049 \u0309 " ), "\u1EC8 " );
219
+ assertEquals (NGram .normalizeVietnamese ("\u004F \u0309 " ), "\u1ECE " );
220
+ assertEquals (NGram .normalizeVietnamese ("\u0055 \u0309 " ), "\u1EE6 " );
221
+ assertEquals (NGram .normalizeVietnamese ("\u0059 \u0309 " ), "\u1EF6 " );
222
+ assertEquals (NGram .normalizeVietnamese ("\u0061 \u0309 " ), "\u1EA3 " );
223
+ assertEquals (NGram .normalizeVietnamese ("\u0065 \u0309 " ), "\u1EBB " );
224
+ assertEquals (NGram .normalizeVietnamese ("\u0069 \u0309 " ), "\u1EC9 " );
225
+ assertEquals (NGram .normalizeVietnamese ("\u006F \u0309 " ), "\u1ECF " );
226
+ assertEquals (NGram .normalizeVietnamese ("\u0075 \u0309 " ), "\u1EE7 " );
227
+ assertEquals (NGram .normalizeVietnamese ("\u0079 \u0309 " ), "\u1EF7 " );
228
+ assertEquals (NGram .normalizeVietnamese ("\u00C2 \u0309 " ), "\u1EA8 " );
229
+ assertEquals (NGram .normalizeVietnamese ("\u00CA \u0309 " ), "\u1EC2 " );
230
+ assertEquals (NGram .normalizeVietnamese ("\u00D4 \u0309 " ), "\u1ED4 " );
231
+ assertEquals (NGram .normalizeVietnamese ("\u00E2 \u0309 " ), "\u1EA9 " );
232
+ assertEquals (NGram .normalizeVietnamese ("\u00EA \u0309 " ), "\u1EC3 " );
233
+ assertEquals (NGram .normalizeVietnamese ("\u00F4 \u0309 " ), "\u1ED5 " );
234
+ assertEquals (NGram .normalizeVietnamese ("\u0102 \u0309 " ), "\u1EB2 " );
235
+ assertEquals (NGram .normalizeVietnamese ("\u0103 \u0309 " ), "\u1EB3 " );
236
+ assertEquals (NGram .normalizeVietnamese ("\u01A0 \u0309 " ), "\u1EDE " );
237
+ assertEquals (NGram .normalizeVietnamese ("\u01A1 \u0309 " ), "\u1EDF " );
238
+ assertEquals (NGram .normalizeVietnamese ("\u01AF \u0309 " ), "\u1EEC " );
239
+ assertEquals (NGram .normalizeVietnamese ("\u01B0 \u0309 " ), "\u1EED " );
240
+
241
+ assertEquals (NGram .normalizeVietnamese ("\u0041 \u0323 " ), "\u1EA0 " );
242
+ assertEquals (NGram .normalizeVietnamese ("\u0045 \u0323 " ), "\u1EB8 " );
243
+ assertEquals (NGram .normalizeVietnamese ("\u0049 \u0323 " ), "\u1ECA " );
244
+ assertEquals (NGram .normalizeVietnamese ("\u004F \u0323 " ), "\u1ECC " );
245
+ assertEquals (NGram .normalizeVietnamese ("\u0055 \u0323 " ), "\u1EE4 " );
246
+ assertEquals (NGram .normalizeVietnamese ("\u0059 \u0323 " ), "\u1EF4 " );
247
+ assertEquals (NGram .normalizeVietnamese ("\u0061 \u0323 " ), "\u1EA1 " );
248
+ assertEquals (NGram .normalizeVietnamese ("\u0065 \u0323 " ), "\u1EB9 " );
249
+ assertEquals (NGram .normalizeVietnamese ("\u0069 \u0323 " ), "\u1ECB " );
250
+ assertEquals (NGram .normalizeVietnamese ("\u006F \u0323 " ), "\u1ECD " );
251
+ assertEquals (NGram .normalizeVietnamese ("\u0075 \u0323 " ), "\u1EE5 " );
252
+ assertEquals (NGram .normalizeVietnamese ("\u0079 \u0323 " ), "\u1EF5 " );
253
+ assertEquals (NGram .normalizeVietnamese ("\u00C2 \u0323 " ), "\u1EAC " );
254
+ assertEquals (NGram .normalizeVietnamese ("\u00CA \u0323 " ), "\u1EC6 " );
255
+ assertEquals (NGram .normalizeVietnamese ("\u00D4 \u0323 " ), "\u1ED8 " );
256
+ assertEquals (NGram .normalizeVietnamese ("\u00E2 \u0323 " ), "\u1EAD " );
257
+ assertEquals (NGram .normalizeVietnamese ("\u00EA \u0323 " ), "\u1EC7 " );
258
+ assertEquals (NGram .normalizeVietnamese ("\u00F4 \u0323 " ), "\u1ED9 " );
259
+ assertEquals (NGram .normalizeVietnamese ("\u0102 \u0323 " ), "\u1EB6 " );
260
+ assertEquals (NGram .normalizeVietnamese ("\u0103 \u0323 " ), "\u1EB7 " );
261
+ assertEquals (NGram .normalizeVietnamese ("\u01A0 \u0323 " ), "\u1EE2 " );
262
+ assertEquals (NGram .normalizeVietnamese ("\u01A1 \u0323 " ), "\u1EE3 " );
263
+ assertEquals (NGram .normalizeVietnamese ("\u01AF \u0323 " ), "\u1EF0 " );
264
+ assertEquals (NGram .normalizeVietnamese ("\u01B0 \u0323 " ), "\u1EF1 " );
265
+ }
122
266
}
0 commit comments