Skip to content

Commit e3d9c85

Browse files
authored
Merge pull request jprante#3 from Automattic/add-merged-average-profile
Add merged-average language profile
2 parents 47e9214 + 437fa12 commit e3d9c85

File tree

63 files changed

+249
-116
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+249
-116
lines changed

scripts/README.md

Lines changed: 3 additions & 2 deletions

scripts/run.py

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,21 @@
1+
from collections import defaultdict
2+
from fractions import Fraction
13
from io import BytesIO
4+
import json
5+
import math
26
import os
37
import re
8+
import shutil
49
from zipfile import ZipFile
510

611
import baker
712
import ftfy
813
import requests
914
import xmltodict
1015

11-
_TEST_RESOURCES_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)),
12-
'../src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/')
16+
_THIS_PATH = os.path.dirname(os.path.abspath(__file__))
17+
_TEST_RESOURCES_PATH = os.path.join(_THIS_PATH, '../src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/')
18+
_MAIN_RESOURCES_PATH = os.path.join(_THIS_PATH, '../src/main/resources/langdetect/')
1319

1420
# Supported languages according to https://github.com/shuyo/language-detection/blob/wiki/LanguageList.md
1521
_SUPPORTED_LANGUAGES = {
@@ -172,5 +178,57 @@ def generate_wordpress_translations_dataset(out_path=os.path.join(_TEST_RESOURCE
172178
for _, clean_text in clean_texts_with_len[:texts_per_language]:
173179
out_file.write('{}\t{}\n'.format(supported_code, clean_text))
174180

181+
182+
@baker.command
def merge_language_profiles(output_profile_dir='merged-average'):
    """
    Preprocess the original default and short-text profile files, averaging the normalized n-gram frequencies from the
    two profiles to create a merged profile.

    For example, if the trigram "xyz" appears 1 time out of 1000 trigrams in a default profile file and 1 out of 100
    in a short profile file, its merged-average frequency would be (0.001 + 0.01) / 2 = 0.0055. This frequency is then
    converted back to an integer, as the plugin's Java code assumes the frequencies are integers.

    All arithmetic is done with exact integers and fractions (no floats), so the integer frequencies written for each
    n-gram length always add up exactly to the corresponding n_words value.

    :param output_profile_dir: directory name under the main resource path where the merged profile will be saved,
                               overwriting any existing files in the directory
    :raises ValueError: if a merged n_words value does not fit in a Java long, or if the integer frequencies of some
                        n-gram length do not add up to its n_words value (e.g., when no n-grams of that length exist)
    """
    merged_dir = os.path.join(_MAIN_RESOURCES_PATH, output_profile_dir)
    shutil.rmtree(merged_dir, ignore_errors=True)
    os.mkdir(merged_dir)
    for lang in _SUPPORTED_LANGUAGES:
        # The default profiles live directly under the main resource path, the short ones under 'short-text'
        candidate_paths = [os.path.join(_MAIN_RESOURCES_PATH, profile_dir, lang) for profile_dir in ('', 'short-text')]
        profile_paths = [path for path in candidate_paths if os.path.exists(path)]

        # Sum the normalized frequencies, weighting each profile equally. When only one profile exists, the weight is
        # 1, so the merged frequencies are simply that profile's normalized frequencies.
        merged_freq = defaultdict(Fraction)
        for profile_path in profile_paths:
            with open(profile_path, encoding='utf-8') as profile_file:
                profile = json.load(profile_file)
            # The n_words sums of some profiles are wrong, so we recompute them from the counts instead of using them
            n_words = [0, 0, 0]
            for ngram, count in profile['freq'].items():
                n_words[len(ngram) - 1] += count
            for ngram, count in profile['freq'].items():
                merged_freq[ngram] += Fraction(count, n_words[len(ngram) - 1] * len(profile_paths))

        # The least common multiple of the frequency denominators for each n-gram length is the new n_words. Integer
        # division is used because float division silently loses precision for values above 2 ** 53.
        merged_n_words = [1, 1, 1]
        for ngram, freq in merged_freq.items():
            n_words_index = len(ngram) - 1
            current = merged_n_words[n_words_index]
            merged_n_words[n_words_index] = current * freq.denominator // math.gcd(current, freq.denominator)

        # Ensure we don't exceed the maximum long value in Java
        for n_words in merged_n_words:
            if n_words >= 2 ** 63:
                raise ValueError('n_words value {} of language {!r} does not fit in a Java long'.format(n_words, lang))

        # Scale the fractions to integers and verify that they add up to n_words for every n-gram length
        int_freq = {}
        n_words_check = list(merged_n_words)
        for ngram, freq in merged_freq.items():
            n_words_index = len(ngram) - 1
            int_freq[ngram] = int(merged_n_words[n_words_index] * freq)
            n_words_check[n_words_index] -= int_freq[ngram]
        if any(n_words_check):
            raise ValueError('Merged frequencies of language {!r} do not add up to n_words {} (differences: {})'.format(
                lang, merged_n_words, n_words_check))

        merged_profile = dict(name=lang, freq=int_freq, n_words=merged_n_words)
        with open(os.path.join(merged_dir, lang), 'w', encoding='utf-8') as out_file:
            json.dump(merged_profile, out_file, ensure_ascii=False, separators=(',', ':'))
232+
175233
if __name__ == '__main__':
176234
baker.run()

src/main/java/org/xbib/elasticsearch/common/langdetect/LangProfile.java

Lines changed: 29 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,34 @@
1212
import java.util.Map;
1313

1414
public class LangProfile {
15+
private final String name;
16+
private final Map<String, Long> freq = new HashMap<>();
17+
private final List<Long> n_words = new ArrayList<>(NGram.N_GRAM);
1518

16-
private String name;
17-
18-
private Map<String, Integer> freq;
19-
20-
private List<Integer> n_words;
21-
22-
public LangProfile() {
23-
this.freq = new HashMap<>();
24-
this.n_words = new ArrayList<>(NGram.N_GRAM);
19+
/**
20+
* Create an empty language profile.
21+
*/
22+
public LangProfile(String name) {
23+
this.name = name;
2524
for (int i = 0; i < NGram.N_GRAM; i++) {
26-
n_words.add(0);
25+
n_words.add(0L);
26+
}
27+
}
28+
29+
/**
30+
* Create a language profile from a JSON input stream.
31+
*/
32+
@SuppressWarnings("unchecked")
33+
public LangProfile(InputStream input) throws IOException {
34+
XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(input);
35+
Map<String, Object> map = parser.map();
36+
this.name = (String) map.get("name");
37+
// Explicitly convert the numbers because they may get parsed as Integers or Longs.
38+
for (Map.Entry<String, Number> entry : ((Map<String, Number>) map.get("freq")).entrySet()) {
39+
freq.put(entry.getKey(), entry.getValue().longValue());
40+
}
41+
for (Number n : (List<Number>) map.get("n_words")) {
42+
n_words.add(n.longValue());
2743
}
2844
}
2945

@@ -39,37 +55,19 @@ public void add(String gram) {
3955
if (freq.containsKey(gram)) {
4056
freq.put(gram, freq.get(gram) + 1);
4157
} else {
42-
freq.put(gram, 1);
58+
freq.put(gram, 1L);
4359
}
4460
}
4561

46-
public void setName(String name) {
47-
this.name = name;
48-
}
49-
5062
public String getName() {
5163
return name;
5264
}
5365

54-
public List<Integer> getNWords() {
66+
public List<Long> getNWords() {
5567
return n_words;
5668
}
5769

58-
public void setFreq(Map<String, Integer> freq) {
59-
this.freq = freq;
60-
}
61-
62-
public Map<String, Integer> getFreq() {
70+
public Map<String, Long> getFreq() {
6371
return freq;
6472
}
65-
66-
@SuppressWarnings("unchecked")
67-
public void read(InputStream input) throws IOException {
68-
XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(input);
69-
Map<String,Object> map = parser.map();
70-
freq = (Map<String, Integer>) map.get("freq");
71-
name = (String)map.get("name");
72-
n_words = (List<Integer>)map.get("n_words");
73-
}
74-
7573
}

src/main/java/org/xbib/elasticsearch/common/langdetect/LangdetectService.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,7 @@ public void loadProfileFromResource(String resource, int index, int langsize) t
188188
if (in == null) {
189189
throw new IOException("profile '" + resource + "' not found");
190190
}
191-
LangProfile langProfile = new LangProfile();
192-
langProfile.read(in);
193-
addProfile(langProfile, index, langsize);
191+
addProfile(new LangProfile(in), index, langsize);
194192
}
195193

196194
public void addProfile(LangProfile profile, int index, int langsize) throws IOException {

src/main/resources/langdetect/merged-average/af

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/ar

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/bg

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/bn

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/ca

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/cs

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/da

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/de

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/el

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/en

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/es

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/et

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/fa

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/fi

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/fr

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/gu

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/he

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/hi

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/hr

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/hu

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/id

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/it

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/ja

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/kn

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/ko

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/lt

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/lv

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/mk

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/ml

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/mr

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/ne

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/nl

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/no

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/pa

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/pl

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/pt

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/ro

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/ru

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/si

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/sk

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/sl

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/so

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/sq

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/sv

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/sw

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/ta

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/te

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/th

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/tl

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/tr

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/uk

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/ur

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/vi

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/zh-cn

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/main/resources/langdetect/merged-average/zh-tw

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/DetectLanguageAccuracyTest.java

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ public class DetectLanguageAccuracyTest extends Assert {
5454
private final String datasetName;
5555
private final int substringLength;
5656
private final int sampleSize;
57-
private final boolean useShortProfile;
57+
private final String profileParam;
5858
private final boolean useAllLanguages;
5959
private final Map<String, Double> languageToExpectedAccuracy;
6060

@@ -69,20 +69,20 @@ public class DetectLanguageAccuracyTest extends Assert {
6969
* @param datasetName multi-language dataset name, as read in the setup step (see {@link #setUp()})
7070
* @param substringLength substring length to test (see {@link #generateSubstringSample(String, int, int)})
7171
* @param sampleSize number of substrings to test (see {@link #generateSubstringSample(String, int, int)})
72-
* @param useShortProfile if true, the short text language profile will be used instead of the default profile
72+
* @param profileParam profile name parameter to pass to the detection service
7373
* @param useAllLanguages if true, all supported languages will be used instead of just the default ones
7474
* @param languageToExpectedAccuracy mapping from language code to expected accuracy
7575
*/
7676
public DetectLanguageAccuracyTest(String datasetName,
7777
int substringLength,
7878
int sampleSize,
79-
boolean useShortProfile,
79+
String profileParam,
8080
boolean useAllLanguages,
8181
Map<String, Double> languageToExpectedAccuracy) {
8282
this.datasetName = datasetName;
8383
this.substringLength = substringLength;
8484
this.sampleSize = sampleSize;
85-
this.useShortProfile = useShortProfile;
85+
this.profileParam = profileParam;
8686
this.useAllLanguages = useAllLanguages;
8787
this.languageToExpectedAccuracy = languageToExpectedAccuracy;
8888
}
@@ -104,7 +104,7 @@ public static void setUp() throws IOException {
104104
// Write column headers
105105
Files.write(
106106
outputPath,
107-
Collections.singletonList("datasetName,substringLength,sampleSize,useShortProfile,useAllLanguages," +
107+
Collections.singletonList("datasetName,substringLength,sampleSize,profileParam,useAllLanguages," +
108108
ALL_LANGUAGES),
109109
StandardCharsets.UTF_8
110110
);
@@ -121,12 +121,20 @@ public void test() throws IOException {
121121
// Set up the detection service according to the test's parameters
122122
String languageSetting = DEFAULT_LANGUAGES;
123123
if (useAllLanguages) {
124-
languageSetting = useShortProfile ? ALL_SHORT_PROFILE_LANGUAGES : ALL_DEFAULT_PROFILE_LANGUAGES;
124+
// TODO: This is a bit clunky. LangdetectService should support "all" as a language setting.
125+
if (profileParam.isEmpty()) {
126+
languageSetting = ALL_DEFAULT_PROFILE_LANGUAGES;
127+
} else if (profileParam.equals("short-text")) {
128+
languageSetting = ALL_SHORT_PROFILE_LANGUAGES;
129+
} else {
130+
assertEquals(profileParam, "merged-average");
131+
languageSetting = ALL_LANGUAGES;
132+
}
125133
}
126134
LangdetectService service = new LangdetectService(
127135
Settings.builder()
128136
.put("languages", languageSetting)
129-
.put("profile", useShortProfile ? "short-text" : "")
137+
.put("profile", profileParam)
130138
.build()
131139
);
132140
Map<String, List<String>> languageToFullTexts = multiLanguageDatasets.get(datasetName);
@@ -159,7 +167,7 @@ public void test() throws IOException {
159167
}
160168
} else {
161169
List<Object> row = new ArrayList<>();
162-
Collections.addAll(row, datasetName, substringLength, sampleSize, useShortProfile, useAllLanguages);
170+
Collections.addAll(row, datasetName, substringLength, sampleSize, profileParam, useAllLanguages);
163171
for (String language : ALL_LANGUAGES.split(",")) {
164172
row.add(languageToAccuracy.containsKey(language) ? languageToAccuracy.get(language) : Double.NaN);
165173
}
@@ -175,7 +183,7 @@ public void test() throws IOException {
175183
*
176184
* @return the parsed parameters
177185
*/
178-
@Parameterized.Parameters(name="{0}: substringLength={1} sampleSize={2} useShortProfile={3} useAllLanguages={4}")
186+
@Parameterized.Parameters(name="{0}: substringLength={1} sampleSize={2} profileParam={3} useAllLanguages={4}")
179187
public static Collection<Object[]> data() throws IOException {
180188
List<Object[]> data = new ArrayList<>();
181189
try (BufferedReader br = getResourceReader("accuracies.csv")) {
@@ -191,8 +199,8 @@ public static Collection<Object[]> data() throws IOException {
191199
scanner.nextInt(),
192200
// sampleSize
193201
scanner.nextInt(),
194-
// useShortProfile
195-
scanner.nextBoolean(),
202+
// profileParam
203+
scanner.next(),
196204
// useAllLanguages
197205
scanner.nextBoolean(),
198206
// languageToExpectedAccuracy

src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/DetectorTest.java

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,22 +24,19 @@ public static void setUp() throws Exception {
2424

2525
detect = new LangdetectService(Settings.EMPTY);
2626

27-
LangProfile profile_en = new LangProfile();
28-
profile_en.setName("en_test");
27+
LangProfile profile_en = new LangProfile("en_test");
2928
for (String w : TRAINING_EN.split(" ")) {
3029
profile_en.add(w);
3130
}
3231
detect.addProfile(profile_en, 0, 3);
3332

34-
LangProfile profile_fr = new LangProfile();
35-
profile_fr.setName("fr_test");
33+
LangProfile profile_fr = new LangProfile("fr_test");
3634
for (String w : TRAINING_FR.split(" ")) {
3735
profile_fr.add(w);
3836
}
3937
detect.addProfile(profile_fr, 1, 3);
4038

41-
LangProfile profile_ja = new LangProfile();
42-
profile_ja.setName("ja_test");
39+
LangProfile profile_ja = new LangProfile("ja_test");
4340
for (String w : TRAINING_JA.split(" ")) {
4441
profile_ja.add(w);
4542
}

src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/LangProfileTest.java

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,52 +8,48 @@ public class LangProfileTest extends Assert {
88

99
@Test
1010
public final void testLangProfile() {
11-
LangProfile profile = new LangProfile();
11+
LangProfile profile = new LangProfile((String) null);
1212
assertEquals(profile.getName(), null);
1313
}
1414

1515
@Test
1616
public final void testLangProfileStringInt() {
17-
LangProfile profile = new LangProfile();
18-
profile.setName("en");
17+
LangProfile profile = new LangProfile("en");
1918
assertEquals(profile.getName(), "en");
2019
}
2120

2221
@Test
2322
public final void testAdd() {
24-
LangProfile profile = new LangProfile();
25-
profile.setName("en");
23+
LangProfile profile = new LangProfile("en");
2624
profile.add("a");
27-
assertEquals((int) profile.getFreq().get("a"), 1);
25+
assertEquals((long) profile.getFreq().get("a"), 1);
2826
profile.add("a");
29-
assertEquals((int) profile.getFreq().get("a"), 2);
27+
assertEquals((long) profile.getFreq().get("a"), 2);
3028
//profile.omitLessFreq();
3129
}
3230

3331
@Test
3432
public final void testAddIllegally1() {
35-
LangProfile profile = new LangProfile();
33+
LangProfile profile = new LangProfile((String) null);
3634
profile.add("a");
3735
assertEquals(profile.getFreq().get("a"), null);
3836
}
3937

4038
@Test
4139
public final void testAddIllegally2() {
42-
LangProfile profile = new LangProfile();
43-
profile.setName("en");
40+
LangProfile profile = new LangProfile("en");
4441
profile.add("a");
4542
profile.add("");
4643
profile.add("abcd");
47-
assertEquals((int) profile.getFreq().get("a"), 1);
44+
assertEquals((long) profile.getFreq().get("a"), 1);
4845
assertEquals(profile.getFreq().get(""), null);
4946
assertEquals(profile.getFreq().get("abcd"), null);
5047

5148
}
5249

5350
@Test
5451
public final void testOmitLessFreq() {
55-
LangProfile profile = new LangProfile();
56-
profile.setName("en");
52+
LangProfile profile = new LangProfile("en");
5753
String[] grams = "a b c \u3042 \u3044 \u3046 \u3048 \u304a \u304b \u304c \u304d \u304e \u304f".split(" ");
5854
for (int i = 0; i < 5; ++i) {
5955
for (String g : grams) {
@@ -62,9 +58,9 @@ public final void testOmitLessFreq() {
6258
}
6359
profile.add("\u3050");
6460

65-
assertEquals((int) profile.getFreq().get("a"), 5);
66-
assertEquals((int) profile.getFreq().get("\u3042"), 5);
67-
assertEquals((int) profile.getFreq().get("\u3050"), 1);
61+
assertEquals((long) profile.getFreq().get("a"), 5);
62+
assertEquals((long) profile.getFreq().get("\u3042"), 5);
63+
assertEquals((long) profile.getFreq().get("\u3050"), 1);
6864
//profile.omitLessFreq();
6965
//assertEquals(profile.freq.get("a"), null);
7066
//assertEquals((int) profile.freq.get("\u3042"), 5);

0 commit comments

Comments
 (0)