Skip to content

Commit e50be03

Browse files
committed
提高自定义词条以空格开头或结尾时的健壮性
1 parent 98b407d commit e50be03

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

src/main/java/com/hankcs/hanlp/seg/Segment.java

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -196,11 +196,13 @@ protected static List<AtomNode> quickAtomSegment(char[] charArray, int start, in
196 196
*/
197 197
protected static List<Vertex> combineByCustomDictionary(List<Vertex> vertexList)
198 198
{
199+
assert vertexList.size() > 2 : "vertexList至少包含 始##始 和 末##末";
199 200
Vertex[] wordNet = new Vertex[vertexList.size()];
200 201
vertexList.toArray(wordNet);
201 202
// DAT合并
202 203
DoubleArrayTrie<CoreDictionary.Attribute> dat = CustomDictionary.dat;
203-
for (int i = 0; i < wordNet.length; ++i)
204+
int length = wordNet.length - 1; // 跳过首尾
205+
for (int i = 1; i < length; ++i)
204 206
{
205 207
int state = 1;
206 208
state = dat.transition(wordNet[i].realWord, state);
@@ -209,7 +211,7 @@ protected static List<Vertex> combineByCustomDictionary(List<Vertex> vertexList)
209 211
int to = i + 1;
210 212
int end = to;
211 213
CoreDictionary.Attribute value = dat.output(state);
212-
for (; to < wordNet.length; ++to)
214+
for (; to < length; ++to)
213 215
{
214 216
state = dat.transition(wordNet[to].realWord, state);
215 217
if (state < 0) break;
@@ -230,7 +232,7 @@ protected static List<Vertex> combineByCustomDictionary(List<Vertex> vertexList)
230 232
// BinTrie合并
231 233
if (CustomDictionary.trie != null)
232 234
{
233-
for (int i = 0; i < wordNet.length; ++i)
235+
for (int i = 1; i < length; ++i)
234 236
{
235 237
if (wordNet[i] == null) continue;
236 238
BaseNode<CoreDictionary.Attribute> state = CustomDictionary.trie.transition(wordNet[i].realWord.toCharArray(), 0);
@@ -239,7 +241,7 @@ protected static List<Vertex> combineByCustomDictionary(List<Vertex> vertexList)
239 241
int to = i + 1;
240 242
int end = to;
241 243
CoreDictionary.Attribute value = state.getValue();
242-
for (; to < wordNet.length; ++to)
244+
for (; to < length; ++to)
243 245
{
244 246
if (wordNet[to] == null) continue;
245 247
state = state.transition(wordNet[to].realWord.toCharArray(), 0);

0 commit comments

Comments
 (0)