Skip to content

Commit e2fb31a

Browse files
committed
Bug fix: a leading blank character caused an ArrayIndexOutOfBoundsException
1 parent 3570068 commit e2fb31a

File tree

7 files changed

+27
-31
lines changed

7 files changed

+27
-31
lines changed

pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<modelVersion>4.0.0</modelVersion>
77
<groupId>org.elasticsearch</groupId>
88
<artifactId>elasticsearch-analysis-ik</artifactId>
9-
<version>1.2.4</version>
9+
<version>1.2.5</version>
1010
<packaging>jar</packaging>
1111
<description>IK Analyzer for ElasticSearch</description>
1212
<inceptionYear>2009</inceptionYear>
@@ -31,7 +31,7 @@
3131
</parent>
3232

3333
<properties>
34-
<elasticsearch.version>0.90.6</elasticsearch.version>
34+
<elasticsearch.version>0.90.2</elasticsearch.version>
3535
</properties>
3636

3737
<repositories>

src/main/java/org/wltea/analyzer/core/CJKSegmenter.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@
2525
*/
2626
package org.wltea.analyzer.core;
2727

28-
import java.util.LinkedList;
29-
import java.util.List;
30-
3128
import org.wltea.analyzer.dic.Dictionary;
3229
import org.wltea.analyzer.dic.Hit;
3330

31+
import java.util.LinkedList;
32+
import java.util.List;
33+
3434

3535
/**
3636
* 中文-日韩文子分词器
@@ -58,7 +58,7 @@ public void analyze(AnalyzeContext context) {
5858
//处理词段队列
5959
Hit[] tmpArray = this.tmpHits.toArray(new Hit[this.tmpHits.size()]);
6060
for(Hit hit : tmpArray){
61-
hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
61+
hit = Dictionary.getSingleton().matchWithHit(String.valueOf(context.getSegmentBuff()).toLowerCase().toCharArray(), context.getCursor() , hit);
6262
if(hit.isMatch()){
6363
//输出当前的词
6464
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD);
@@ -77,7 +77,7 @@ public void analyze(AnalyzeContext context) {
7777

7878
//*********************************
7979
//再对当前指针位置的字符进行单字匹配
80-
Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
80+
Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(String.valueOf(context.getSegmentBuff()).toLowerCase().toCharArray(), context.getCursor(), 1);
8181
if(singleCharHit.isMatch()){//首字成词
8282
//输出当前的词
8383
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD);

src/main/java/org/wltea/analyzer/core/IKSegmenter.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,16 @@
2323
*/
2424
package org.wltea.analyzer.core;
2525

26-
import java.io.IOException;
27-
import java.io.Reader;
28-
import java.util.ArrayList;
29-
import java.util.List;
30-
3126
import org.elasticsearch.common.settings.Settings;
3227
import org.elasticsearch.env.Environment;
3328
import org.wltea.analyzer.cfg.Configuration;
3429
import org.wltea.analyzer.dic.Dictionary;
3530

31+
import java.io.IOException;
32+
import java.io.Reader;
33+
import java.util.ArrayList;
34+
import java.util.List;
35+
3636
/**
3737
* IK分词器主类
3838
*

src/main/java/org/wltea/analyzer/dic/DictSegment.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,8 @@ Hit match(char[] charArray , int begin , int length , Hit searchHit){
114114
}
115115
//设置hit的当前处理位置
116116
searchHit.setEnd(begin);
117-
118-
Character keyChar = new Character(charArray[begin]);
117+
118+
Character keyChar = new Character(charArray[begin]);
119119
DictSegment ds = null;
120120

121121
//引用实例变量为本地变量,避免查询时遇到更新的同步问题

src/main/java/org/wltea/analyzer/dic/Dictionary.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,15 +152,15 @@ public Hit matchInMainDict(char[] charArray){
152152
* @return Hit 匹配结果描述
153153
*/
154154
public Hit matchInMainDict(char[] charArray , int begin, int length){
155-
return singleton._MainDict.match(String.valueOf(charArray).trim().toLowerCase().toCharArray(), begin, length);
155+
return singleton._MainDict.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length);
156156
}
157157

158158
/**
159159
* 检索匹配量词词典
160160
* @return Hit 匹配结果描述
161161
*/
162162
public Hit matchInQuantifierDict(char[] charArray , int begin, int length){
163-
return singleton._QuantifierDict.match(String.valueOf(charArray).trim().toLowerCase().toCharArray(), begin, length);
163+
return singleton._QuantifierDict.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length);
164164
}
165165

166166

@@ -179,7 +179,7 @@ public Hit matchWithHit(char[] charArray , int currentIndex , Hit matchedHit){
179179
* @return boolean
180180
*/
181181
public boolean isStopWord(char[] charArray , int begin, int length){
182-
return singleton._StopWords.match(String.valueOf(charArray).trim().toLowerCase().toCharArray(), begin, length).isMatch();
182+
return singleton._StopWords.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length).isMatch();
183183
}
184184

185185
/**

src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,6 @@
2626
*/
2727
package org.wltea.analyzer.lucene;
2828

29-
import java.io.IOException;
30-
import java.io.Reader;
31-
3229
import org.apache.lucene.analysis.Tokenizer;
3330
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
3431
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -38,6 +35,9 @@
3835
import org.wltea.analyzer.core.IKSegmenter;
3936
import org.wltea.analyzer.core.Lexeme;
4037

38+
import java.io.IOException;
39+
import java.io.Reader;
40+
4141
/**
4242
* IK分词器 Lucene Tokenizer适配器类
4343
* 兼容Lucene 4.0版本
@@ -80,7 +80,7 @@ public boolean incrementToken() throws IOException {
8080
if(nextLexeme != null){
8181
//将Lexeme转成Attributes
8282
//设置词元文本
83-
termAtt.append(nextLexeme.getLexemeText());
83+
termAtt.append(nextLexeme.getLexemeText().toLowerCase());
8484
//设置词元长度
8585
termAtt.setLength(nextLexeme.getLength());
8686
//设置词元位移

src/main/java/org/wltea/analyzer/query/IKQueryExpressionParser.java

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,20 +24,16 @@
2424
*/
2525
package org.wltea.analyzer.query;
2626

27+
import org.apache.lucene.index.Term;
28+
import org.apache.lucene.search.*;
29+
import org.apache.lucene.search.BooleanClause.Occur;
30+
import org.apache.lucene.util.BytesRef;
31+
2732
import java.util.ArrayList;
2833
import java.util.LinkedList;
2934
import java.util.List;
3035
import java.util.Stack;
3136

32-
import org.apache.lucene.index.Term;
33-
import org.apache.lucene.search.BooleanClause;
34-
import org.apache.lucene.search.BooleanQuery;
35-
import org.apache.lucene.search.Query;
36-
import org.apache.lucene.search.TermQuery;
37-
import org.apache.lucene.search.TermRangeQuery;
38-
import org.apache.lucene.search.BooleanClause.Occur;
39-
import org.apache.lucene.util.BytesRef;
40-
4137
/**
4238
* IK简易查询表达式解析
4339
* 结合SWMCQuery算法
@@ -66,7 +62,7 @@ public class IKQueryExpressionParser {
6662
*/
6763
public Query parseExp(String expression , boolean quickMode){
6864
Query lucenceQuery = null;
69-
if(expression != null && !"".equals(expression.trim())){
65+
if(expression != null && !"".equals(expression)){
7066
try{
7167
//文法解析
7268
this.splitElements(expression);

0 commit comments

Comments
 (0)