1212package  com .hankcs .hanlp .dictionary .common ;
1313
1414import  com .hankcs .hanlp .collection .trie .DoubleArrayTrie ;
15+ import  com .hankcs .hanlp .corpus .io .ByteArray ;
1516import  com .hankcs .hanlp .corpus .io .IOUtil ;
16- import  com .hankcs .hanlp .dictionary . BaseSearcher ;
17+ import  com .hankcs .hanlp .utility . TextUtility ;
1718
18- import  java .io .*;
19+ import  java .io .BufferedReader ;
20+ import  java .io .DataOutputStream ;
21+ import  java .io .IOException ;
22+ import  java .io .InputStreamReader ;
1923import  java .util .*;
2024
25+ import  static  com .hankcs .hanlp .utility .Predefine .BIN_EXT ;
2126import  static  com .hankcs .hanlp .utility .Predefine .logger ;
2227
2328/** 
@@ -29,68 +34,84 @@ public abstract class CommonDictionary<V>
2934{
3035    DoubleArrayTrie <V > trie ;
3136
    /**
     * Reads the value array of a cached dictionary from {@code byteArray}.
     * <p>
     * Called by {@code loadDat} before the trie itself is loaded: a {@code null} result makes
     * {@code loadDat} report failure, otherwise the returned array is handed to
     * {@code trie.load} together with the same {@code byteArray}.
     * NOTE(review): presumably this must consume exactly what {@code saveDat} wrote ahead of the
     * trie (an int count followed by each value via {@code saveValue}) - confirm in subclasses.
     *
     * @param byteArray the binary cache to read the values from
     * @return the values, or {@code null} if they could not be loaded
     */
37+     protected  abstract  V [] loadValueArray (ByteArray  byteArray );
38+ 
3239    public  boolean  load (String  path )
3340    {
3441        trie  = new  DoubleArrayTrie <V >();
3542        long  start  = System .currentTimeMillis ();
36-         V [] valueArray  = onLoadValue (path );
37-         if  (valueArray  == null )
38-         {
39-             logger .info ("加载值"  + path  + ".value.dat失败,耗时"  + (System .currentTimeMillis () - start ) + "ms" );
40-             return  false ;
41-         }
42-         logger .info ("加载值"  + path  + ".value.dat成功,耗时"  + (System .currentTimeMillis () - start ) + "ms" );
43-         start  = System .currentTimeMillis ();
44-         if  (loadDat (path  + ".trie.dat" , valueArray ))
43+         if  (loadDat (ByteArray .createByteArray (path  + BIN_EXT )))
4544        {
46-             logger .info ("加载键"  + path  + ".trie.dat成功,耗时"  + (System .currentTimeMillis () - start ) + "ms" );
4745            return  true ;
4846        }
49-         List <String >  keyList  = new  ArrayList <String >( valueArray . length );
47+         TreeMap <String ,  V >  map  = new  TreeMap <String ,  V >( );
5048        try 
5149        {
5250            BufferedReader  br  = new  BufferedReader (new  InputStreamReader (IOUtil .newInputStream (path ), "UTF-8" ));
5351            String  line ;
5452            while  ((line  = br .readLine ()) != null )
5553            {
5654                String [] paramArray  = line .split ("\\ s" );
57-                 keyList . add (paramArray [0 ]);
55+                 map . put (paramArray [0 ],  createValue ( paramArray ) );
5856            }
5957            br .close ();
6058        }
6159        catch  (Exception  e )
6260        {
6361            logger .warning ("读取"  + path  + "失败"  + e );
6462        }
65-         int  resultCode  = trie .build (keyList , valueArray );
63+         Set <Map .Entry <String , V >> entrySet  = map .entrySet ();
64+         List <String > keyList  = new  ArrayList <String >(entrySet .size ());
65+         List <V > valueList  = new  ArrayList <V >(entrySet .size ());
66+         for  (Map .Entry <String , V > entry  : entrySet )
67+         {
68+             keyList .add (entry .getKey ());
69+             valueList .add (entry .getValue ());
70+         }
71+         int  resultCode  = trie .build (keyList , valueList );
6672        if  (resultCode  != 0 )
6773        {
68-             logger .warning ("trie建立失败"  + resultCode  + ",正在尝试排序后重载" );
69-             TreeMap <String , V > map  = new  TreeMap <String , V >();
70-             for  (int  i  = 0 ; i  < valueArray .length ; ++i )
71-             {
72-                 map .put (keyList .get (i ), valueArray [i ]);
73-             }
74-             trie  = new  DoubleArrayTrie <V >();
75-             trie .build (map );
76-             int  i  = 0 ;
77-             for  (V  v  : map .values ())
78-             {
79-                 valueArray [i ++] = v ;
80-             }
74+             logger .warning ("trie建立失败" );
75+             return  false ;
8176        }
82-         trie .save (path  + ".trie.dat" );
83-         onSaveValue (valueArray , path );
84-         logger .info (path  + "加载成功" );
77+         logger .info (path  + "加载成功,耗时"  + (System .currentTimeMillis () - start ) + "ms" );
78+         saveDat (path  + BIN_EXT , valueList );
8579        return  true ;
8680    }
8781
88-     private  boolean  loadDat (String  path , V [] valueArray )
82+     protected  boolean  loadDat (ByteArray  byteArray )
83+     {
84+         V [] valueArray  = loadValueArray (byteArray );
85+         if  (valueArray  == null )
86+         {
87+             return  false ;
88+         }
89+         return  trie .load (byteArray , valueArray );
90+     }
91+ 
92+     protected  boolean  saveDat (String  path , List <V > valueArray )
8993    {
90-         if  (trie .load (path , valueArray )) return  true ;
91-         return  false ;
94+         try 
95+         {
96+             DataOutputStream  out  = new  DataOutputStream (IOUtil .newOutputStream (path ));
97+             out .writeInt (valueArray .size ());
98+             for  (V  item  : valueArray )
99+             {
100+                 saveValue (item , out );
101+             }
102+             trie .save (out );
103+             out .close ();
104+         }
105+         catch  (Exception  e )
106+         {
107+             logger .warning ("保存失败"  + TextUtility .exceptionToString (e ));
108+             return  false ;
109+         }
110+         return  true ;
92111    }
93112
    /**
     * Writes a single value to the binary cache.
     * <p>
     * {@code saveDat} calls this once per value, after it has written the value count as an int
     * and before it writes the trie to the same stream.
     *
     * @param value the value to write
     * @param out   the stream of the cache file being written
     * @throws IOException if writing fails; {@code saveDat} logs it and reports a failed save
     */
113+     protected  abstract  void  saveValue (V  value , DataOutputStream  out ) throws  IOException ;
114+ 
94115    /** 
95116     * 查询一个单词 
96117     * 
@@ -123,103 +144,5 @@ public int size()
123144        return  trie .size ();
124145    }
125146
126-     /** 
127-      * 排序这个词典 
128-      * 
129-      * @param path 
130-      * @return 
131-      */ 
132-     public  static  boolean  sort (String  path )
133-     {
134-         TreeMap <String , String > map  = new  TreeMap <String , String >();
135-         try 
136-         {
137-             BufferedReader  br  = new  BufferedReader (new  InputStreamReader (IOUtil .newInputStream (path ), "UTF-8" ));
138-             String  line ;
139-             while  ((line  = br .readLine ()) != null )
140-             {
141-                 String [] argArray  = line .split ("\\ s" );
142-                 map .put (argArray [0 ], line );
143-             }
144-             br .close ();
145-             // 输出它们 
146-             BufferedWriter  bw  = new  BufferedWriter (new  OutputStreamWriter (IOUtil .newOutputStream (path )));
147-             for  (Map .Entry <String , String > entry  : map .entrySet ())
148-             {
149-                 bw .write (entry .getValue ());
150-                 bw .newLine ();
151-             }
152-             bw .close ();
153-         }
154-         catch  (Exception  e )
155-         {
156-             logger .warning ("读取"  + path  + "失败"  + e );
157-             return  false ;
158-         }
159-         return  true ;
160-     }
161- 
162-     /** 
163-      * 实现此方法来加载值 
164-      * 
165-      * @param path 
166-      * @return 
167-      */ 
168-     protected  abstract  V [] onLoadValue (String  path );
169- 
170-     protected  abstract  boolean  onSaveValue (V [] valueArray , String  path );
171- 
172-     public  BaseSearcher  getSearcher (String  text )
173-     {
174-         return  new  Searcher (text );
175-     }
176- 
177-     /** 
178-      * 前缀搜索,长短都可匹配 
179-      */ 
180-     public  class  Searcher  extends  BaseSearcher <V >
181-     {
182-         /** 
183-          * 分词从何处开始,这是一个状态 
184-          */ 
185-         int  begin ;
186- 
187-         private  List <Map .Entry <String , V >> entryList ;
188- 
189-         protected  Searcher (char [] c )
190-         {
191-             super (c );
192-         }
193- 
194-         protected  Searcher (String  text )
195-         {
196-             super (text );
197-             entryList  = new  LinkedList <Map .Entry <String , V >>();
198-         }
199- 
200-         @ Override 
201-         public  Map .Entry <String , V > next ()
202-         {
203-             // 保证首次调用找到一个词语 
204-             while  (entryList .size () == 0  && begin  < c .length )
205-             {
206-                 entryList  = trie .commonPrefixSearchWithValue (c , begin );
207-                 ++begin ;
208-             }
209-             // 之后调用仅在缓存用完的时候调用一次 
210-             if  (entryList .size () == 0  && begin  < c .length )
211-             {
212-                 entryList  = trie .commonPrefixSearchWithValue (c , begin );
213-                 ++begin ;
214-             }
215-             if  (entryList .size () == 0 )
216-             {
217-                 return  null ;
218-             }
219-             Map .Entry <String , V > result  = entryList .get (0 );
220-             entryList .remove (0 );
221-             offset  = begin  - 1 ;
222-             return  result ;
223-         }
224-     }
    /**
     * Builds the value of one dictionary entry from a line of the text dictionary.
     * <p>
     * {@code load} calls this for every line it reads, passing the line split on whitespace;
     * {@code params[0]} is the field {@code load} uses as the entry's key.
     *
     * @param params the whitespace-separated fields of one line
     * @return the value to store under {@code params[0]}
     */
147+     protected  abstract  V  createValue (String [] params );
225148}
0 commit comments