39
39
import org .apache .lucene .document .DoubleField ;
40
40
import org .apache .lucene .document .Field ;
41
41
import org .apache .lucene .document .IntField ;
42
+ import org .apache .lucene .document .KeywordField ;
42
43
import org .apache .lucene .document .LongField ;
43
- import org .apache .lucene .document .SortedDocValuesField ;
44
- import org .apache .lucene .document .StringField ;
45
44
import org .apache .lucene .index .IndexWriter ;
46
45
import org .apache .lucene .index .IndexWriterConfig ;
47
46
import org .apache .lucene .index .LogDocMergePolicy ;
@@ -85,7 +84,7 @@ private synchronized static byte[] readChunk(BufferedInputStream docs) throws IO
85
84
return chunk ;
86
85
}
87
86
88
- static void addOneField (Document doc , SimpleDateFormat dateParser , ParsePosition parsePosition , Field reuseField , Field reuseField2 , String rawValue ) {
87
+ static void addOneField (Document doc , SimpleDateFormat dateParser , ParsePosition parsePosition , Field reuseField , String rawValue ) {
89
88
switch (reuseField .name ()) {
90
89
case "vendor_id" :
91
90
case "green_vendor_id" :
@@ -105,8 +104,6 @@ static void addOneField(Document doc, SimpleDateFormat dateParser, ParsePosition
105
104
BytesRef utf8Value = new BytesRef (rawValue );
106
105
reuseField .setBytesValue (utf8Value );
107
106
doc .add (reuseField );
108
- reuseField2 .setBytesValue (utf8Value );
109
- doc .add (reuseField2 );
110
107
break ;
111
108
}
112
109
case "pickup_datetime" :
@@ -190,7 +187,7 @@ static void addOneField(Document doc, SimpleDateFormat dateParser, ParsePosition
190
187
191
188
/** Index all documents contained in one chunk */
192
189
static void indexOneChunk (SimpleDateFormat dateParser , ParsePosition parsePosition ,
193
- Field [][] reuseFields , Field [][] reuseFields2 , Document reuseDoc , Field cabColorField , Field cabColorDVField , byte [] chunk ,
190
+ Field [][] reuseFields , Document reuseDoc , Field cabColorField , byte [] chunk ,
194
191
IndexWriter w , AtomicInteger docCounter , AtomicLong bytesCounter ) throws IOException {
195
192
//System.out.println("CHUNK: " + chunk.length + " bytes");
196
193
String s = new String (chunk , 0 , chunk .length );
@@ -206,7 +203,6 @@ public Iterator<Document> iterator() {
206
203
private Document nextDoc ;
207
204
private boolean nextSet ;
208
205
private int lastLineStart ;
209
- private int chunkDocCount ;
210
206
private final BytesRef colorBytesRef = new BytesRef (new byte [1 ]);
211
207
212
208
@ Override
@@ -241,8 +237,6 @@ private void setNextDoc() {
241
237
colorBytesRef .bytes [0 ] = (byte ) color ;
242
238
cabColorField .setBytesValue (colorBytesRef );
243
239
reuseDoc .add (cabColorField );
244
- cabColorDVField .setBytesValue (colorBytesRef );
245
- reuseDoc .add (cabColorDVField );
246
240
int colorFieldIndex ;
247
241
if (color == 'g' ) {
248
242
colorFieldIndex = 0 ;
@@ -252,19 +246,17 @@ private void setNextDoc() {
252
246
throw new IllegalArgumentException ("expected color 'g' or 'y' but got '" + color + "'" );
253
247
}
254
248
Field [] colorReuseFields = reuseFields [colorFieldIndex ];
255
- Field [] colorReuseFields2 = reuseFields2 [colorFieldIndex ];
256
249
int lastFieldStart = i ;
257
250
while (true ) {
258
251
char c = s .charAt (i );
259
252
if (c == '\n' || c == ',' ) {
260
253
if (i > lastFieldStart ) {
261
- addOneField (reuseDoc , dateParser , parsePosition , colorReuseFields [fieldUpto ], colorReuseFields2 [ fieldUpto ], s .substring (lastFieldStart , i ));
254
+ addOneField (reuseDoc , dateParser , parsePosition , colorReuseFields [fieldUpto ], s .substring (lastFieldStart , i ));
262
255
}
263
256
if (c == '\n' ) {
264
257
if (fieldUpto != colorReuseFields .length -1 ) {
265
258
throw new AssertionError ("fieldUpto=" + fieldUpto + " vs fields.length-1=" + (colorReuseFields .length -1 ));
266
259
}
267
- chunkDocCount ++;
268
260
this .nextDoc = reuseDoc ;
269
261
int x = docCounter .incrementAndGet ();
270
262
long y = bytesCounter .addAndGet ((i +1 ) - lastLineStart );
@@ -375,7 +367,6 @@ public static void main(String[] args) throws Exception {
375
367
startNS = System .nanoTime ();
376
368
377
369
for (int i =0 ;i <threadCount ;i ++) {
378
- final int threadID = i ;
379
370
threads [i ] = new Thread () {
380
371
@ Override
381
372
public void run () {
@@ -395,15 +386,12 @@ private void _run() throws IOException {
395
386
396
387
// Setup fields & document to reuse
397
388
final Field [][] reuseFields = new Field [2 ][];
398
- final Field [][] reuseFields2 = new Field [2 ][];
399
389
400
390
// green's fields:
401
391
reuseFields [0 ] = new Field [fields .length ];
402
- reuseFields2 [0 ] = new Field [fields .length ];
403
392
404
393
// yellow's fields:
405
394
reuseFields [1 ] = new Field [fields .length ];
406
- reuseFields2 [1 ] = new Field [fields .length ];
407
395
408
396
409
397
for (int i =0 ;i <fields .length ;i ++) {
@@ -415,15 +403,11 @@ private void _run() throws IOException {
415
403
case "rate_code_id" :
416
404
case "store_and_fwd_flag" : {
417
405
if (sparse ) {
418
- reuseFields [0 ][i ] = new StringField ("green_" + fieldName , new BytesRef (), Field .Store .YES );
419
- reuseFields2 [0 ][i ] = new SortedDocValuesField ("green_" + fieldName , new BytesRef ());
420
- reuseFields [1 ][i ] = new StringField ("yellow_" + fieldName , new BytesRef (), Field .Store .YES );
421
- reuseFields2 [1 ][i ] = new SortedDocValuesField ("yellow_" + fieldName , new BytesRef ());
406
+ reuseFields [0 ][i ] = new KeywordField ("green_" + fieldName , new BytesRef (), Field .Store .YES );
407
+ reuseFields [1 ][i ] = new KeywordField ("yellow_" + fieldName , new BytesRef (), Field .Store .YES );
422
408
} else {
423
- reuseFields [0 ][i ] = new StringField (fieldName , new BytesRef (), Field .Store .YES );
424
- reuseFields2 [0 ][i ] = new SortedDocValuesField (fieldName , new BytesRef ());
409
+ reuseFields [0 ][i ] = new KeywordField (fieldName , new BytesRef (), Field .Store .YES );
425
410
reuseFields [1 ][i ] = reuseFields [0 ][i ];
426
- reuseFields2 [1 ][i ] = reuseFields2 [0 ][i ];
427
411
}
428
412
break ;
429
413
}
@@ -476,16 +460,15 @@ private void _run() throws IOException {
476
460
}
477
461
}
478
462
479
- Field cabColorField = new StringField ("cab_color" , new BytesRef (), Field .Store .NO );
480
- Field cabColorDVField = new SortedDocValuesField ("cab_color" , new BytesRef ());
463
+ Field cabColorField = new KeywordField ("cab_color" , new BytesRef (), Field .Store .NO );
481
464
Document reuseDoc = new Document ();
482
465
483
466
while (true ) {
484
467
byte [] chunk = readChunk (docs );
485
468
if (chunk == null ) {
486
469
break ;
487
470
}
488
- indexOneChunk (dateParser , parsePosition , reuseFields , reuseFields2 , reuseDoc , cabColorField , cabColorDVField , chunk , w , docCounter , bytesCounter );
471
+ indexOneChunk (dateParser , parsePosition , reuseFields , reuseDoc , cabColorField , chunk , w , docCounter , bytesCounter );
489
472
}
490
473
}
491
474
};
0 commit comments