Skip to content

Commit 45ab7cf

Browse files
committed
Switch to KeywordField.
1 parent: 03b0a90 · commit: 45ab7cf

File tree

1 file changed

+9
-26
lines changed

1 file changed

+9
-26
lines changed

src/main/perf/IndexTaxis.java

Lines changed: 9 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -39,9 +39,8 @@
3939
import org.apache.lucene.document.DoubleField;
4040
import org.apache.lucene.document.Field;
4141
import org.apache.lucene.document.IntField;
42+
import org.apache.lucene.document.KeywordField;
4243
import org.apache.lucene.document.LongField;
43-
import org.apache.lucene.document.SortedDocValuesField;
44-
import org.apache.lucene.document.StringField;
4544
import org.apache.lucene.index.IndexWriter;
4645
import org.apache.lucene.index.IndexWriterConfig;
4746
import org.apache.lucene.index.LogDocMergePolicy;
@@ -85,7 +84,7 @@ private synchronized static byte[] readChunk(BufferedInputStream docs) throws IO
8584
return chunk;
8685
}
8786

88-
static void addOneField(Document doc, SimpleDateFormat dateParser, ParsePosition parsePosition, Field reuseField, Field reuseField2, String rawValue) {
87+
static void addOneField(Document doc, SimpleDateFormat dateParser, ParsePosition parsePosition, Field reuseField, String rawValue) {
8988
switch(reuseField.name()) {
9089
case "vendor_id":
9190
case "green_vendor_id":
@@ -105,8 +104,6 @@ static void addOneField(Document doc, SimpleDateFormat dateParser, ParsePosition
105104
BytesRef utf8Value = new BytesRef(rawValue);
106105
reuseField.setBytesValue(utf8Value);
107106
doc.add(reuseField);
108-
reuseField2.setBytesValue(utf8Value);
109-
doc.add(reuseField2);
110107
break;
111108
}
112109
case "pickup_datetime":
@@ -190,7 +187,7 @@ static void addOneField(Document doc, SimpleDateFormat dateParser, ParsePosition
190187

191188
/** Index all documents contained in one chunk */
192189
static void indexOneChunk(SimpleDateFormat dateParser, ParsePosition parsePosition,
193-
Field[][] reuseFields, Field[][] reuseFields2, Document reuseDoc, Field cabColorField, Field cabColorDVField, byte[] chunk,
190+
Field[][] reuseFields, Document reuseDoc, Field cabColorField, byte[] chunk,
194191
IndexWriter w, AtomicInteger docCounter, AtomicLong bytesCounter) throws IOException {
195192
//System.out.println("CHUNK: " + chunk.length + " bytes");
196193
String s = new String(chunk, 0, chunk.length);
@@ -206,7 +203,6 @@ public Iterator<Document> iterator() {
206203
private Document nextDoc;
207204
private boolean nextSet;
208205
private int lastLineStart;
209-
private int chunkDocCount;
210206
private final BytesRef colorBytesRef = new BytesRef(new byte[1]);
211207

212208
@Override
@@ -241,8 +237,6 @@ private void setNextDoc() {
241237
colorBytesRef.bytes[0] = (byte) color;
242238
cabColorField.setBytesValue(colorBytesRef);
243239
reuseDoc.add(cabColorField);
244-
cabColorDVField.setBytesValue(colorBytesRef);
245-
reuseDoc.add(cabColorDVField);
246240
int colorFieldIndex;
247241
if (color == 'g') {
248242
colorFieldIndex = 0;
@@ -252,19 +246,17 @@ private void setNextDoc() {
252246
throw new IllegalArgumentException("expected color 'g' or 'y' but got '" + color + "'");
253247
}
254248
Field[] colorReuseFields = reuseFields[colorFieldIndex];
255-
Field[] colorReuseFields2 = reuseFields2[colorFieldIndex];
256249
int lastFieldStart = i;
257250
while (true) {
258251
char c = s.charAt(i);
259252
if (c == '\n' || c == ',') {
260253
if (i > lastFieldStart) {
261-
addOneField(reuseDoc, dateParser, parsePosition, colorReuseFields[fieldUpto], colorReuseFields2[fieldUpto], s.substring(lastFieldStart, i));
254+
addOneField(reuseDoc, dateParser, parsePosition, colorReuseFields[fieldUpto], s.substring(lastFieldStart, i));
262255
}
263256
if (c == '\n') {
264257
if (fieldUpto != colorReuseFields.length-1) {
265258
throw new AssertionError("fieldUpto=" + fieldUpto + " vs fields.length-1=" + (colorReuseFields.length-1));
266259
}
267-
chunkDocCount++;
268260
this.nextDoc = reuseDoc;
269261
int x = docCounter.incrementAndGet();
270262
long y = bytesCounter.addAndGet((i+1) - lastLineStart);
@@ -375,7 +367,6 @@ public static void main(String[] args) throws Exception {
375367
startNS = System.nanoTime();
376368

377369
for(int i=0;i<threadCount;i++) {
378-
final int threadID = i;
379370
threads[i] = new Thread() {
380371
@Override
381372
public void run() {
@@ -395,15 +386,12 @@ private void _run() throws IOException {
395386

396387
// Setup fields & document to reuse
397388
final Field[][] reuseFields = new Field[2][];
398-
final Field[][] reuseFields2 = new Field[2][];
399389

400390
// green's fields:
401391
reuseFields[0] = new Field[fields.length];
402-
reuseFields2[0] = new Field[fields.length];
403392

404393
// yellow's fields:
405394
reuseFields[1] = new Field[fields.length];
406-
reuseFields2[1] = new Field[fields.length];
407395

408396

409397
for(int i=0;i<fields.length;i++) {
@@ -415,15 +403,11 @@ private void _run() throws IOException {
415403
case "rate_code_id":
416404
case "store_and_fwd_flag": {
417405
if (sparse) {
418-
reuseFields[0][i] = new StringField("green_" + fieldName, new BytesRef(), Field.Store.YES);
419-
reuseFields2[0][i] = new SortedDocValuesField("green_" + fieldName, new BytesRef());
420-
reuseFields[1][i] = new StringField("yellow_" + fieldName, new BytesRef(), Field.Store.YES);
421-
reuseFields2[1][i] = new SortedDocValuesField("yellow_" + fieldName, new BytesRef());
406+
reuseFields[0][i] = new KeywordField("green_" + fieldName, new BytesRef(), Field.Store.YES);
407+
reuseFields[1][i] = new KeywordField("yellow_" + fieldName, new BytesRef(), Field.Store.YES);
422408
} else {
423-
reuseFields[0][i] = new StringField(fieldName, new BytesRef(), Field.Store.YES);
424-
reuseFields2[0][i] = new SortedDocValuesField(fieldName, new BytesRef());
409+
reuseFields[0][i] = new KeywordField(fieldName, new BytesRef(), Field.Store.YES);
425410
reuseFields[1][i] = reuseFields[0][i];
426-
reuseFields2[1][i] = reuseFields2[0][i];
427411
}
428412
break;
429413
}
@@ -476,16 +460,15 @@ private void _run() throws IOException {
476460
}
477461
}
478462

479-
Field cabColorField = new StringField("cab_color", new BytesRef(), Field.Store.NO);
480-
Field cabColorDVField = new SortedDocValuesField("cab_color", new BytesRef());
463+
Field cabColorField = new KeywordField("cab_color", new BytesRef(), Field.Store.NO);
481464
Document reuseDoc = new Document();
482465

483466
while (true) {
484467
byte[] chunk = readChunk(docs);
485468
if (chunk == null) {
486469
break;
487470
}
488-
indexOneChunk(dateParser, parsePosition, reuseFields, reuseFields2, reuseDoc, cabColorField, cabColorDVField, chunk, w, docCounter, bytesCounter);
471+
indexOneChunk(dateParser, parsePosition, reuseFields, reuseDoc, cabColorField, chunk, w, docCounter, bytesCounter);
489472
}
490473
}
491474
};

0 commit comments

Comments
 (0)