Skip to content

Commit 0ee1ffe

Browse files
committed
Remove contention on BufferedReader#readLine.
1 parent 90a9b6a commit 0ee1ffe

File tree

1 file changed

+57
-9
lines changed

1 file changed

+57
-9
lines changed

src/main/perf/IndexGeoNames.java

Lines changed: 57 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,13 @@
2525
import java.nio.file.Paths;
2626
import java.text.ParsePosition;
2727
import java.text.SimpleDateFormat;
28+
import java.util.ArrayDeque;
2829
import java.util.Date;
30+
import java.util.Deque;
2931
import java.util.Locale;
32+
import java.util.concurrent.ArrayBlockingQueue;
33+
import java.util.concurrent.TimeUnit;
34+
import java.util.concurrent.atomic.AtomicBoolean;
3035
import java.util.concurrent.atomic.AtomicInteger;
3136

3237
import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -40,12 +45,11 @@
4045
import org.apache.lucene.document.SortedDocValuesField;
4146
import org.apache.lucene.document.StringField;
4247
import org.apache.lucene.document.TextField;
43-
import org.apache.lucene.index.DirectoryReader;
4448
//import org.apache.lucene.index.IndexReader;
4549
import org.apache.lucene.index.IndexWriter;
4650
import org.apache.lucene.index.IndexWriterConfig;
47-
import org.apache.lucene.index.NoMergePolicy;
4851
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
52+
import org.apache.lucene.index.NoMergePolicy;
4953
import org.apache.lucene.store.Directory;
5054
import org.apache.lucene.store.FSDirectory;
5155
import org.apache.lucene.util.BytesRef;
@@ -59,6 +63,8 @@
5963

6064
public class IndexGeoNames {
6165

66+
private static final int BATCH_SIZE = 128;
67+
6268
final static boolean normal = false;
6369

6470
public static void main(String[] args) throws Exception {
@@ -97,6 +103,9 @@ public static void main(String[] args) throws Exception {
97103
// With reuse it's ~ 38% faster (41.8 sec vs 67.0 sec):
98104
final boolean reuseDocAndFields = false;
99105

106+
final AtomicBoolean done = new AtomicBoolean();
107+
final ArrayBlockingQueue<Deque<String>> workQueue = new ArrayBlockingQueue<>(1000);
108+
100109
for(int i=0;i<numThreads;i++) {
101110
threads[i] = new Thread() {
102111
@Override
@@ -137,11 +146,22 @@ public void run() {
137146
SortedDocValuesField tzDV = new SortedDocValuesField("timezone", new BytesRef());
138147
doc.add(tzDV);
139148

149+
Deque<String> batch = null;
140150
while (true) {
141151
try {
142152

143-
// Curiously BufferedReader.readLine seems to be thread-safe...
144-
String line = reader.readLine();
153+
if (batch == null || batch.isEmpty()) {
154+
batch = workQueue.poll(100, TimeUnit.MILLISECONDS);
155+
if (batch == null) {
156+
if (done.get()) {
157+
break;
158+
} else {
159+
continue;
160+
}
161+
}
162+
}
163+
164+
String line = batch.poll();
145165
if (line == null) {
146166
break;
147167
}
@@ -211,11 +231,22 @@ public void run() {
211231
}
212232
}
213233
} else {
234+
Deque<String> batch = null;
214235
while (true) {
215236
try {
216237

217-
// Curiously BufferedReader.readLine seems to be thread-safe...
218-
String line = reader.readLine();
238+
if (batch == null || batch.isEmpty()) {
239+
batch = workQueue.poll(100, TimeUnit.MILLISECONDS);
240+
if (batch == null) {
241+
if (done.get()) {
242+
break;
243+
} else {
244+
continue;
245+
}
246+
}
247+
}
248+
249+
String line = batch.poll();
219250
if (line == null) {
220251
break;
221252
}
@@ -284,10 +315,27 @@ public void run() {
284315
}
285316
}
286317
};
287-
threads[i].start();
288318
}
289-
for(int i=0;i<numThreads;i++) {
290-
threads[i].join();
319+
for (Thread thread : threads) {
320+
thread.start();
321+
}
322+
Deque<String> batch = new ArrayDeque<>();
323+
for (String line = reader.readLine(); ; line = reader.readLine()) {
324+
if (line == null) {
325+
if (batch.isEmpty() == false) {
326+
workQueue.put(batch);
327+
}
328+
break;
329+
}
330+
batch.add(line);
331+
if (batch.size() == BATCH_SIZE) {
332+
workQueue.put(batch);
333+
batch = new ArrayDeque<>();
334+
}
335+
}
336+
done.set(true);
337+
for (Thread thread : threads) {
338+
thread.join();
291339
}
292340
long ms = System.currentTimeMillis();
293341
System.out.println(docsIndexed + ": " + ((ms - startMS)/1000.0) + " sec");

0 commit comments

Comments
 (0)