Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,9 @@ Bug fixes
* LUCENE-10114: Remove unused byte order mark in Lucene90PostingsWriter. This
was initially introduced by accident in Lucene 8.4. (Uwe Schindler)

* LUCENE-10140: Fix cases where minimizing interval iterators could return
incorrect matches (Nikolay Khitrin, Alan Woodward)

Changes in Backwards Compatibility Policy

* LUCENE-9904: regenerated UAX29URLEmailTokenizer and the corresponding analyzer with up-to-date top
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ private static List<IntervalsSource> flatten(List<IntervalsSource> sources) {
}

private BlockIntervalsSource(List<IntervalsSource> sources) {
super(flatten(sources), true);
super(flatten(sources));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

class CachingMatchesIterator extends FilterMatchesIterator implements IntervalMatchesIterator {

private boolean positioned = false;
private int[] posAndOffsets = new int[4 * 4];
private Query[] matchingQueries = new Query[4];
private int count = 0;
Expand All @@ -34,7 +33,7 @@ class CachingMatchesIterator extends FilterMatchesIterator implements IntervalMa
super(in);
}

private void cache() throws IOException {
void cache() throws IOException {
count = 0;
MatchesIterator mi = in.getSubMatches();
if (mi == null) {
Expand Down Expand Up @@ -62,32 +61,25 @@ private void cache() throws IOException {

/**
 * Advances the wrapped iterator {@code in} and returns its result.
 *
 * <p>The very first call only flips {@code positioned} to true; every later call invokes
 * {@code cache()} before delegating to {@code in.next()}.
 */
@Override
public boolean next() throws IOException {
if (positioned == false) {
// First call: nothing to snapshot yet, just remember that iteration has started.
positioned = true;
} else {
// NOTE(review): presumably snapshots the current match before `in` moves past it -
// confirm against the full body of cache().
cache();
}
return in.next();
}

int startOffset(int endPos) throws IOException {
if (endPosition() <= endPos) {
return in.startOffset();
}
@Override
public int startOffset() throws IOException {
return posAndOffsets[2];
}

int endOffset(int endPos) throws IOException {
if (endPosition() <= endPos) {
return in.endOffset();
}
return posAndOffsets[count * 4 + 3];
@Override
public int endOffset() throws IOException {
return posAndOffsets[(count - 1) * 4 + 3];
}

MatchesIterator getSubMatches(int endPos) throws IOException {
if (endPosition() <= endPos) {
cache();
}
@Override
public MatchesIterator getSubMatches() {
// We always return a submatches, even if there's only a single
// cached submatch, because this way we can return the correct
// positions - the positions of the top-level match may have
// moved on due to minimization
return new MatchesIterator() {

int upto = -1;
Expand Down Expand Up @@ -130,6 +122,11 @@ public Query getQuery() {
};
}

/**
 * Returns the first entry of the {@code matchingQueries} array.
 *
 * <p>NOTE(review): the array is presumably filled by {@code cache()}; that part of the code is not
 * visible here, so confirm that slot 0 is always populated before this is called.
 */
@Override
public Query getQuery() {
return matchingQueries[0];
}

@Override
public int gaps() {
return ((IntervalMatchesIterator) in).gaps();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,16 @@
import java.util.stream.Collectors;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.FilterMatchesIterator;
import org.apache.lucene.search.MatchesIterator;
import org.apache.lucene.search.MatchesUtils;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;

abstract class ConjunctionIntervalsSource extends IntervalsSource {

protected final List<IntervalsSource> subSources;
protected final boolean isMinimizing;

protected ConjunctionIntervalsSource(List<IntervalsSource> subSources, boolean isMinimizing) {
protected ConjunctionIntervalsSource(List<IntervalsSource> subSources) {
assert subSources.size() > 1;
this.subSources = subSources;
this.isMinimizing = isMinimizing;
}

@Override
Expand Down Expand Up @@ -73,9 +68,6 @@ public final IntervalMatchesIterator matches(String field, LeafReaderContext ctx
if (mi == null) {
return null;
}
if (isMinimizing) {
mi = new CachingMatchesIterator(mi);
}
subs.add(mi);
}
IntervalIterator it =
Expand All @@ -89,103 +81,6 @@ public final IntervalMatchesIterator matches(String field, LeafReaderContext ctx
if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
return null;
}
return isMinimizing
? new MinimizingConjunctionMatchesIterator(it, subs)
: new ConjunctionMatchesIterator(it, subs);
}

/**
 * An {@link IntervalMatchesIterator} that exposes a combined {@link IntervalIterator} together
 * with the per-source match iterators it was built from.
 *
 * <p>Positions, gaps and width are read from {@code iterator}; offsets and sub-matches are
 * aggregated over {@code subs}.
 */
private static class ConjunctionMatchesIterator implements IntervalMatchesIterator {

// Supplies start/end positions, gaps and width for the current match.
final IntervalIterator iterator;
// One matches iterator per sub-source; consulted for offsets and sub-matches.
final List<IntervalMatchesIterator> subs;
// True until the first next() call, which reports the current interval without advancing.
boolean cached = true;

/**
 * @param iterator the combined interval iterator; the first {@link #next()} call does not
 *     advance it, so it is expected to be positioned on its first interval already
 * @param subs the matches iterators of the individual sub-sources
 */
private ConjunctionMatchesIterator(
IntervalIterator iterator, List<IntervalMatchesIterator> subs) {
this.iterator = iterator;
this.subs = subs;
}

/**
 * Moves to the next match.
 *
 * @return {@code true} on the first call (without touching {@code iterator}); afterwards
 *     {@code true} as long as {@code iterator.nextInterval()} does not return
 *     {@link IntervalIterator#NO_MORE_INTERVALS}
 */
@Override
public boolean next() throws IOException {
if (cached) {
// The first interval is reported as-is; only clear the flag.
cached = false;
return true;
}
return iterator.nextInterval() != IntervalIterator.NO_MORE_INTERVALS;
}

/** Returns {@code iterator.start()}. */
@Override
public int startPosition() {
return iterator.start();
}

/** Returns {@code iterator.end()}. */
@Override
public int endPosition() {
return iterator.end();
}

/** Returns the smallest {@code startOffset()} reported by any iterator in {@code subs}. */
@Override
public int startOffset() throws IOException {
int start = Integer.MAX_VALUE;
for (MatchesIterator s : subs) {
start = Math.min(start, s.startOffset());
}
return start;
}

/**
 * Returns the largest {@code endOffset()} reported by any iterator in {@code subs}; the running
 * maximum starts at -1.
 */
@Override
public int endOffset() throws IOException {
int end = -1;
for (MatchesIterator s : subs) {
end = Math.max(end, s.endOffset());
}
return end;
}

/**
 * Collects the sub-matches of every iterator in {@code subs} and combines them with
 * {@link MatchesUtils#disjunction}.
 */
@Override
public MatchesIterator getSubMatches() throws IOException {
List<MatchesIterator> subMatches = new ArrayList<>();
for (MatchesIterator mi : subs) {
MatchesIterator sub = mi.getSubMatches();
if (sub == null) {
// No sub-matches reported: wrap the iterator itself so it still contributes one entry.
sub = new SingletonMatchesIterator(mi);
}
subMatches.add(sub);
}
return MatchesUtils.disjunction(subMatches);
}

/**
 * Not supported by this iterator.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public Query getQuery() {
throw new UnsupportedOperationException();
}

/** Returns {@code iterator.gaps()}. */
@Override
public int gaps() {
return iterator.gaps();
}

/** Returns {@code iterator.width()}. */
@Override
public int width() {
return iterator.width();
}
}

static class SingletonMatchesIterator extends FilterMatchesIterator {

boolean exhausted = false;

SingletonMatchesIterator(MatchesIterator in) {
super(in);
}

@Override
public boolean next() {
if (exhausted) {
return false;
}
return exhausted = true;
}
return new ConjunctionMatchesIterator(it, subs);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,21 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.search.FilterMatchesIterator;
import org.apache.lucene.search.MatchesIterator;
import org.apache.lucene.search.MatchesUtils;
import org.apache.lucene.search.Query;

class MinimizingConjunctionMatchesIterator implements IntervalMatchesIterator {
class ConjunctionMatchesIterator implements IntervalMatchesIterator {

final IntervalIterator iterator;
private final List<CachingMatchesIterator> subs = new ArrayList<>();
private boolean cached = true;
final List<? extends IntervalMatchesIterator> subs;
boolean cached = true;

MinimizingConjunctionMatchesIterator(
IntervalIterator iterator, List<IntervalMatchesIterator> subs) {
ConjunctionMatchesIterator(
IntervalIterator iterator, List<? extends IntervalMatchesIterator> subs) {
this.iterator = iterator;
for (MatchesIterator mi : subs) {
assert mi instanceof CachingMatchesIterator;
this.subs.add((CachingMatchesIterator) mi);
}
this.subs = subs;
}

@Override
Expand All @@ -61,23 +59,39 @@ public int endPosition() {
@Override
public int startOffset() throws IOException {
int start = Integer.MAX_VALUE;
int endPos = endPosition();
for (CachingMatchesIterator s : subs) {
start = Math.min(start, s.startOffset(endPos));
for (MatchesIterator s : subs) {
start = Math.min(start, s.startOffset());
}
return start;
}

@Override
public int endOffset() throws IOException {
int end = 0;
int endPos = endPosition();
for (CachingMatchesIterator s : subs) {
end = Math.max(end, s.endOffset(endPos));
int end = -1;
for (MatchesIterator s : subs) {
end = Math.max(end, s.endOffset());
}
return end;
}

/**
 * Collects the sub-matches of every iterator in {@code subs} and combines them with
 * {@link MatchesUtils#disjunction}.
 */
@Override
public MatchesIterator getSubMatches() throws IOException {
List<MatchesIterator> subMatches = new ArrayList<>();
for (MatchesIterator mi : subs) {
MatchesIterator sub = mi.getSubMatches();
if (sub == null) {
// No sub-matches reported: wrap the iterator itself so it still contributes one entry.
sub = new SingletonMatchesIterator(mi);
}
subMatches.add(sub);
}
return MatchesUtils.disjunction(subMatches);
}

/**
 * Not supported by this iterator.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public Query getQuery() {
throw new UnsupportedOperationException();
}

@Override
public int gaps() {
return iterator.gaps();
Expand All @@ -88,18 +102,20 @@ public int width() {
return iterator.width();
}

@Override
public MatchesIterator getSubMatches() throws IOException {
List<MatchesIterator> mis = new ArrayList<>();
int endPos = endPosition();
for (CachingMatchesIterator s : subs) {
mis.add(s.getSubMatches(endPos));
static class SingletonMatchesIterator extends FilterMatchesIterator {

boolean exhausted = false;

SingletonMatchesIterator(MatchesIterator in) {
super(in);
}
return MatchesUtils.disjunction(mis);
}

@Override
public Query getQuery() {
return null;
@Override
public boolean next() {
if (exhausted) {
return false;
}
return exhausted = true;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ static IntervalsSource build(IntervalsSource small, IntervalsSource big) {
private final IntervalsSource big;

private ContainedByIntervalsSource(IntervalsSource small, IntervalsSource big) {
super(Arrays.asList(small, big), false);
super(Arrays.asList(small, big));
this.small = small;
this.big = big;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ static IntervalsSource build(IntervalsSource big, IntervalsSource small) {
}

private ContainingIntervalsSource(IntervalsSource big, IntervalsSource small) {
super(Arrays.asList(big, small), false);
super(Arrays.asList(big, small));
this.big = big;
this.small = small;
}
Expand Down
Loading