-
Notifications
You must be signed in to change notification settings - Fork 25.2k
Enable concurrent intra merge for HNSW graphs #108164
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
mayya-sharipova
wants to merge
1
commit into
elastic:lucene_snapshot
Choose a base branch
from
mayya-sharipova:hnsw-concurrent-merge
base: lucene_snapshot
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -159,10 +159,12 @@ public static class Builder extends FieldMapper.Builder { | |
private final Parameter<Map<String, String>> meta = Parameter.metaParam(); | ||
|
||
final IndexVersion indexVersionCreated; | ||
final int mergeThreadCount; | ||
|
||
public Builder(String name, IndexVersion indexVersionCreated) { | ||
public Builder(String name, IndexVersion indexVersionCreated, int mergeThreadCount) { | ||
super(name); | ||
this.indexVersionCreated = indexVersionCreated; | ||
this.mergeThreadCount = mergeThreadCount; | ||
final boolean indexedByDefault = indexVersionCreated.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION); | ||
final boolean defaultInt8Hnsw = indexVersionCreated.onOrAfter(DEFAULT_DENSE_VECTOR_TO_INT8_HNSW); | ||
this.indexed = Parameter.indexParam(m -> toType(m).fieldType().indexed, indexedByDefault); | ||
|
@@ -255,6 +257,7 @@ public DenseVectorFieldMapper build(MapperBuilderContext context) { | |
), | ||
indexOptions.getValue(), | ||
indexVersionCreated, | ||
mergeThreadCount, | ||
multiFieldsBuilder.build(this, context), | ||
copyTo | ||
); | ||
|
@@ -838,7 +841,7 @@ private abstract static class IndexOptions implements ToXContent { | |
this.type = type; | ||
} | ||
|
||
abstract KnnVectorsFormat getVectorsFormat(); | ||
abstract KnnVectorsFormat getVectorsFormat(int mergeThreadCount); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Something about this really bugs me. The merge thread count is a dynamically updatable value. But doing this, is it really dynamic for the workers in the dense vector field mapper? |
||
|
||
boolean supportsElementType(ElementType elementType) { | ||
return true; | ||
|
@@ -938,7 +941,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws | |
} | ||
|
||
@Override | ||
KnnVectorsFormat getVectorsFormat() { | ||
KnnVectorsFormat getVectorsFormat(int mergeThreadCount) { | ||
return new ES813Int8FlatVectorFormat(confidenceInterval); | ||
} | ||
|
||
|
@@ -976,7 +979,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws | |
} | ||
|
||
@Override | ||
KnnVectorsFormat getVectorsFormat() { | ||
KnnVectorsFormat getVectorsFormat(int mergeThreadCount) { | ||
return new ES813FlatVectorFormat(); | ||
} | ||
|
||
|
@@ -1005,10 +1008,10 @@ private Int8HnswIndexOptions(int m, int efConstruction, Float confidenceInterval | |
} | ||
|
||
@Override | ||
public KnnVectorsFormat getVectorsFormat() { | ||
public KnnVectorsFormat getVectorsFormat(int mergeThreadCount) { | ||
// int bits = 7; | ||
// boolean compress = false; // TODO we only support 7 and false, for now | ||
return new ES814HnswScalarQuantizedVectorsFormat(m, efConstruction, 1, confidenceInterval, null); | ||
return new ES814HnswScalarQuantizedVectorsFormat(m, efConstruction, mergeThreadCount, confidenceInterval, null); | ||
} | ||
|
||
@Override | ||
|
@@ -1067,8 +1070,8 @@ private HnswIndexOptions(int m, int efConstruction) { | |
} | ||
|
||
@Override | ||
public KnnVectorsFormat getVectorsFormat() { | ||
return new Lucene99HnswVectorsFormat(m, efConstruction, 1, null); | ||
public KnnVectorsFormat getVectorsFormat(int mergeThreadCount) { | ||
return new Lucene99HnswVectorsFormat(m, efConstruction, mergeThreadCount, null); | ||
} | ||
|
||
@Override | ||
|
@@ -1101,7 +1104,7 @@ public String toString() { | |
} | ||
|
||
public static final TypeParser PARSER = new TypeParser( | ||
(n, c) -> new Builder(n, c.indexVersionCreated()), | ||
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexSettings().getMergeSchedulerConfig().getMaxThreadCount()), | ||
notInMultiFields(CONTENT_TYPE) | ||
); | ||
|
||
|
@@ -1394,18 +1397,21 @@ ElementType getElementType() { | |
|
||
private final IndexOptions indexOptions; | ||
private final IndexVersion indexCreatedVersion; | ||
private final int mergeThreadCount; | ||
|
||
private DenseVectorFieldMapper( | ||
String simpleName, | ||
MappedFieldType mappedFieldType, | ||
IndexOptions indexOptions, | ||
IndexVersion indexCreatedVersion, | ||
int mergeThreadCount, | ||
MultiFields multiFields, | ||
CopyTo copyTo | ||
) { | ||
super(simpleName, mappedFieldType, multiFields, copyTo); | ||
this.indexOptions = indexOptions; | ||
this.indexCreatedVersion = indexCreatedVersion; | ||
this.mergeThreadCount = mergeThreadCount; | ||
} | ||
|
||
@Override | ||
|
@@ -1448,6 +1454,7 @@ public void parse(DocumentParserContext context) throws IOException { | |
updatedDenseVectorFieldType, | ||
indexOptions, | ||
indexCreatedVersion, | ||
mergeThreadCount, | ||
multiFields(), | ||
copyTo | ||
); | ||
|
@@ -1535,7 +1542,7 @@ protected String contentType() { | |
|
||
@Override | ||
public FieldMapper.Builder getMergeBuilder() { | ||
return new Builder(simpleName(), indexCreatedVersion).init(this); | ||
return new Builder(simpleName(), indexCreatedVersion, mergeThreadCount).init(this); | ||
} | ||
|
||
private static IndexOptions parseIndexOptions(String fieldName, Object propNode) { | ||
|
@@ -1560,7 +1567,7 @@ public KnnVectorsFormat getKnnVectorsFormatForField(KnnVectorsFormat defaultForm | |
if (indexOptions == null) { | ||
format = defaultFormat; | ||
} else { | ||
format = indexOptions.getVectorsFormat(); | ||
format = indexOptions.getVectorsFormat(mergeThreadCount); | ||
} | ||
// It's legal to reuse the same format name as this is the same on-disk format. | ||
return new KnnVectorsFormat(format.getName()) { | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I do not think this should return `null`. It should instead return a same-thread executor.