Skip to content

Add option to include or exclude vectors from _source retrieval #128735

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jun 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/128735.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 128735
summary: Add option to include or exclude vectors from `_source` retrieval
area: Vector Search
type: feature
issues: []
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
# Shared fixture for the tests in this file.
# Gated on the `exclude_vectors_param` capability of GET /_search, then creates index `test`,
# indexes four documents (ids "1".."4") and refreshes.
setup:
- requires:
reason: 'exclude_vectors option is required'
test_runner_features: [ capabilities ]
capabilities:
- method: GET
path: /_search
capabilities: [ exclude_vectors_param ]
# NOTE(review): no request visible in this file sets `headers`; confirm this skip is still needed.
- skip:
features: "headers"

# Mapping: a keyword `name`, a `sparse_vector` field and a 5-dimension l2_norm `dense_vector` field,
# plus a `nested` field declaring its own `paragraph_id`, `vector` and `sparse_vector`.
- do:
indices.create:
index: test
body:
mappings:
properties:
name:
type: keyword
sparse_vector:
type: sparse_vector
vector:
type: dense_vector
dims: 5
similarity: l2_norm

nested:
type: nested
properties:
paragraph_id:
type: keyword
vector:
type: dense_vector
dims: 5
similarity: l2_norm
sparse_vector:
type: sparse_vector

# Doc 1: dense vector only.
- do:
index:
index: test
id: "1"
body:
name: cow.jpg
vector: [36, 267, -311, 12, -202]

# Doc 2: three nested entries, each carrying only a dense vector.
- do:
index:
index: test
id: "2"
body:
name: moose.jpg
nested:
- paragraph_id: 0
vector: [-0.5, 100.0, -13, 14.8, -156.0]
- paragraph_id: 2
vector: [0, 100.0, 0, 14.8, -156.0]
- paragraph_id: 3
vector: [0, 1.0, 0, 1.8, -15.0]

# Doc 3: both a dense vector and a sparse vector.
- do:
index:
index: test
id: "3"
body:
name: rabbit.jpg
vector: [-0.5, 100.0, -13, 14.8, -156.0]
sparse_vector:
running: 3
good: 17
run: 22

# Doc 4: nested entries mixing both kinds: paragraph 0 has dense + sparse,
# paragraph 1 has sparse only, paragraph 2 has dense only.
- do:
index:
index: test
id: "4"
body:
name: zoolander.jpg
nested:
- paragraph_id: 0
vector: [ -0.5, 100.0, -13, 14.8, -156.0 ]
sparse_vector:
running: 3
good: 17
run: 22
- paragraph_id: 1
sparse_vector:
modeling: 32
model: 20
mode: 54
- paragraph_id: 2
vector: [ -9.8, 109, 32, 14.8, 23 ]


# Refresh before the tests issue their searches.
- do:
indices.refresh: {}

---
# Searches `test` with `_source.exclude_vectors: true`, sorted by `name`.
# Asserts that `vector` and `sparse_vector` are absent from `_source`, both on the document itself
# and inside each `nested` entry, while `name` and `paragraph_id` are still returned.
"exclude vectors":
- do:
search:
index: test
body:
_source:
exclude_vectors: true
sort: ["name"]

# Hit 0: cow.jpg (dense vector only).
- match: { hits.hits.0._id: "1"}
- match: { hits.hits.0._source.name: "cow.jpg"}
- not_exists: hits.hits.0._source.vector

# Hit 1: moose.jpg; the nested array keeps its three entries, minus their vectors.
- match: { hits.hits.1._id: "2"}
- match: { hits.hits.1._source.name: "moose.jpg"}
- length: { hits.hits.1._source.nested: 3 }
- not_exists: hits.hits.1._source.nested.0.vector
- match: { hits.hits.1._source.nested.0.paragraph_id: 0 }
- not_exists: hits.hits.1._source.nested.1.vector
- match: { hits.hits.1._source.nested.1.paragraph_id: 2 }
- not_exists: hits.hits.1._source.nested.2.vector
- match: { hits.hits.1._source.nested.2.paragraph_id: 3 }

# Hit 2: rabbit.jpg; both vector kinds removed.
- match: { hits.hits.2._id: "3" }
- match: { hits.hits.2._source.name: "rabbit.jpg" }
- not_exists: hits.hits.2._source.vector
- not_exists: hits.hits.2._source.sparse_vector

# Hit 3: zoolander.jpg; dense and sparse vectors removed from every nested entry.
- match: { hits.hits.3._id: "4" }
- match: { hits.hits.3._source.name: "zoolander.jpg" }
- length: { hits.hits.3._source.nested: 3 }
- not_exists: hits.hits.3._source.nested.0.vector
- not_exists: hits.hits.3._source.nested.0.sparse_vector
- match: { hits.hits.3._source.nested.0.paragraph_id: 0 }
- not_exists: hits.hits.3._source.nested.1.sparse_vector
- match: { hits.hits.3._source.nested.1.paragraph_id: 1 }
- not_exists: hits.hits.3._source.nested.2.vector
- match: { hits.hits.3._source.nested.2.paragraph_id: 2 }

---
# Counterpart of the exclusion case: searches `test` with `_source.exclude_vectors: false`,
# sorted by `name`, and asserts that every indexed `vector` / `sparse_vector` value is present
# in `_source`, on the document itself and inside each `nested` entry.
"include vectors":
- do:
search:
index: test
body:
_source:
exclude_vectors: false
sort: ["name"]

# Hit 0: cow.jpg.
- match: { hits.hits.0._id: "1"}
- match: { hits.hits.0._source.name: "cow.jpg"}
- exists: hits.hits.0._source.vector

# Hit 1: moose.jpg; every nested entry keeps its dense vector.
- match: { hits.hits.1._id: "2"}
- match: { hits.hits.1._source.name: "moose.jpg"}
- length: { hits.hits.1._source.nested: 3 }
- exists: hits.hits.1._source.nested.0.vector
- match: { hits.hits.1._source.nested.0.paragraph_id: 0 }
- exists: hits.hits.1._source.nested.1.vector
- match: { hits.hits.1._source.nested.1.paragraph_id: 2 }
- exists: hits.hits.1._source.nested.2.vector
- match: { hits.hits.1._source.nested.2.paragraph_id: 3 }

# Hit 2: rabbit.jpg; both vector kinds present.
- match: { hits.hits.2._id: "3" }
- match: { hits.hits.2._source.name: "rabbit.jpg" }
- exists: hits.hits.2._source.vector
- exists: hits.hits.2._source.sparse_vector

# Hit 3: zoolander.jpg; each nested entry keeps whichever vectors it was indexed with.
- match: { hits.hits.3._id: "4" }
- match: { hits.hits.3._source.name: "zoolander.jpg" }
- length: { hits.hits.3._source.nested: 3 }
- exists: hits.hits.3._source.nested.0.vector
- exists: hits.hits.3._source.nested.0.sparse_vector
- match: { hits.hits.3._source.nested.0.paragraph_id: 0 }
- exists: hits.hits.3._source.nested.1.sparse_vector
- match: { hits.hits.3._source.nested.1.paragraph_id: 1 }
- exists: hits.hits.3._source.nested.2.vector
- match: { hits.hits.3._source.nested.2.paragraph_id: 2 }

---
# Combines `_source.exclude_vectors: true` with an explicit `fields` request for
# `vector`, `sparse_vector` and `nested.*`. Asserts that the vectors stay out of `_source`
# but are still returned under each hit's `fields` section.
"exclude vectors with fields":
- do:
search:
index: test
body:
_source:
exclude_vectors: true
sort: ["name"]
fields: [vector, sparse_vector, nested.*]

# Hit 0: cow.jpg; vector only available through `fields`.
- match: { hits.hits.0._id: "1"}
- match: { hits.hits.0._source.name: "cow.jpg"}
- not_exists: hits.hits.0._source.vector
- exists: hits.hits.0.fields.vector

# Hit 1: moose.jpg; only the `_source` side is asserted for this hit.
- match: { hits.hits.1._id: "2"}
- match: { hits.hits.1._source.name: "moose.jpg"}
- length: { hits.hits.1._source.nested: 3 }
- not_exists: hits.hits.1._source.nested.0.vector
- match: { hits.hits.1._source.nested.0.paragraph_id: 0 }
- not_exists: hits.hits.1._source.nested.1.vector
- match: { hits.hits.1._source.nested.1.paragraph_id: 2 }
- not_exists: hits.hits.1._source.nested.2.vector
- match: { hits.hits.1._source.nested.2.paragraph_id: 3 }

# Hit 2: rabbit.jpg; dense and sparse vectors both moved to `fields`.
- match: { hits.hits.2._id: "3" }
- match: { hits.hits.2._source.name: "rabbit.jpg" }
- not_exists: hits.hits.2._source.vector
- exists: hits.hits.2.fields.vector
- not_exists: hits.hits.2._source.sparse_vector
- exists: hits.hits.2.fields.sparse_vector


# Hit 3: zoolander.jpg; per nested entry, vectors are absent from `_source` and present in `fields.nested`.
- match: { hits.hits.3._id: "4" }
- match: { hits.hits.3._source.name: "zoolander.jpg" }
- length: { hits.hits.3._source.nested: 3 }
- not_exists: hits.hits.3._source.nested.0.vector
- exists: hits.hits.3.fields.nested.0.vector
- not_exists: hits.hits.3._source.nested.0.sparse_vector
- match: { hits.hits.3._source.nested.0.paragraph_id: 0 }
- exists: hits.hits.3.fields.nested.0.sparse_vector
- not_exists: hits.hits.3._source.nested.1.sparse_vector
- match: { hits.hits.3._source.nested.1.paragraph_id: 1 }
- exists: hits.hits.3.fields.nested.1.sparse_vector
- not_exists: hits.hits.3._source.nested.2.vector
- match: { hits.hits.3._source.nested.2.paragraph_id: 2 }
- exists: hits.hits.3.fields.nested.2.vector
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ static TransportVersion def(int id) {
public static final TransportVersion ILM_ADD_SKIP_SETTING_8_19 = def(8_841_0_43);
public static final TransportVersion ESQL_REGEX_MATCH_WITH_CASE_INSENSITIVITY_8_19 = def(8_841_0_44);
public static final TransportVersion ESQL_QUERY_PLANNING_DURATION_8_19 = def(8_841_0_45);
public static final TransportVersion SEARCH_SOURCE_EXCLUDE_VECTORS_PARAM_8_19 = def(8_841_0_46);
public static final TransportVersion V_9_0_0 = def(9_000_0_09);
public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_1 = def(9_000_0_10);
public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_2 = def(9_000_0_11);
Expand Down Expand Up @@ -286,7 +287,7 @@ static TransportVersion def(int id) {
public static final TransportVersion ILM_ADD_SKIP_SETTING = def(9_089_0_00);
public static final TransportVersion ML_INFERENCE_MISTRAL_CHAT_COMPLETION_ADDED = def(9_090_0_00);
public static final TransportVersion IDP_CUSTOM_SAML_ATTRIBUTES_ALLOW_LIST = def(9_091_0_00);

public static final TransportVersion SEARCH_SOURCE_EXCLUDE_VECTORS_PARAM = def(9_092_0_00);
/*
* STOP! READ THIS FIRST! No, really,
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,15 @@ public boolean isDimension() {
return false;
}

/**
 * Reports whether this field contains vector embeddings.
 * <p>
 * Vector embeddings are typically large and not intended for human consumption, so such fields may be excluded from responses.
 * This implementation always returns {@code false}.
 *
 * @return true if this field contains vector embeddings.
 */
public boolean isVectorEmbedding() {
return false;
}

/**
* @return true if field has script values.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2303,6 +2303,11 @@ public boolean isAggregatable() {
return false;
}

/**
 * Always returns {@code true}, flagging this field type as containing vector embeddings.
 */
@Override
public boolean isVectorEmbedding() {
return true;
}

@Override
public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) {
return elementType.fielddataBuilder(this, fieldDataContext);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,11 @@ public String typeName() {
return CONTENT_TYPE;
}

/**
 * Always returns {@code true}, flagging this field type as containing vector embeddings.
 */
@Override
public boolean isVectorEmbedding() {
return true;
}

@Override
public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) {
throw new IllegalArgumentException("[sparse_vector] fields do not support sorting, scripting or aggregating");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ private SearchCapabilities() {}
private static final String INDEX_SELECTOR_SYNTAX = "index_expression_selectors";

private static final String SIGNIFICANT_TERMS_BACKGROUND_FILTER_AS_SUB = "significant_terms_background_filter_as_sub";

private static final String SIGNIFICANT_TERMS_ON_NESTED_FIELDS = "significant_terms_on_nested_fields";
private static final String EXCLUDE_VECTORS_PARAM = "exclude_vectors_param";

public static final Set<String> CAPABILITIES;
static {
Expand All @@ -72,6 +72,7 @@ private SearchCapabilities() {}
capabilities.add(INDEX_SELECTOR_SYNTAX);
capabilities.add(SIGNIFICANT_TERMS_BACKGROUND_FILTER_AS_SUB);
capabilities.add(SIGNIFICANT_TERMS_ON_NESTED_FIELDS);
capabilities.add(EXCLUDE_VECTORS_PARAM);
CAPABILITIES = Set.copyOf(capabilities);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,9 @@ private static FetchSourceContext buildFetchSourceContext(SearchContext in) {
if (sfc != null && sfc.fetchFields()) {
for (String field : sfc.fieldNames()) {
if (SourceFieldMapper.NAME.equals(field)) {
fsc = fsc == null ? FetchSourceContext.of(true) : FetchSourceContext.of(true, fsc.includes(), fsc.excludes());
fsc = fsc == null
? FetchSourceContext.of(true)
: FetchSourceContext.of(true, fsc.excludeVectors(), fsc.includes(), fsc.excludes());
}
}
}
Expand Down
Loading
Loading