Skip to content

Commit 76829db

Browse files
authored
[8.19] Add option to include or exclude vectors from _source retrieval (#129142)
* Add option to include or exclude vectors from _source retrieval (#128735) This PR introduces a new exclude_vectors option to the _source retrieval context. When set to true, vectors (dense_vector and sparse_vector fields, including those inside nested objects) are excluded from the returned _source. This is especially efficient when used with synthetic source, as it avoids loading vector fields entirely. By default, vectors remain included unless explicitly excluded. * noop
1 parent d6f7864 commit 76829db

File tree

12 files changed

+424
-18
lines changed

12 files changed

+424
-18
lines changed

docs/changelog/128735.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 128735
2+
summary: Add option to include or exclude vectors from `_source` retrieval
3+
area: Vector Search
4+
type: feature
5+
issues: []
Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
setup:
2+
- requires:
3+
reason: 'exclude_vectors option is required'
4+
test_runner_features: [ capabilities ]
5+
capabilities:
6+
- method: GET
7+
path: /_search
8+
capabilities: [ exclude_vectors_param ]
9+
- skip:
10+
features: "headers"
11+
12+
- do:
13+
indices.create:
14+
index: test
15+
body:
16+
mappings:
17+
properties:
18+
name:
19+
type: keyword
20+
sparse_vector:
21+
type: sparse_vector
22+
vector:
23+
type: dense_vector
24+
dims: 5
25+
similarity: l2_norm
26+
27+
nested:
28+
type: nested
29+
properties:
30+
paragraph_id:
31+
type: keyword
32+
vector:
33+
type: dense_vector
34+
dims: 5
35+
similarity: l2_norm
36+
sparse_vector:
37+
type: sparse_vector
38+
39+
- do:
40+
index:
41+
index: test
42+
id: "1"
43+
body:
44+
name: cow.jpg
45+
vector: [36, 267, -311, 12, -202]
46+
47+
- do:
48+
index:
49+
index: test
50+
id: "2"
51+
body:
52+
name: moose.jpg
53+
nested:
54+
- paragraph_id: 0
55+
vector: [-0.5, 100.0, -13, 14.8, -156.0]
56+
- paragraph_id: 2
57+
vector: [0, 100.0, 0, 14.8, -156.0]
58+
- paragraph_id: 3
59+
vector: [0, 1.0, 0, 1.8, -15.0]
60+
61+
- do:
62+
index:
63+
index: test
64+
id: "3"
65+
body:
66+
name: rabbit.jpg
67+
vector: [-0.5, 100.0, -13, 14.8, -156.0]
68+
sparse_vector:
69+
running: 3
70+
good: 17
71+
run: 22
72+
73+
- do:
74+
index:
75+
index: test
76+
id: "4"
77+
body:
78+
name: zoolander.jpg
79+
nested:
80+
- paragraph_id: 0
81+
vector: [ -0.5, 100.0, -13, 14.8, -156.0 ]
82+
sparse_vector:
83+
running: 3
84+
good: 17
85+
run: 22
86+
- paragraph_id: 1
87+
sparse_vector:
88+
modeling: 32
89+
model: 20
90+
mode: 54
91+
- paragraph_id: 2
92+
vector: [ -9.8, 109, 32, 14.8, 23 ]
93+
94+
95+
- do:
96+
indices.refresh: {}
97+
98+
---
99+
"exclude vectors":
100+
- do:
101+
search:
102+
index: test
103+
body:
104+
_source:
105+
exclude_vectors: true
106+
sort: ["name"]
107+
108+
- match: { hits.hits.0._id: "1"}
109+
- match: { hits.hits.0._source.name: "cow.jpg"}
110+
- not_exists: hits.hits.0._source.vector
111+
112+
- match: { hits.hits.1._id: "2"}
113+
- match: { hits.hits.1._source.name: "moose.jpg"}
114+
- length: { hits.hits.1._source.nested: 3 }
115+
- not_exists: hits.hits.1._source.nested.0.vector
116+
- match: { hits.hits.1._source.nested.0.paragraph_id: 0 }
117+
- not_exists: hits.hits.1._source.nested.1.vector
118+
- match: { hits.hits.1._source.nested.1.paragraph_id: 2 }
119+
- not_exists: hits.hits.1._source.nested.2.vector
120+
- match: { hits.hits.1._source.nested.2.paragraph_id: 3 }
121+
122+
- match: { hits.hits.2._id: "3" }
123+
- match: { hits.hits.2._source.name: "rabbit.jpg" }
124+
- not_exists: hits.hits.2._source.vector
125+
- not_exists: hits.hits.2._source.sparse_vector
126+
127+
- match: { hits.hits.3._id: "4" }
128+
- match: { hits.hits.3._source.name: "zoolander.jpg" }
129+
- length: { hits.hits.3._source.nested: 3 }
130+
- not_exists: hits.hits.3._source.nested.0.vector
131+
- not_exists: hits.hits.3._source.nested.0.sparse_vector
132+
- match: { hits.hits.3._source.nested.0.paragraph_id: 0 }
133+
- not_exists: hits.hits.3._source.nested.1.sparse_vector
134+
- match: { hits.hits.3._source.nested.1.paragraph_id: 1 }
135+
- not_exists: hits.hits.3._source.nested.2.vector
136+
- match: { hits.hits.3._source.nested.2.paragraph_id: 2 }
137+
138+
---
139+
"include vectors":
140+
- do:
141+
search:
142+
index: test
143+
body:
144+
_source:
145+
exclude_vectors: false
146+
sort: ["name"]
147+
148+
- match: { hits.hits.0._id: "1"}
149+
- match: { hits.hits.0._source.name: "cow.jpg"}
150+
- exists: hits.hits.0._source.vector
151+
152+
- match: { hits.hits.1._id: "2"}
153+
- match: { hits.hits.1._source.name: "moose.jpg"}
154+
- length: { hits.hits.1._source.nested: 3 }
155+
- exists: hits.hits.1._source.nested.0.vector
156+
- match: { hits.hits.1._source.nested.0.paragraph_id: 0 }
157+
- exists: hits.hits.1._source.nested.1.vector
158+
- match: { hits.hits.1._source.nested.1.paragraph_id: 2 }
159+
- exists: hits.hits.1._source.nested.2.vector
160+
- match: { hits.hits.1._source.nested.2.paragraph_id: 3 }
161+
162+
- match: { hits.hits.2._id: "3" }
163+
- match: { hits.hits.2._source.name: "rabbit.jpg" }
164+
- exists: hits.hits.2._source.vector
165+
- exists: hits.hits.2._source.sparse_vector
166+
167+
- match: { hits.hits.3._id: "4" }
168+
- match: { hits.hits.3._source.name: "zoolander.jpg" }
169+
- length: { hits.hits.3._source.nested: 3 }
170+
- exists: hits.hits.3._source.nested.0.vector
171+
- exists: hits.hits.3._source.nested.0.sparse_vector
172+
- match: { hits.hits.3._source.nested.0.paragraph_id: 0 }
173+
- exists: hits.hits.3._source.nested.1.sparse_vector
174+
- match: { hits.hits.3._source.nested.1.paragraph_id: 1 }
175+
- exists: hits.hits.3._source.nested.2.vector
176+
- match: { hits.hits.3._source.nested.2.paragraph_id: 2 }
177+
178+
---
179+
"exclude vectors with fields":
180+
- do:
181+
search:
182+
index: test
183+
body:
184+
_source:
185+
exclude_vectors: true
186+
sort: ["name"]
187+
fields: [vector, sparse_vector, nested.*]
188+
189+
- match: { hits.hits.0._id: "1"}
190+
- match: { hits.hits.0._source.name: "cow.jpg"}
191+
- not_exists: hits.hits.0._source.vector
192+
- exists: hits.hits.0.fields.vector
193+
194+
- match: { hits.hits.1._id: "2"}
195+
- match: { hits.hits.1._source.name: "moose.jpg"}
196+
- length: { hits.hits.1._source.nested: 3 }
197+
- not_exists: hits.hits.1._source.nested.0.vector
198+
- match: { hits.hits.1._source.nested.0.paragraph_id: 0 }
199+
- not_exists: hits.hits.1._source.nested.1.vector
200+
- match: { hits.hits.1._source.nested.1.paragraph_id: 2 }
201+
- not_exists: hits.hits.1._source.nested.2.vector
202+
- match: { hits.hits.1._source.nested.2.paragraph_id: 3 }
203+
204+
- match: { hits.hits.2._id: "3" }
205+
- match: { hits.hits.2._source.name: "rabbit.jpg" }
206+
- not_exists: hits.hits.2._source.vector
207+
- exists: hits.hits.2.fields.vector
208+
- not_exists: hits.hits.2._source.sparse_vector
209+
- exists: hits.hits.2.fields.sparse_vector
210+
211+
212+
- match: { hits.hits.3._id: "4" }
213+
- match: { hits.hits.3._source.name: "zoolander.jpg" }
214+
- length: { hits.hits.3._source.nested: 3 }
215+
- not_exists: hits.hits.3._source.nested.0.vector
216+
- exists: hits.hits.3.fields.nested.0.vector
217+
- not_exists: hits.hits.3._source.nested.0.sparse_vector
218+
- match: { hits.hits.3._source.nested.0.paragraph_id: 0 }
219+
- exists: hits.hits.3.fields.nested.0.sparse_vector
220+
- not_exists: hits.hits.3._source.nested.1.sparse_vector
221+
- match: { hits.hits.3._source.nested.1.paragraph_id: 1 }
222+
- exists: hits.hits.3.fields.nested.1.sparse_vector
223+
- not_exists: hits.hits.3._source.nested.2.vector
224+
- match: { hits.hits.3._source.nested.2.paragraph_id: 2 }
225+
- exists: hits.hits.3.fields.nested.2.vector

server/src/main/java/org/elasticsearch/TransportVersions.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,8 @@ static TransportVersion def(int id) {
236236
public static final TransportVersion ILM_ADD_SKIP_SETTING_8_19 = def(8_841_0_43);
237237
public static final TransportVersion ESQL_REGEX_MATCH_WITH_CASE_INSENSITIVITY_8_19 = def(8_841_0_44);
238238
public static final TransportVersion ESQL_QUERY_PLANNING_DURATION_8_19 = def(8_841_0_45);
239+
public static final TransportVersion SEARCH_SOURCE_EXCLUDE_VECTORS_PARAM_8_19 = def(8_841_0_46);
240+
239241
/*
240242
* STOP! READ THIS FIRST! No, really,
241243
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _

server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,15 @@ public boolean isDimension() {
199199
return false;
200200
}
201201

202+
/**
203+
* Vector embeddings are typically large and not intended for human consumption, so such fields may be excluded from responses.
204+
*
205+
* @return true if this field contains vector embeddings.
206+
*/
207+
public boolean isVectorEmbedding() {
208+
return false;
209+
}
210+
202211
/**
203212
* @return true if field has script values.
204213
*/

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2113,6 +2113,11 @@ public boolean isAggregatable() {
21132113
return false;
21142114
}
21152115

2116+
@Override
2117+
public boolean isVectorEmbedding() {
2118+
return true;
2119+
}
2120+
21162121
@Override
21172122
public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) {
21182123
return elementType.fielddataBuilder(this, fieldDataContext);

server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,11 @@ public String typeName() {
121121
return CONTENT_TYPE;
122122
}
123123

124+
@Override
125+
public boolean isVectorEmbedding() {
126+
return true;
127+
}
128+
124129
@Override
125130
public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) {
126131
throw new IllegalArgumentException("[sparse_vector] fields do not support sorting, scripting or aggregating");

server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ private SearchCapabilities() {}
4646
private static final String INDEX_SELECTOR_SYNTAX = "index_expression_selectors";
4747

4848
private static final String SIGNIFICANT_TERMS_BACKGROUND_FILTER_AS_SUB = "significant_terms_background_filter_as_sub";
49-
5049
private static final String SIGNIFICANT_TERMS_ON_NESTED_FIELDS = "significant_terms_on_nested_fields";
50+
private static final String EXCLUDE_VECTORS_PARAM = "exclude_vectors_param";
5151

5252
public static final Set<String> CAPABILITIES;
5353
static {
@@ -67,6 +67,7 @@ private SearchCapabilities() {}
6767
capabilities.add(INDEX_SELECTOR_SYNTAX);
6868
capabilities.add(SIGNIFICANT_TERMS_BACKGROUND_FILTER_AS_SUB);
6969
capabilities.add(SIGNIFICANT_TERMS_ON_NESTED_FIELDS);
70+
capabilities.add(EXCLUDE_VECTORS_PARAM);
7071
CAPABILITIES = Set.copyOf(capabilities);
7172
}
7273
}

server/src/main/java/org/elasticsearch/search/fetch/FetchContext.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,9 @@ private static FetchSourceContext buildFetchSourceContext(SearchContext in) {
6969
if (sfc != null && sfc.fetchFields()) {
7070
for (String field : sfc.fieldNames()) {
7171
if (SourceFieldMapper.NAME.equals(field)) {
72-
fsc = fsc == null ? FetchSourceContext.of(true) : FetchSourceContext.of(true, fsc.includes(), fsc.excludes());
72+
fsc = fsc == null
73+
? FetchSourceContext.of(true)
74+
: FetchSourceContext.of(true, fsc.excludeVectors(), fsc.includes(), fsc.excludes());
7375
}
7476
}
7577
}

0 commit comments

Comments
 (0)