diff --git a/docs/changelog/130834.yaml b/docs/changelog/130834.yaml new file mode 100644 index 0000000000000..4fb2de3083aea --- /dev/null +++ b/docs/changelog/130834.yaml @@ -0,0 +1,5 @@ +pr: 130834 +summary: Ensure vectors are always included in reindex actions +area: Vector Search +type: enhancement +issues: [] diff --git a/modules/reindex/src/main/java/org/elasticsearch/reindex/AbstractAsyncBulkByScrollAction.java b/modules/reindex/src/main/java/org/elasticsearch/reindex/AbstractAsyncBulkByScrollAction.java index de90ff97e6a95..22f585ae5e56a 100644 --- a/modules/reindex/src/main/java/org/elasticsearch/reindex/AbstractAsyncBulkByScrollAction.java +++ b/modules/reindex/src/main/java/org/elasticsearch/reindex/AbstractAsyncBulkByScrollAction.java @@ -44,6 +44,7 @@ import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptService; import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.fetch.subphase.FetchSourceContext; import org.elasticsearch.search.sort.SortBuilder; import org.elasticsearch.threadpool.ThreadPool; @@ -119,6 +120,7 @@ public abstract class AbstractAsyncBulkByScrollAction< BulkByScrollTask task, boolean needsSourceDocumentVersions, boolean needsSourceDocumentSeqNoAndPrimaryTerm, + boolean needsVectors, Logger logger, ParentTaskAssigningClient client, ThreadPool threadPool, @@ -131,6 +133,7 @@ public abstract class AbstractAsyncBulkByScrollAction< task, needsSourceDocumentVersions, needsSourceDocumentSeqNoAndPrimaryTerm, + needsVectors, logger, client, client, @@ -146,6 +149,7 @@ public abstract class AbstractAsyncBulkByScrollAction< BulkByScrollTask task, boolean needsSourceDocumentVersions, boolean needsSourceDocumentSeqNoAndPrimaryTerm, + boolean needsVectors, Logger logger, ParentTaskAssigningClient searchClient, ParentTaskAssigningClient bulkClient, @@ -173,7 +177,7 @@ public abstract class AbstractAsyncBulkByScrollAction< bulkRetry = new Retry(BackoffPolicy.wrap(backoffPolicy, worker::countBulkRetry), threadPool); scrollSource = buildScrollableResultSource( backoffPolicy, - prepareSearchRequest(mainRequest, needsSourceDocumentVersions, needsSourceDocumentSeqNoAndPrimaryTerm) + prepareSearchRequest(mainRequest, needsSourceDocumentVersions, needsSourceDocumentSeqNoAndPrimaryTerm, needsVectors) ); scriptApplier = Objects.requireNonNull(buildScriptApplier(), "script applier must not be null"); } @@ -186,7 +190,8 @@ public abstract class AbstractAsyncBulkByScrollAction< static > SearchRequest prepareSearchRequest( Request mainRequest, boolean needsSourceDocumentVersions, - boolean needsSourceDocumentSeqNoAndPrimaryTerm + boolean needsSourceDocumentSeqNoAndPrimaryTerm, + boolean needsVectors ) { var preparedSearchRequest = new SearchRequest(mainRequest.getSearchRequest()); @@ -205,6 +210,16 @@ static > SearchRequest prep sourceBuilder.version(needsSourceDocumentVersions); sourceBuilder.seqNoAndPrimaryTerm(needsSourceDocumentSeqNoAndPrimaryTerm); + if (needsVectors) { + // always include vectors in the response unless explicitly set + var fetchSource = sourceBuilder.fetchSource(); + if (fetchSource == null) { + sourceBuilder.fetchSource(FetchSourceContext.FETCH_ALL_SOURCE); + } else if (fetchSource.excludeVectors() == null) { + sourceBuilder.excludeVectors(false); + } + } + /* * Do not open scroll if max docs <= scroll size and not resuming on version conflicts */ diff --git a/modules/reindex/src/main/java/org/elasticsearch/reindex/AsyncDeleteByQueryAction.java b/modules/reindex/src/main/java/org/elasticsearch/reindex/AsyncDeleteByQueryAction.java index cbe5e0745cdf3..533e3e5548916 100644 --- a/modules/reindex/src/main/java/org/elasticsearch/reindex/AsyncDeleteByQueryAction.java +++ b/modules/reindex/src/main/java/org/elasticsearch/reindex/AsyncDeleteByQueryAction.java @@ -34,7 +34,7 @@ public AsyncDeleteByQueryAction( ScriptService scriptService, ActionListener listener ) { - super(task, false, true, logger, client, threadPool, request, listener, scriptService, null); + super(task, false, true, false, logger, client, threadPool, request, listener, scriptService, null); } @Override diff --git a/modules/reindex/src/main/java/org/elasticsearch/reindex/Reindexer.java b/modules/reindex/src/main/java/org/elasticsearch/reindex/Reindexer.java index af7b34ee5843a..c073e7c1a83ad 100644 --- a/modules/reindex/src/main/java/org/elasticsearch/reindex/Reindexer.java +++ b/modules/reindex/src/main/java/org/elasticsearch/reindex/Reindexer.java @@ -237,6 +237,7 @@ static class AsyncIndexBySearchAction extends AbstractAsyncBulkByScrollAction 0) { + entity.field(FetchSourceContext.INCLUDES_FIELD.getPreferredName(), fetchSource.includes()); + } + if (fetchSource.excludes().length > 0) { + entity.field(FetchSourceContext.EXCLUDES_FIELD.getPreferredName(), fetchSource.excludes()); + } + entity.endObject(); + } + } } entity.endObject(); diff --git a/modules/reindex/src/test/java/org/elasticsearch/reindex/AsyncBulkByScrollActionTests.java b/modules/reindex/src/test/java/org/elasticsearch/reindex/AsyncBulkByScrollActionTests.java index db642bbdc5105..4019b881251ed 100644 --- a/modules/reindex/src/test/java/org/elasticsearch/reindex/AsyncBulkByScrollActionTests.java +++ b/modules/reindex/src/test/java/org/elasticsearch/reindex/AsyncBulkByScrollActionTests.java @@ -876,7 +876,7 @@ public void done(TimeValue extraKeepAlive) {} } public void testEnableScrollByDefault() { - var preparedSearchRequest = AbstractAsyncBulkByScrollAction.prepareSearchRequest(testRequest, false, false); + var preparedSearchRequest = AbstractAsyncBulkByScrollAction.prepareSearchRequest(testRequest, false, false, false); assertThat(preparedSearchRequest.scroll(), notNullValue()); } @@ -884,7 +884,7 @@ public void testEnableScrollWhenMaxDocsIsGreaterThenScrollSize() { testRequest.setMaxDocs(between(101, 1000)); testRequest.getSearchRequest().source().size(100); - var preparedSearchRequest = AbstractAsyncBulkByScrollAction.prepareSearchRequest(testRequest, false, false); + var preparedSearchRequest = AbstractAsyncBulkByScrollAction.prepareSearchRequest(testRequest, false, false, false); assertThat(preparedSearchRequest.scroll(), notNullValue()); } @@ -893,7 +893,7 @@ public void testDisableScrollWhenMaxDocsIsLessThenScrollSize() { testRequest.setMaxDocs(between(1, 100)); testRequest.getSearchRequest().source().size(100); - var preparedSearchRequest = AbstractAsyncBulkByScrollAction.prepareSearchRequest(testRequest, false, false); + var preparedSearchRequest = AbstractAsyncBulkByScrollAction.prepareSearchRequest(testRequest, false, false, false); assertThat(preparedSearchRequest.scroll(), nullValue()); } @@ -903,7 +903,7 @@ public void testEnableScrollWhenProceedOnVersionConflict() { testRequest.getSearchRequest().source().size(100); testRequest.setAbortOnVersionConflict(false); - var preparedSearchRequest = AbstractAsyncBulkByScrollAction.prepareSearchRequest(testRequest, false, false); + var preparedSearchRequest = AbstractAsyncBulkByScrollAction.prepareSearchRequest(testRequest, false, false, false); assertThat(preparedSearchRequest.scroll(), notNullValue()); } @@ -943,6 +943,7 @@ private class DummyAsyncBulkByScrollAction extends AbstractAsyncBulkByScrollActi testTask, randomBoolean(), randomBoolean(), + randomBoolean(), AsyncBulkByScrollActionTests.this.logger, new ParentTaskAssigningClient(client, localNode, testTask), client.threadPool(), diff --git a/modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexBasicTests.java b/modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexBasicTests.java index d23ce8eb49f4e..86bcda284babb 100644 --- a/modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexBasicTests.java +++ b/modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexBasicTests.java @@ -10,6 +10,8 @@ package org.elasticsearch.reindex; import org.elasticsearch.action.index.IndexRequestBuilder; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.reindex.AbstractBulkByScrollRequest; import org.elasticsearch.index.reindex.BulkByScrollResponse; import org.elasticsearch.index.reindex.ReindexRequestBuilder; @@ -22,7 +24,10 @@ import java.util.stream.Collectors; import static org.elasticsearch.index.query.QueryBuilders.termQuery; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; +import static org.hamcrest.Matchers.anyOf; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.lessThanOrEqualTo; @@ -175,4 +180,60 @@ public void testReindexFromComplexDateMathIndexName() throws Exception { assertHitCount(prepareSearch(destIndexName).setSize(0), 4); } + public void testReindexIncludeVectors() throws Exception { + var resp1 = prepareCreate("test").setSettings( + Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build() + ).setMapping("foo", "type=dense_vector,similarity=l2_norm", "bar", "type=sparse_vector").get(); + assertAcked(resp1); + + var resp2 = prepareCreate("test_reindex").setSettings( + Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build() + ).setMapping("foo", "type=dense_vector,similarity=l2_norm", "bar", "type=sparse_vector").get(); + assertAcked(resp2); + + indexRandom( + true, + prepareIndex("test").setId("1").setSource("foo", List.of(3f, 2f, 1.5f), "bar", Map.of("token_1", 4f, "token_2", 7f)) + ); + + var searchResponse = prepareSearch("test").get(); + try { + assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(1L)); + assertThat(searchResponse.getHits().getHits().length, equalTo(1)); + var sourceMap = searchResponse.getHits().getAt(0).getSourceAsMap(); + assertThat(sourceMap.size(), equalTo(0)); + } finally { + searchResponse.decRef(); + } + + // Copy all the docs + ReindexRequestBuilder copy = reindex().source("test").destination("test_reindex").refresh(true); + var reindexResponse = copy.get(); + assertThat(reindexResponse, matcher().created(1)); + + searchResponse = prepareSearch("test_reindex").get(); + try { + assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(1L)); + assertThat(searchResponse.getHits().getHits().length, equalTo(1)); + var sourceMap = searchResponse.getHits().getAt(0).getSourceAsMap(); + assertThat(sourceMap.size(), equalTo(0)); + } finally { + searchResponse.decRef(); + } + + searchResponse = prepareSearch("test_reindex").setExcludeVectors(false).get(); + try { + assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(1L)); + assertThat(searchResponse.getHits().getHits().length, equalTo(1)); + var sourceMap = searchResponse.getHits().getAt(0).getSourceAsMap(); + assertThat(sourceMap.get("foo"), anyOf(equalTo(List.of(3f, 2f, 1.5f)), equalTo(List.of(3d, 2d, 1.5d)))); + assertThat( + sourceMap.get("bar"), + anyOf(equalTo(Map.of("token_1", 4f, "token_2", 7f)), equalTo(Map.of("token_1", 4d, "token_2", 7d))) + ); + } finally { + searchResponse.decRef(); + } + } + } diff --git a/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryBasicTests.java b/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryBasicTests.java index 36eb1a77d1d0e..ac7d05610ad3c 100644 --- a/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryBasicTests.java +++ b/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryBasicTests.java @@ -10,6 +10,8 @@ package org.elasticsearch.reindex; import org.elasticsearch.action.index.IndexRequestBuilder; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.reindex.AbstractBulkByScrollRequest; import org.elasticsearch.index.reindex.BulkByScrollResponse; import org.elasticsearch.index.reindex.UpdateByQueryRequestBuilder; @@ -23,7 +25,10 @@ import java.util.stream.Collectors; import static org.elasticsearch.index.query.QueryBuilders.termQuery; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; +import static org.hamcrest.Matchers.anyOf; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; public class UpdateByQueryBasicTests extends ReindexTestCase { @@ -150,4 +155,54 @@ public void testMissingSources() { .get(); assertThat(response, matcher().updated(0).slices(hasSize(0))); } + + public void testUpdateByQueryIncludeVectors() throws Exception { + var resp1 = prepareCreate("test").setSettings( + Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build() + ).setMapping("foo", "type=dense_vector,similarity=l2_norm", "bar", "type=sparse_vector").get(); + assertAcked(resp1); + + indexRandom( + true, + prepareIndex("test").setId("1").setSource("foo", List.of(3.0f, 2.0f, 1.5f), "bar", Map.of("token_1", 4f, "token_2", 7f)) + ); + + var searchResponse = prepareSearch("test").get(); + try { + assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(1L)); + assertThat(searchResponse.getHits().getHits().length, equalTo(1)); + var sourceMap = searchResponse.getHits().getAt(0).getSourceAsMap(); + assertThat(sourceMap.size(), equalTo(0)); + } finally { + searchResponse.decRef(); + } + + // Copy all the docs + var updateByQueryResponse = updateByQuery().source("test").refresh(true).get(); + assertThat(updateByQueryResponse, matcher().updated(1L)); + + searchResponse = prepareSearch("test").get(); + try { + assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(1L)); + assertThat(searchResponse.getHits().getHits().length, equalTo(1)); + var sourceMap = searchResponse.getHits().getAt(0).getSourceAsMap(); + assertThat(sourceMap.size(), equalTo(0)); + } finally { + searchResponse.decRef(); + } + + searchResponse = prepareSearch("test").setExcludeVectors(false).get(); + try { + assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(1L)); + assertThat(searchResponse.getHits().getHits().length, equalTo(1)); + var sourceMap = searchResponse.getHits().getAt(0).getSourceAsMap(); + assertThat(sourceMap.get("foo"), anyOf(equalTo(List.of(3f, 2f, 1.5f)), equalTo(List.of(3d, 2d, 1.5d)))); + assertThat( + sourceMap.get("bar"), + anyOf(equalTo(Map.of("token_1", 4f, "token_2", 7f)), equalTo(Map.of("token_1", 4d, "token_2", 7d))) + ); + } finally { + searchResponse.decRef(); + } + } } diff --git a/modules/reindex/src/test/java/org/elasticsearch/reindex/remote/RemoteRequestBuildersTests.java b/modules/reindex/src/test/java/org/elasticsearch/reindex/remote/RemoteRequestBuildersTests.java index 81d6500acc038..de2e0b6379536 100644 --- a/modules/reindex/src/test/java/org/elasticsearch/reindex/remote/RemoteRequestBuildersTests.java +++ b/modules/reindex/src/test/java/org/elasticsearch/reindex/remote/RemoteRequestBuildersTests.java @@ -211,14 +211,29 @@ public void testInitialSearchEntity() throws IOException { SearchRequest searchRequest = new SearchRequest(); searchRequest.source(new SearchSourceBuilder()); + // always set by AbstractAsyncBulkByScrollAction#prepareSearchRequest + searchRequest.source().excludeVectors(false); String query = "{\"match_all\":{}}"; HttpEntity entity = initialSearch(searchRequest, new BytesArray(query), remoteVersion).getEntity(); assertEquals(ContentType.APPLICATION_JSON.toString(), entity.getContentType().getValue()); if (remoteVersion.onOrAfter(Version.fromId(1000099))) { - assertEquals( - "{\"query\":" + query + ",\"_source\":true}", - Streams.copyToString(new InputStreamReader(entity.getContent(), StandardCharsets.UTF_8)) - ); + if (remoteVersion.onOrAfter(Version.V_9_1_0)) { + // vectors are automatically included on recent versions + assertEquals(XContentHelper.stripWhitespace(Strings.format(""" + { + "query": %s, + "_source": { + "exclude_vectors":false, + "includes": [], + "excludes": [] + } + }""", query)), Streams.copyToString(new InputStreamReader(entity.getContent(), StandardCharsets.UTF_8))); + } else { + assertEquals( + "{\"query\":" + query + ",\"_source\":true}", + Streams.copyToString(new InputStreamReader(entity.getContent(), StandardCharsets.UTF_8)) + ); + } } else { assertEquals( "{\"query\":" + query + "}", @@ -230,14 +245,27 @@ public void testInitialSearchEntity() throws IOException { searchRequest.source().fetchSource(new String[] { "in1", "in2" }, new String[] { "out" }); entity = initialSearch(searchRequest, new BytesArray(query), remoteVersion).getEntity(); assertEquals(ContentType.APPLICATION_JSON.toString(), entity.getContentType().getValue()); - assertEquals(XContentHelper.stripWhitespace(Strings.format(""" - { - "query": %s, - "_source": { - "includes": [ "in1", "in2" ], - "excludes": [ "out" ] - } - }""", query)), Streams.copyToString(new InputStreamReader(entity.getContent(), StandardCharsets.UTF_8))); + if (remoteVersion.onOrAfter(Version.V_9_1_0)) { + // vectors are automatically included on recent versions + assertEquals(XContentHelper.stripWhitespace(Strings.format(""" + { + "query": %s, + "_source": { + "exclude_vectors":false, + "includes": [ "in1", "in2" ], + "excludes": [ "out" ] + } + }""", query)), Streams.copyToString(new InputStreamReader(entity.getContent(), StandardCharsets.UTF_8))); + } else { + assertEquals(XContentHelper.stripWhitespace(Strings.format(""" + { + "query": %s, + "_source": { + "includes": [ "in1", "in2" ], + "excludes": [ "out" ] + } + }""", query)), Streams.copyToString(new InputStreamReader(entity.getContent(), StandardCharsets.UTF_8))); + } // Invalid XContent fails RuntimeException e = expectThrows( diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java b/server/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java index 7b2834b1179ab..cc241cdacf229 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java @@ -266,6 +266,14 @@ public SearchRequestBuilder setFetchSource(@Nullable String[] includes, @Nullabl return this; } + /** + * Indicate whether vectors should be excluded from the _source. + */ + public SearchRequestBuilder setExcludeVectors(boolean excludeVectors) { + sourceBuilder().excludeVectors(excludeVectors); + return this; + } + /** * Adds a docvalue based field to load and return. The field does not have to be stored, * but its recommended to use non analyzed or numeric fields. diff --git a/server/src/main/java/org/elasticsearch/search/builder/SearchSourceBuilder.java b/server/src/main/java/org/elasticsearch/search/builder/SearchSourceBuilder.java index d1fb8bf83af21..a3701f20583db 100644 --- a/server/src/main/java/org/elasticsearch/search/builder/SearchSourceBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/builder/SearchSourceBuilder.java @@ -878,7 +878,12 @@ public List rescores() { */ public SearchSourceBuilder fetchSource(boolean fetch) { FetchSourceContext fetchSourceContext = this.fetchSourceContext != null ? this.fetchSourceContext : FetchSourceContext.FETCH_SOURCE; - this.fetchSourceContext = FetchSourceContext.of(fetch, fetchSourceContext.includes(), fetchSourceContext.excludes()); + this.fetchSourceContext = FetchSourceContext.of( + fetch, + fetchSourceContext.excludeVectors(), + fetchSourceContext.includes(), + fetchSourceContext.excludes() + ); return this; } @@ -915,7 +920,12 @@ public SearchSourceBuilder fetchSource(@Nullable String include, @Nullable Strin */ public SearchSourceBuilder fetchSource(@Nullable String[] includes, @Nullable String[] excludes) { FetchSourceContext fetchSourceContext = this.fetchSourceContext != null ? this.fetchSourceContext : FetchSourceContext.FETCH_SOURCE; - this.fetchSourceContext = FetchSourceContext.of(fetchSourceContext.fetchSource(), includes, excludes); + this.fetchSourceContext = FetchSourceContext.of( + fetchSourceContext.fetchSource(), + fetchSourceContext.excludeVectors(), + includes, + excludes + ); return this; } @@ -927,6 +937,20 @@ public SearchSourceBuilder fetchSource(@Nullable FetchSourceContext fetchSourceC return this; } + /** + * Indicate whether vectors should be excluded from the _source. + */ + public SearchSourceBuilder excludeVectors(boolean excludeVectors) { + FetchSourceContext fetchSourceContext = this.fetchSourceContext != null ? this.fetchSourceContext : FetchSourceContext.FETCH_SOURCE; + this.fetchSourceContext = FetchSourceContext.of( + fetchSourceContext.fetchSource(), + excludeVectors, + fetchSourceContext.includes(), + fetchSourceContext.excludes() + ); + return this; + } + /** * Gets the {@link FetchSourceContext} which defines how the _source should * be fetched.