diff --git a/docs/changelog/130382.yaml b/docs/changelog/130382.yaml new file mode 100644 index 0000000000000..629ae4ee2d398 --- /dev/null +++ b/docs/changelog/130382.yaml @@ -0,0 +1,5 @@ +pr: 130382 +summary: Remove vectors from `_source` transparently +area: "Vector Search" +type: enhancement +issues: [] diff --git a/qa/ccs-common-rest/build.gradle b/qa/ccs-common-rest/build.gradle index 103eb88fa4d29..4e7a5249f38c2 100644 --- a/qa/ccs-common-rest/build.gradle +++ b/qa/ccs-common-rest/build.gradle @@ -11,7 +11,7 @@ apply plugin: 'elasticsearch.internal-yaml-rest-test' restResources { restApi { include 'capabilities', 'cat.shards', '_common', 'bulk', 'count', 'cluster', 'field_caps', 'get', 'knn_search', 'index', 'indices', 'msearch', - "nodes.stats", 'search', 'async_search', 'graph', '*_point_in_time', 'info', 'scroll', 'clear_scroll', 'search_mvt', 'eql', 'sql' + "nodes.stats", 'search', 'async_search', 'graph', '*_point_in_time', 'info', 'scroll', 'clear_scroll', 'search_mvt', 'eql', 'sql', 'update' } restTests { includeCore 'field_caps', 'msearch', 'search', 'suggest', 'scroll', "indices.resolve_index" diff --git a/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java b/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java index 44abbf2652f95..37480ba3c14e0 100644 --- a/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java +++ b/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java @@ -91,7 +91,8 @@ public class CcsCommonYamlTestSuiteIT extends ESClientYamlSuiteTestCase { .setting("xpack.license.self_generated.type", "trial") .feature(FeatureFlag.TIME_SERIES_MODE) .feature(FeatureFlag.SUB_OBJECTS_AUTO_ENABLED) - .feature(FeatureFlag.IVF_FORMAT); + .feature(FeatureFlag.IVF_FORMAT) + .feature(FeatureFlag.SYNTHETIC_VECTORS); private static ElasticsearchCluster 
remoteCluster = ElasticsearchCluster.local() .name(REMOTE_CLUSTER_NAME) diff --git a/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/RcsCcsCommonYamlTestSuiteIT.java b/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/RcsCcsCommonYamlTestSuiteIT.java index 0471366b0bd53..a79aeba690f57 100644 --- a/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/RcsCcsCommonYamlTestSuiteIT.java +++ b/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/RcsCcsCommonYamlTestSuiteIT.java @@ -93,6 +93,7 @@ public class RcsCcsCommonYamlTestSuiteIT extends ESClientYamlSuiteTestCase { .feature(FeatureFlag.TIME_SERIES_MODE) .feature(FeatureFlag.SUB_OBJECTS_AUTO_ENABLED) .feature(FeatureFlag.IVF_FORMAT) + .feature(FeatureFlag.SYNTHETIC_VECTORS) .user("test_admin", "x-pack-test-password"); private static ElasticsearchCluster fulfillingCluster = ElasticsearchCluster.local() diff --git a/qa/smoke-test-multinode/src/yamlRestTest/java/org/elasticsearch/smoketest/SmokeTestMultiNodeClientYamlTestSuiteIT.java b/qa/smoke-test-multinode/src/yamlRestTest/java/org/elasticsearch/smoketest/SmokeTestMultiNodeClientYamlTestSuiteIT.java index 200bff253e2ff..b50df4183e2ab 100644 --- a/qa/smoke-test-multinode/src/yamlRestTest/java/org/elasticsearch/smoketest/SmokeTestMultiNodeClientYamlTestSuiteIT.java +++ b/qa/smoke-test-multinode/src/yamlRestTest/java/org/elasticsearch/smoketest/SmokeTestMultiNodeClientYamlTestSuiteIT.java @@ -39,6 +39,7 @@ public class SmokeTestMultiNodeClientYamlTestSuiteIT extends ESClientYamlSuiteTe .feature(FeatureFlag.DOC_VALUES_SKIPPER) .feature(FeatureFlag.USE_LUCENE101_POSTINGS_FORMAT) .feature(FeatureFlag.IVF_FORMAT) + .feature(FeatureFlag.SYNTHETIC_VECTORS) .build(); public SmokeTestMultiNodeClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) { diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/get.json 
b/rest-api-spec/src/main/resources/rest-api-spec/api/get.json index 62eb47821e0aa..cb696810dbd9b 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/get.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/get.json @@ -68,6 +68,10 @@ "type":"list", "description":"A list of fields to extract and return from the _source field" }, + "_source_exclude_vectors":{ + "type":"boolean", + "description":"Whether vectors should be excluded from _source" + }, "version":{ "type":"number", "description":"Explicit version number for concurrency control" diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/search.json b/rest-api-spec/src/main/resources/rest-api-spec/api/search.json index 25b4efd9c4c37..0cbe1f94f14d5 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/search.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/search.json @@ -155,6 +155,10 @@ "type":"list", "description":"A list of fields to extract and return from the _source field" }, + "_source_exclude_vectors":{ + "type":"boolean", + "description":"Whether vectors should be excluded from _source" + }, "terminate_after":{ "type":"number", "description":"The maximum number of documents to collect for each shard, upon reaching which the query execution will terminate early." 
diff --git a/rest-api-spec/src/yamlRestTest/java/org/elasticsearch/test/rest/ClientYamlTestSuiteIT.java b/rest-api-spec/src/yamlRestTest/java/org/elasticsearch/test/rest/ClientYamlTestSuiteIT.java index ee7cd9059e50d..de2a0859dcf7b 100644 --- a/rest-api-spec/src/yamlRestTest/java/org/elasticsearch/test/rest/ClientYamlTestSuiteIT.java +++ b/rest-api-spec/src/yamlRestTest/java/org/elasticsearch/test/rest/ClientYamlTestSuiteIT.java @@ -39,6 +39,7 @@ public class ClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase { .feature(FeatureFlag.DOC_VALUES_SKIPPER) .feature(FeatureFlag.USE_LUCENE101_POSTINGS_FORMAT) .feature(FeatureFlag.IVF_FORMAT) + .feature(FeatureFlag.SYNTHETIC_VECTORS) .build(); public ClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) { diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_vectors.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_vectors.yml new file mode 100644 index 0000000000000..414f6cfdad645 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_vectors.yml @@ -0,0 +1,347 @@ +setup: + - requires: + reason: 'synthetic vectors are required' + test_runner_features: [ capabilities ] + capabilities: + - method: GET + path: /_search + capabilities: [ synthetic_vectors_setting ] + - skip: + features: "headers" + + - do: + indices.create: + index: test + body: + settings: + index.mapping.synthetic_vectors: true + mappings: + properties: + name: + type: keyword + vector: + type: dense_vector + dims: 3 + similarity: l2_norm + + nested: + type: nested + properties: + paragraph_id: + type: keyword + vector: + type: dense_vector + dims: 3 + similarity: l2_norm + + - do: + index: + index: test + id: "1" + body: + name: cow.jpg + vector: [1, 2, 3] + + - do: + index: + index: test + id: "2" + body: + name: moose.jpg + nested: + - paragraph_id: 0 + vector: 
[1, 2, 3] + - paragraph_id: 2 + vector: [4, 5, 6] + - paragraph_id: 3 + vector: [7, 8, 9] + + - do: + index: + index: test + id: "3" + body: + name: rabbit.jpg + vector: [10, 11, 12] + + - do: + index: + index: test + id: "4" + body: + name: zoolander.jpg + nested: + - paragraph_id: 0 + vector: [ 13, 14, 15 ] + - paragraph_id: 1 + - paragraph_id: 2 + vector: [ 16, 17, 18 ] + + - do: + indices.refresh: {} + +--- +"exclude synthetic vectors": + - do: + search: + index: test + body: + sort: ["name"] + + - match: { hits.hits.0._id: "1"} + - match: { hits.hits.0._source.name: "cow.jpg"} + - not_exists: hits.hits.0._source.vector + + - match: { hits.hits.1._id: "2"} + - match: { hits.hits.1._source.name: "moose.jpg"} + - length: { hits.hits.1._source.nested: 3 } + - not_exists: hits.hits.1._source.nested.0.vector + - match: { hits.hits.1._source.nested.0.paragraph_id: 0 } + - not_exists: hits.hits.1._source.nested.1.vector + - match: { hits.hits.1._source.nested.1.paragraph_id: 2 } + - not_exists: hits.hits.1._source.nested.2.vector + - match: { hits.hits.1._source.nested.2.paragraph_id: 3 } + + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2._source.name: "rabbit.jpg" } + - not_exists: hits.hits.2._source.vector + + - match: { hits.hits.3._id: "4" } + - match: { hits.hits.3._source.name: "zoolander.jpg" } + - length: { hits.hits.3._source.nested: 3 } + - not_exists: hits.hits.3._source.nested.0.vector + - match: { hits.hits.3._source.nested.0.paragraph_id: 0 } + - match: { hits.hits.3._source.nested.1.paragraph_id: 1 } + - not_exists: hits.hits.3._source.nested.2.vector + - match: { hits.hits.3._source.nested.2.paragraph_id: 2 } + +--- +"include synthetic vectors": + - do: + search: + index: test + body: + _source: + exclude_vectors: false + sort: ["name"] + + - match: { hits.hits.0._id: "1"} + - match: { hits.hits.0._source.name: "cow.jpg"} + - exists: hits.hits.0._source.vector + + - match: { hits.hits.1._id: "2"} + - match: { hits.hits.1._source.name: 
"moose.jpg"} + - length: { hits.hits.1._source.nested: 3 } + - exists: hits.hits.1._source.nested.0.vector + - match: { hits.hits.1._source.nested.0.paragraph_id: 0 } + - exists: hits.hits.1._source.nested.1.vector + - match: { hits.hits.1._source.nested.1.paragraph_id: 2 } + - exists: hits.hits.1._source.nested.2.vector + - match: { hits.hits.1._source.nested.2.paragraph_id: 3 } + + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2._source.name: "rabbit.jpg" } + - exists: hits.hits.2._source.vector + + - match: { hits.hits.3._id: "4" } + - match: { hits.hits.3._source.name: "zoolander.jpg" } + - length: { hits.hits.3._source.nested: 3 } + - exists: hits.hits.3._source.nested.0.vector + - length: { hits.hits.3._source.nested.0.vector: 3 } + - match: { hits.hits.3._source.nested.0.paragraph_id: 0 } + + - do: + search: + index: test + body: + _source: + exclude_vectors: false + includes: nested.vector + sort: ["name"] + + - match: { hits.hits.0._id: "1"} + - length: { hits.hits.0._source: 0} + + - match: { hits.hits.1._id: "2"} + - length: { hits.hits.1._source: 1 } + - length: { hits.hits.1._source.nested: 3 } + - exists: hits.hits.1._source.nested.0.vector + - not_exists: hits.hits.1._source.nested.0.paragraph_id + - exists: hits.hits.1._source.nested.1.vector + - not_exists: hits.hits.1._source.nested.1.paragraph_id + - exists: hits.hits.1._source.nested.2.vector + - not_exists: hits.hits.1._source.nested.2.paragraph_id + + - match: { hits.hits.2._id: "3" } + - length: { hits.hits.2._source: 0} + + - match: { hits.hits.3._id: "4" } + - length: { hits.hits.3._source: 1 } + - length: { hits.hits.3._source.nested: 2 } + - exists: hits.hits.3._source.nested.0.vector + - length: { hits.hits.3._source.nested.0.vector: 3 } + - not_exists: hits.hits.3._source.nested.0.paragraph_id + - exists: hits.hits.3._source.nested.1.vector + - length: { hits.hits.3._source.nested.1.vector: 3 } + - not_exists: hits.hits.3._source.nested.1.paragraph_id + + - do: + headers: + #
Force JSON content type so that we use a parser that interprets the embeddings as doubles + Content-Type: application/json + search: + index: test + body: + _source: + exclude_vectors: true + sort: ["name"] + fields: ["vector"] + + - match: { hits.hits.0._id: "1"} + - match: { hits.hits.0._source.name: "cow.jpg"} + - not_exists: hits.hits.0._source.vector + - match: { hits.hits.0.fields.vector: [1.0, 2.0, 3.0]} + + - match: { hits.hits.1._id: "2"} + - match: { hits.hits.1._source.name: "moose.jpg"} + - length: { hits.hits.1._source.nested: 3 } + - not_exists: hits.hits.1._source.nested.0.vector + + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2._source.name: "rabbit.jpg" } + - match: { hits.hits.2.fields.vector: [10.0, 11.0, 12.0]} + + - match: { hits.hits.3._id: "4" } + - match: { hits.hits.3._source.name: "zoolander.jpg" } + - length: { hits.hits.3._source.nested: 3 } + - not_exists: hits.hits.3._source.nested.0.vector + + +--- +"Bulk partial update with synthetic vectors": + - do: + headers: + # Force JSON content type so that we use a parser that interprets the embeddings as doubles + Content-Type: application/json + bulk: + index: test + _source: true + body: + - '{"update": {"_id": "4"}}' + - > + { + "doc": { + "name": "zoolander2.jpg", + "vector": [1, 2, 4] + } + } + + - match: { items.0.update.get._source.vector: [1, 2, 4] } + - exists: items.0.update.get._source.nested + - length: { items.0.update.get._source.nested: 3} + - exists: items.0.update.get._source.nested.0.vector + - match: { items.0.update.get._source.nested.0.paragraph_id: 0 } + - length: { items.0.update.get._source.nested.0.vector: 3 } + - not_exists: items.0.update.get._source.nested.1.vector + - match: { items.0.update.get._source.nested.1.paragraph_id: 1 } + - exists: items.0.update.get._source.nested.2.vector + - length: { items.0.update.get._source.nested.2.vector: 3 } + - match: { items.0.update.get._source.nested.2.paragraph_id: 2 } + - set: { 
items.0.update.get._source.nested: original_nested } + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the embeddings as doubles + Content-Type: application/json + get: + _source_exclude_vectors: false + index: test + id: "4" + + - match: { _source.vector: [1.0, 2.0, 4.0] } + - match: { _source.name: zoolander2.jpg } + - match: { _source.nested: $original_nested } + + - do: + indices.refresh: {} + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the embeddings as doubles + Content-Type: application/json + search: + index: test + body: + _source: + "exclude_vectors": false + query: + term: + _id: 4 + + - match: { hits.total.value: 1 } + - match: { hits.total.relation: eq } + - match: { hits.hits.0._source.name: zoolander2.jpg } + - match: { hits.hits.0._source.nested: $original_nested } + +--- +"Partial update with synthetic vectors": + - do: + headers: + # Force JSON content type so that we use a parser that interprets the vectors as doubles + Content-Type: application/json + update: + index: test + id: "4" + body: + _source: true + doc: { + "name": "zoolander3.jpg", + "vector": [6, 8, 9] + } + + - match: { get._source.vector: [6, 8, 9] } + - exists: get._source.nested + - length: { get._source.nested: 3} + - exists: get._source.nested.0.vector + - match: { get._source.nested.0.paragraph_id: 0 } + - length: { get._source.nested.0.vector: 3 } + - not_exists: get._source.nested.1.vector + - match: { get._source.nested.1.paragraph_id: 1 } + - exists: get._source.nested.2.vector + - length: { get._source.nested.2.vector: 3 } + - match: { get._source.nested.2.paragraph_id: 2 } + - set: { get._source.nested: original_nested } + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the vectors as doubles + Content-Type: application/json + get: + _source_exclude_vectors: false + index: test + id: "4" + + - match: { _source.vector: [6.0, 8.0, 9.0] } + - match: { 
_source.name: zoolander3.jpg } + - match: { _source.nested: $original_nested } + + - do: + indices.refresh: {} + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the vectors as doubles + Content-Type: application/json + search: + index: test + body: + _source: + "exclude_vectors": false + query: + term: + _id: 4 + + - match: { hits.total.value: 1 } + - match: { hits.total.relation: eq } + - match: { hits.hits.0._source.name: zoolander3.jpg } + - match: { hits.hits.0._source.nested: $original_nested } diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index 796b03211432b..9f4c5b80ccf23 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -49,6 +49,8 @@ import java.util.Map; import java.util.Set; +import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS; + /** * Encapsulates all valid index level settings. 
* @see Property#IndexScope @@ -241,6 +243,9 @@ public final class IndexScopedSettings extends AbstractScopedSettings { if (IndexSettings.DOC_VALUES_SKIPPER) { settings.add(IndexSettings.USE_DOC_VALUES_SKIPPER); } + if (SYNTHETIC_VECTORS) { + settings.add(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING); + } BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(settings); }; diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index eac2ef3d42b61..dd1f241fe32e5 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -847,6 +847,14 @@ private static String getIgnoreAboveDefaultValue(final Settings settings) { Property.Final ); + public static final boolean SYNTHETIC_VECTORS = new FeatureFlag("mapping_synthetic_vectors").isEnabled(); + public static final Setting INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING = Setting.boolSetting( + "index.mapping.synthetic_vectors", + false, + Property.IndexScope, + Property.Final + ); + private final Index index; private final IndexVersion version; private final Logger logger; @@ -890,7 +898,6 @@ private static String getIgnoreAboveDefaultValue(final Settings settings) { private final boolean logsdbRouteOnSortFields; private final boolean logsdbSortOnHostName; private final boolean logsdbAddHostNameField; - private volatile long retentionLeaseMillis; /** diff --git a/server/src/main/java/org/elasticsearch/index/engine/LuceneChangesSnapshot.java b/server/src/main/java/org/elasticsearch/index/engine/LuceneChangesSnapshot.java index 5c50bbeac796b..54010cab0f3f4 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/LuceneChangesSnapshot.java +++ b/server/src/main/java/org/elasticsearch/index/engine/LuceneChangesSnapshot.java @@ -22,11 +22,16 @@ import org.elasticsearch.index.fieldvisitor.FieldsVisitor; import 
org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.SourceFieldMapper; +import org.elasticsearch.index.mapper.SourceLoader; +import org.elasticsearch.index.mapper.SourceLoader.SyntheticVectorsLoader; import org.elasticsearch.index.translog.Translog; +import org.elasticsearch.search.lookup.Source; import org.elasticsearch.transport.Transports; import java.io.IOException; +import java.util.ArrayList; import java.util.Comparator; +import java.util.List; /** * A {@link Translog.Snapshot} from changes in a Lucene index @@ -42,6 +47,8 @@ public final class LuceneChangesSnapshot extends SearchBasedChangesSnapshot { private int storedFieldsReaderOrd = -1; private StoredFieldsReader storedFieldsReader = null; + private final SyntheticVectorsLoader syntheticVectorPatchLoader; + private SyntheticVectorsLoader.Leaf syntheticVectorPatchLoaderLeaf; private final Thread creationThread; // for assertion @@ -76,6 +83,7 @@ public LuceneChangesSnapshot( this.lastSeenSeqNo = fromSeqNo - 1; final TopDocs topDocs = nextTopDocs(); this.maxDocIndex = topDocs.scoreDocs.length; + this.syntheticVectorPatchLoader = mapperService.mappingLookup().getMapping().syntheticVectorsLoader(null); fillParallelArray(topDocs.scoreDocs, parallelArray); } @@ -218,7 +226,7 @@ private Translog.Operation readDocAsOp(int docIndex) throws IOException { if (leaf.reader() instanceof SequentialStoredFieldsLeafReader) { storedFieldsReader = ((SequentialStoredFieldsLeafReader) leaf.reader()).getSequentialStoredFieldsReader(); storedFieldsReaderOrd = leaf.ord; - setNextSourceMetadataReader(leaf); + setNextSyntheticFieldsReader(leaf); } else { storedFieldsReader = null; storedFieldsReaderOrd = -1; @@ -232,10 +240,12 @@ private Translog.Operation readDocAsOp(int docIndex) throws IOException { assert storedFieldsReaderOrd == leaf.ord : storedFieldsReaderOrd + " != " + leaf.ord; storedFieldsReader.document(segmentDocID, fields); } else { - setNextSourceMetadataReader(leaf); + 
setNextSyntheticFieldsReader(leaf); leaf.reader().storedFields().document(segmentDocID, fields); } - final BytesReference source = fields.source() != null ? addSourceMetadata(fields.source(), segmentDocID) : null; + final BytesReference source = fields.source() != null && fields.source().length() > 0 + ? addSyntheticFields(Source.fromBytes(fields.source()), segmentDocID).internalSourceRef() + : fields.source(); final Translog.Operation op; final boolean isTombstone = parallelArray.isTombStone[docIndex]; @@ -281,6 +291,28 @@ private Translog.Operation readDocAsOp(int docIndex) throws IOException { return op; } + @Override + protected void setNextSyntheticFieldsReader(LeafReaderContext context) throws IOException { + super.setNextSyntheticFieldsReader(context); + if (syntheticVectorPatchLoader != null) { + syntheticVectorPatchLoaderLeaf = syntheticVectorPatchLoader.leaf(context); + } + } + + @Override + protected Source addSyntheticFields(Source source, int segmentDocID) throws IOException { + if (syntheticVectorPatchLoaderLeaf == null) { + return super.addSyntheticFields(source, segmentDocID); + } + List patches = new ArrayList<>(); + syntheticVectorPatchLoaderLeaf.load(segmentDocID, patches); + if (patches.size() == 0) { + return super.addSyntheticFields(source, segmentDocID); + } + var newSource = SourceLoader.applySyntheticVectors(source, patches); + return super.addSyntheticFields(newSource, segmentDocID); + } + private static final class ParallelArray { final LeafReaderContext[] leafReaderContexts; final int[] docID; diff --git a/server/src/main/java/org/elasticsearch/index/engine/LuceneSyntheticSourceChangesSnapshot.java b/server/src/main/java/org/elasticsearch/index/engine/LuceneSyntheticSourceChangesSnapshot.java index 5b1e31f46987a..3b4986c6e17af 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/LuceneSyntheticSourceChangesSnapshot.java +++ b/server/src/main/java/org/elasticsearch/index/engine/LuceneSyntheticSourceChangesSnapshot.java @@ 
-219,7 +219,7 @@ var record = documentRecords.get(j); int[] nextDocIdArray = nextDocIds.toArray(); leafFieldLoader = storedFieldLoader.getLoader(leafReaderContext, nextDocIdArray); leafSourceLoader = sourceLoader.leaf(leafReaderContext.reader(), nextDocIdArray); - setNextSourceMetadataReader(leafReaderContext); + setNextSyntheticFieldsReader(leafReaderContext); } int segmentDocID = docRecord.docID() - docBase; leafFieldLoader.advanceTo(segmentDocID); @@ -255,13 +255,13 @@ private Translog.Operation createOperation( return null; } } - var sourceBytes = addSourceMetadata(sourceLoader.source(fieldLoader, segmentDocID).internalSourceRef(), segmentDocID); + var source = addSyntheticFields(sourceLoader.source(fieldLoader, segmentDocID), segmentDocID); return new Translog.Index( fieldLoader.id(), docRecord.seqNo(), docRecord.primaryTerm(), docRecord.version(), - sourceBytes, + source.internalSourceRef(), fieldLoader.routing(), -1 // autogenerated timestamp ); diff --git a/server/src/main/java/org/elasticsearch/index/engine/SearchBasedChangesSnapshot.java b/server/src/main/java/org/elasticsearch/index/engine/SearchBasedChangesSnapshot.java index ac97ff5812300..5fdf92d9eb1d3 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/SearchBasedChangesSnapshot.java +++ b/server/src/main/java/org/elasticsearch/index/engine/SearchBasedChangesSnapshot.java @@ -21,7 +21,6 @@ import org.apache.lucene.search.SortField; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopFieldCollectorManager; -import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.core.IOUtils; @@ -199,14 +198,14 @@ protected TopDocs nextTopDocs() throws IOException { } /** - * Sets the reader context to enable reading metadata that was removed from the {@code _source}. 
+ * Sets the reader context to enable reading synthetic fields that were removed from the {@code _source}. * This method sets up the {@code sourceMetadataFetcher} with the provided {@link LeafReaderContext}, * ensuring it is ready to fetch metadata for subsequent operations. * - *

Note: This method should be called before {@link #addSourceMetadata(BytesReference, int)} at the start of every leaf + *

Note: This method should be called before {@link #addSyntheticFields(Source, int)} at the start of every leaf * to ensure the metadata fetcher is properly initialized.

*/ - protected void setNextSourceMetadataReader(LeafReaderContext context) { + protected void setNextSyntheticFieldsReader(LeafReaderContext context) throws IOException { if (sourceMetadataFetcher != null) { sourceMetadataFetcher.setNextReader(context); } @@ -214,27 +213,25 @@ protected void setNextSourceMetadataReader(LeafReaderContext context) { /** * Creates a new {@link Source} object by combining the provided {@code originalSource} - * with additional metadata fields. If the {@code sourceMetadataFetcher} is null or no metadata + * with additional synthetic fields. If the {@code sourceMetadataFetcher} is null or no metadata * fields are fetched, the original source is returned unchanged. * - * @param originalSourceBytes the original source bytes + * @param originalSource the original source * @param segmentDocID the document ID used to fetch metadata fields * @return a new {@link Source} instance containing the original data and additional metadata, * or the original source if no metadata is added - * @throws IOException if an error occurs while fetching metadata values + * @throws IOException if an error occurs while fetching synthetic values */ - protected BytesReference addSourceMetadata(BytesReference originalSourceBytes, int segmentDocID) throws IOException { + protected Source addSyntheticFields(Source originalSource, int segmentDocID) throws IOException { if (sourceMetadataFetcher == null) { - return originalSourceBytes; + return originalSource; } - var originalSource = Source.fromBytes(originalSourceBytes); List values = sourceMetadataFetcher.fetchValues(originalSource, segmentDocID, List.of()); if (values.isEmpty()) { - return originalSourceBytes; + return originalSource; } - var map = originalSource.source(); - map.put(InferenceMetadataFieldsMapper.NAME, values.get(0)); - return Source.fromMap(map, originalSource.sourceContentType()).internalSourceRef(); + originalSource.source().put(InferenceMetadataFieldsMapper.NAME, values.get(0)); + return 
Source.fromMap(originalSource.source(), originalSource.sourceContentType()); } static IndexSearcher newIndexSearcher(Engine.Searcher engineSearcher) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/index/engine/TranslogOperationAsserter.java b/server/src/main/java/org/elasticsearch/index/engine/TranslogOperationAsserter.java index d8b51648cb586..2cd3a9f755ffb 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/TranslogOperationAsserter.java +++ b/server/src/main/java/org/elasticsearch/index/engine/TranslogOperationAsserter.java @@ -39,7 +39,8 @@ public boolean assertSameIndexOperation(Translog.Index o1, Translog.Index o2) th return true; } if (engineConfig.getIndexSettings().isRecoverySourceSyntheticEnabled() - || engineConfig.getMapperService().mappingLookup().inferenceFields().isEmpty() == false) { + || engineConfig.getMapperService().mappingLookup().inferenceFields().isEmpty() == false + || engineConfig.getMapperService().mappingLookup().syntheticVectorFields().isEmpty() == false) { return super.assertSameIndexOperation(synthesizeSource(engineConfig, o1), o2) || super.assertSameIndexOperation(o1, synthesizeSource(engineConfig, o2)); } diff --git a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java index f0f56333c7529..1e553e913379c 100644 --- a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java +++ b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java @@ -12,6 +12,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.document.DocumentField; @@ -19,7 +20,9 @@ import 
org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndVersion; import org.elasticsearch.common.metrics.CounterMetric; import org.elasticsearch.common.metrics.MeanMetric; +import org.elasticsearch.common.regex.Regex; import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.Tuple; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.VersionType; @@ -39,6 +42,7 @@ import org.elasticsearch.index.shard.AbstractIndexShardComponent; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.index.shard.MultiEngineGet; +import org.elasticsearch.search.fetch.subphase.FetchFieldsContext; import org.elasticsearch.search.fetch.subphase.FetchSourceContext; import org.elasticsearch.search.lookup.Source; import org.elasticsearch.search.lookup.SourceFilter; @@ -54,7 +58,9 @@ import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.function.Function; +import java.util.stream.Collectors; +import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO; @@ -220,7 +226,7 @@ public GetResult getForUpdate(String id, long ifSeqNo, long ifPrimaryTerm, Strin VersionType.INTERNAL, ifSeqNo, ifPrimaryTerm, - FetchSourceContext.FETCH_SOURCE, + FetchSourceContext.FETCH_ALL_SOURCE, false, indexShard::get ); @@ -306,7 +312,12 @@ private GetResult innerGetFetch( Map documentFields = null; Map metadataFields = null; DocIdAndVersion docIdAndVersion = get.docIdAndVersion(); - var sourceFilter = fetchSourceContext.filter(); + + var res = maybeExcludeSyntheticVectorFields(mappingLookup, indexSettings, fetchSourceContext, null); + if (res.v1() != fetchSourceContext) { + fetchSourceContext = res.v1(); + } + var sourceFilter = res.v2(); SourceLoader loader = forceSyntheticSource 
? new SourceLoader.Synthetic( sourceFilter, @@ -400,6 +411,77 @@ private GetResult innerGetFetch( ); } + /** + * Determines whether vector fields should be excluded from the source based on the {@link FetchSourceContext}. + * Returns {@code true} if vector fields are explicitly marked to be excluded and {@code false} otherwise. + */ + public static boolean shouldExcludeVectorsFromSource(IndexSettings indexSettings, FetchSourceContext fetchSourceContext) { + if (fetchSourceContext == null || fetchSourceContext.excludeVectors() == null) { + return INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(indexSettings.getSettings()); + } + return fetchSourceContext.excludeVectors(); + } + + /** + * Returns a {@link SourceFilter} that excludes vector fields not associated with semantic text fields, + * unless vectors are explicitly requested to be included in the source. + * Returns {@code null} when vectors should not be filtered out. + */ + public static Tuple maybeExcludeSyntheticVectorFields( + MappingLookup mappingLookup, + IndexSettings indexSettings, + FetchSourceContext fetchSourceContext, + FetchFieldsContext fetchFieldsContext + ) { + if (shouldExcludeVectorsFromSource(indexSettings, fetchSourceContext) == false) { + return Tuple.tuple(fetchSourceContext, null); + } + var fetchFieldsAut = fetchFieldsContext != null && fetchFieldsContext.fields().size() > 0 + ? new CharacterRunAutomaton( + Regex.simpleMatchToAutomaton(fetchFieldsContext.fields().stream().map(f -> f.field).toArray(String[]::new)) + ) + : null; + var inferenceFieldsAut = mappingLookup.inferenceFields().size() > 0 + ? 
new CharacterRunAutomaton( + Regex.simpleMatchToAutomaton(mappingLookup.inferenceFields().keySet().stream().map(f -> f + "*").toArray(String[]::new)) + ) + : null; + + List lateExcludes = new ArrayList<>(); + var excludes = mappingLookup.getFullNameToFieldType().values().stream().filter(MappedFieldType::isVectorEmbedding).filter(f -> { + // Exclude the field specified by the `fields` option + if (fetchFieldsAut != null && fetchFieldsAut.run(f.name())) { + lateExcludes.add(f.name()); + return false; + } + // Exclude vectors from semantic text fields, as they are processed separately + return inferenceFieldsAut == null || inferenceFieldsAut.run(f.name()) == false; + }).map(f -> f.name()).collect(Collectors.toList()); + + var sourceFilter = excludes.isEmpty() ? null : new SourceFilter(new String[] {}, excludes.toArray(String[]::new)); + if (lateExcludes.size() > 0) { + /** + * Adds the vector field specified by the `fields` option to the excludes list of the fetch source context. + * This ensures that vector fields are available to sub-fetch phases, but excluded during the {@link FetchSourcePhase}. + */ + if (fetchSourceContext != null && fetchSourceContext.excludes() != null) { + for (var exclude : fetchSourceContext.excludes()) { + lateExcludes.add(exclude); + } + } + var newFetchSourceContext = fetchSourceContext == null + ? 
FetchSourceContext.of(true, false, null, lateExcludes.toArray(String[]::new)) + : FetchSourceContext.of( + fetchSourceContext.fetchSource(), + fetchSourceContext.excludeVectors(), + fetchSourceContext.includes(), + lateExcludes.toArray(String[]::new) + ); + return Tuple.tuple(newFetchSourceContext, sourceFilter); + } + return Tuple.tuple(fetchSourceContext, sourceFilter); + } + private static DocumentField loadIgnoredMetadataField(final DocIdAndVersion docIdAndVersion) throws IOException { final SortedSetDocValues ignoredDocValues = docIdAndVersion.reader.getContext() .reader() diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java index 78320a536dec1..15e7ff88350b6 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java @@ -17,6 +17,7 @@ import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.Nullable; import org.elasticsearch.features.NodeFeature; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.fielddata.FieldDataContext; @@ -804,7 +805,8 @@ private static void postProcessDynamicArrayMapping(DocumentParserContext context DenseVectorFieldMapper.Builder builder = new DenseVectorFieldMapper.Builder( fieldName, - context.indexSettings().getIndexVersionCreated() + context.indexSettings().getIndexVersionCreated(), + IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(context.indexSettings().getSettings()) ); builder.dimensions(mappers.size()); DenseVectorFieldMapper denseVectorFieldMapper = builder.build(builderContext); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index 
03f463b25a967..a43575b8f990c 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -453,6 +453,17 @@ public Map indexAnalyzers() { return Map.of(); } + /** + * Returns a {@link SourceLoader.SyntheticVectorsLoader} instance responsible for loading + * synthetic vector values from the index. + * + * @return a {@link SourceLoader.SyntheticVectorsLoader} used to extract synthetic vectors, + * or {@code null} if no loader is provided or applicable in this context + */ + public SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader() { + return null; + } + /** *

* Specifies the mode of synthetic source support by the mapper. diff --git a/server/src/main/java/org/elasticsearch/index/mapper/Mapping.java b/server/src/main/java/org/elasticsearch/index/mapper/Mapping.java index 7ec175799ddec..4a3e67455e603 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/Mapping.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/Mapping.java @@ -124,6 +124,24 @@ Mapping mappingUpdate(RootObjectMapper rootObjectMapper) { return new Mapping(rootObjectMapper, metadataMappers, meta); } + /** + * Returns a {@link SourceLoader.SyntheticVectorsLoader} that loads synthetic vector values + * from a source document, optionally applying a {@link SourceFilter}. + *

+ * The {@code filter}, if provided, can be used to limit which fields from the mapping + * are considered when computing synthetic vectors. This allows for performance + * optimizations or targeted vector extraction. + *

+ * + * @param filter an optional {@link SourceFilter} to restrict the fields considered during loading; + * may be {@code null} to indicate no filtering + * @return a {@link SourceLoader.SyntheticVectorsLoader} for extracting synthetic vectors, + * potentially using the provided filter + */ + public SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader(@Nullable SourceFilter filter) { + return root.syntheticVectorsLoader(filter); + } + private boolean isSourceSynthetic() { SourceFieldMapper sfm = (SourceFieldMapper) metadataMappersByName.get(SourceFieldMapper.NAME); return sfm != null && sfm.isSynthetic(); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java index d0e9c1835bfd4..08314f2282b5f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java @@ -51,6 +51,7 @@ private CacheKey() {} private final Map fieldMappers; private final Map objectMappers; private final Map inferenceFields; + private final Set syntheticVectorFields; private final int runtimeFieldMappersCount; private final NestedLookup nestedLookup; private final FieldTypeLookup fieldTypeLookup; @@ -188,12 +189,17 @@ private MappingLookup( this.fieldTypeLookup = new FieldTypeLookup(mappers, aliasMappers, passThroughMappers, runtimeFields); Map inferenceFields = new HashMap<>(); + List syntheticVectorFields = new ArrayList<>(); for (FieldMapper mapper : mappers) { if (mapper instanceof InferenceFieldMapper inferenceFieldMapper) { inferenceFields.put(mapper.fullPath(), inferenceFieldMapper.getMetadata(fieldTypeLookup.sourcePaths(mapper.fullPath()))); } + if (mapper.syntheticVectorsLoader() != null) { + syntheticVectorFields.add(mapper.fullPath()); + } } this.inferenceFields = Map.copyOf(inferenceFields); + this.syntheticVectorFields = Set.copyOf(syntheticVectorFields); if 
(runtimeFields.isEmpty()) { // without runtime fields this is the same as the field type lookup @@ -378,6 +384,10 @@ public Map inferenceFields() { return inferenceFields; } + public Set syntheticVectorFields() { + return syntheticVectorFields; + } + public NestedLookup nestedLookup() { return nestedLookup; } @@ -486,9 +496,29 @@ public SourceLoader newSourceLoader(@Nullable SourceFilter filter, SourceFieldMe if (isSourceSynthetic()) { return new SourceLoader.Synthetic(filter, () -> mapping.syntheticFieldLoader(filter), metrics); } + var syntheticVectorsLoader = mapping.syntheticVectorsLoader(filter); + if (syntheticVectorsLoader != null) { + return new SourceLoader.SyntheticVectors(removeExcludedSyntheticVectorFields(filter), syntheticVectorsLoader); + } return filter == null ? SourceLoader.FROM_STORED_SOURCE : new SourceLoader.Stored(filter); } + private SourceFilter removeExcludedSyntheticVectorFields(@Nullable SourceFilter filter) { + if (filter == null || filter.getExcludes().length == 0) { + return filter; + } + List newExcludes = new ArrayList<>(); + for (var exclude : filter.getExcludes()) { + if (syntheticVectorFields().contains(exclude) == false) { + newExcludes.add(exclude); + } + } + if (newExcludes.isEmpty() && filter.getIncludes().length == 0) { + return null; + } + return new SourceFilter(filter.getIncludes(), newExcludes.toArray(String[]::new)); + } + /** * Returns if this mapping contains a data-stream's timestamp meta-field and this field is enabled. * Only indices that are a part of a data-stream have this meta-field enabled. 
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java index 08b0fb32a10c5..b3a80dee26a85 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java @@ -16,6 +16,7 @@ import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.join.BitSetProducer; import org.apache.lucene.util.BitSet; +import org.elasticsearch.common.CheckedBiConsumer; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.xcontent.support.XContentMapValues; @@ -408,6 +409,37 @@ protected MapperMergeContext createChildContext(MapperMergeContext mapperMergeCo ); } + @Override + protected SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader(SourceFilter sourceFilter) { + var patchLoader = super.syntheticVectorsLoader(sourceFilter); + if (patchLoader == null) { + return null; + } + return context -> { + var leaf = patchLoader.leaf(context); + if (leaf == null) { + return null; + } + IndexSearcher searcher = new IndexSearcher(context.reader()); + searcher.setQueryCache(null); + var childScorer = searcher.createWeight(nestedTypeFilter, ScoreMode.COMPLETE_NO_SCORES, 1f) + .scorer(searcher.getLeafContexts().get(0)); + if (childScorer == null) { + return null; + } + var parentsDocs = bitsetProducer.apply(parentTypeFilter).getBitSet(context); + return (doc, acc) -> { + List nestedPatches = new ArrayList<>(); + collectChildren(nestedTypePath, doc, parentsDocs, childScorer.iterator(), (offset, childId) -> { + List childPatches = new ArrayList<>(); + leaf.load(childId, childPatches); + nestedPatches.add(new SourceLoader.NestedOffsetSyntheticVectorPath(offset, childPatches)); + }); + acc.add(new SourceLoader.NestedSyntheticVectorPath(fullPath(), nestedPatches)); + }; + }; + } + @Override 
SourceLoader.SyntheticFieldLoader syntheticFieldLoader(SourceFilter filter, Collection mappers, boolean isFragment) { // IgnoredSourceFieldMapper integration takes care of writing the source for nested objects that enabled store_array_source. @@ -459,11 +491,19 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf IndexSearcher searcher = new IndexSearcher(leafReader); searcher.setQueryCache(null); - var childScorer = searcher.createWeight(childFilter, ScoreMode.COMPLETE_NO_SCORES, 1f).scorer(leafReader.getContext()); + var childScorer = searcher.createWeight(childFilter, ScoreMode.COMPLETE_NO_SCORES, 1f) + .scorer(searcher.getLeafContexts().get(0)); if (childScorer != null) { var parentDocs = parentBitSetProducer.get().getBitSet(leafReader.getContext()); return parentDoc -> { - collectChildren(parentDoc, parentDocs, childScorer.iterator()); + children.clear(); + collectChildren( + nestedTypePath, + parentDoc, + parentDocs, + childScorer.iterator(), + (offset, childId) -> children.add(childId) + ); return children.size() > 0; }; } else { @@ -471,21 +511,6 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf } } - private List collectChildren(int parentDoc, BitSet parentDocs, DocIdSetIterator childIt) throws IOException { - assert parentDoc < 0 || parentDocs.get(parentDoc) : "wrong context, doc " + parentDoc + " is not a parent of " + nestedTypePath; - final int prevParentDoc = parentDoc > 0 ? 
parentDocs.prevSetBit(parentDoc - 1) : -1; - int childDocId = childIt.docID(); - if (childDocId <= prevParentDoc) { - childDocId = childIt.advance(prevParentDoc + 1); - } - - children.clear(); - for (; childDocId < parentDoc; childDocId = childIt.nextDoc()) { - children.add(childDocId); - } - return children; - } - @Override public boolean hasValue() { return children.size() > 0; @@ -518,4 +543,24 @@ public void reset() { children.clear(); } } + + private static void collectChildren( + String nestedTypePath, + int parentDoc, + BitSet parentDocs, + DocIdSetIterator childIt, + CheckedBiConsumer childConsumer + ) throws IOException { + assert parentDoc < 0 || parentDocs.get(parentDoc) : "wrong context, doc " + parentDoc + " is not a parent of " + nestedTypePath; + final int prevParentDoc = parentDoc > 0 ? parentDocs.prevSetBit(parentDoc - 1) : -1; + int childDocId = childIt.docID(); + if (childDocId <= prevParentDoc) { + childDocId = childIt.advance(prevParentDoc + 1); + } + + int offset = 0; + for (; childDocId < parentDoc; childDocId = childIt.nextDoc()) { + childConsumer.accept(offset++, childDocId); + } + } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java index 3c71f6bf2db5b..33ed032730561 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java @@ -43,6 +43,7 @@ import java.util.Optional; import java.util.Set; import java.util.TreeMap; +import java.util.stream.Collectors; import java.util.stream.Stream; public class ObjectMapper extends Mapper { @@ -911,6 +912,47 @@ public ObjectMapper findParentMapper(String leafFieldPath) { return null; } + private static SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader(Mapper mapper, SourceFilter sourceFilter) { + if (sourceFilter != null && sourceFilter.isPathFiltered(mapper.fullPath(), false)) { + return 
null; + } + if (mapper instanceof ObjectMapper objMapper) { + return objMapper.syntheticVectorsLoader(sourceFilter); + } else if (mapper instanceof FieldMapper fieldMapper) { + return fieldMapper.syntheticVectorsLoader(); + } else { + return null; + } + } + + SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader(SourceFilter sourceFilter) { + var loaders = mappers.values() + .stream() + .map(m -> syntheticVectorsLoader(m, sourceFilter)) + .filter(l -> l != null) + .collect(Collectors.toList()); + if (loaders.isEmpty()) { + return null; + } + return context -> { + final List leaves = new ArrayList<>(); + for (var loader : loaders) { + var leaf = loader.leaf(context); + if (leaf != null) { + leaves.add(leaf); + } + } + if (leaves.isEmpty()) { + return null; + } + return (doc, acc) -> { + for (var leaf : leaves) { + leaf.load(doc, acc); + } + }; + }; + } + SourceLoader.SyntheticFieldLoader syntheticFieldLoader(SourceFilter filter, Collection mappers, boolean isFragment) { var fields = mappers.stream() .sorted(Comparator.comparing(Mapper::fullPath)) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java index cc624ebbeda74..a4c8b0a5b50b1 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java @@ -18,8 +18,10 @@ import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.CollectionUtils; +import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.Nullable; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.IndexMode; @@ -33,14 +35,20 @@ import 
org.elasticsearch.search.fetch.subphase.FetchSourcePhase; import org.elasticsearch.search.lookup.Source; import org.elasticsearch.search.lookup.SourceFilter; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentGenerator; +import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.XContentParserConfiguration; import org.elasticsearch.xcontent.XContentType; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Locale; +import java.util.Set; public class SourceFieldMapper extends MetadataFieldMapper { public static final NodeFeature REMOVE_SYNTHETIC_SOURCE_ONLY_VALIDATION = new NodeFeature( @@ -426,64 +434,81 @@ public boolean isComplete() { @Override public void preParse(DocumentParserContext context) throws IOException { - int originalSourceLength = context.sourceToParse().source().length(); XContentType contentType = context.sourceToParse().getXContentType(); - BytesReference originalSource = removeInferenceMetadataFields( - context.mappingLookup(), - context.sourceToParse().source(), - contentType - ); - final BytesReference adaptedSource = applyFilters(context.mappingLookup(), originalSource, contentType, false); - if (adaptedSource != null) { - final BytesRef ref = adaptedSource.toBytesRef(); + final var originalSource = context.sourceToParse().source(); + final var storedSource = stored() ? 
removeSyntheticVectorFields(context.mappingLookup(), originalSource, contentType) : null; + final var adaptedStoredSource = applyFilters(context.mappingLookup(), storedSource, contentType, false); + + if (adaptedStoredSource != null) { + final BytesRef ref = adaptedStoredSource.toBytesRef(); context.doc().add(new StoredField(fieldType().name(), ref.bytes, ref.offset, ref.length)); } - boolean enableRecoverySource = context.indexSettings().isRecoverySourceEnabled(); - if (enableRecoverySource && originalSource != null && adaptedSource != originalSource) { - // if we omitted source or modified it we add the _recovery_source to ensure we have it for ops based recovery - BytesRef ref = originalSource.toBytesRef(); - if (context.indexSettings().isRecoverySourceSyntheticEnabled()) { - assert isSynthetic() : "recovery source should not be disabled on non-synthetic source"; - /** - * We use synthetic source for recovery, so we omit the recovery source. - * Instead, we record only the size of the uncompressed source. - * This size is used in {@link LuceneSyntheticSourceChangesSnapshot} to control memory - * usage during the recovery process when loading a batch of synthetic sources. - */ - context.doc().add(new NumericDocValuesField(RECOVERY_SOURCE_SIZE_NAME, originalSourceLength)); - } else { - context.doc().add(new StoredField(RECOVERY_SOURCE_NAME, ref.bytes, ref.offset, ref.length)); - context.doc().add(new NumericDocValuesField(RECOVERY_SOURCE_NAME, 1)); - } + if (context.indexSettings().isRecoverySourceEnabled() == false) { + // Recovery source is disabled; skip adding recovery source fields. + return; + } + + if (context.indexSettings().isRecoverySourceSyntheticEnabled()) { + assert isSynthetic() : "Recovery source should not be disabled for non-synthetic sources"; + // Synthetic source recovery is enabled; omit the full recovery source. + // Instead, store only the size of the uncompressed original source. 
+ // This size is used by LuceneSyntheticSourceChangesSnapshot to manage memory usage + // when loading batches of synthetic sources during recovery. + context.doc().add(new NumericDocValuesField(RECOVERY_SOURCE_SIZE_NAME, originalSource.length())); + } else if (stored() == false || adaptedStoredSource != storedSource) { + // If the source is missing (due to synthetic source or disabled mode) + // or has been altered (via source filtering), store a reduced recovery source. + // This includes the original source with synthetic vector fields removed for operation-based recovery. + var recoverySource = removeSyntheticVectorFields(context.mappingLookup(), originalSource, contentType).toBytesRef(); + context.doc().add(new StoredField(RECOVERY_SOURCE_NAME, recoverySource.bytes, recoverySource.offset, recoverySource.length)); + context.doc().add(new NumericDocValuesField(RECOVERY_SOURCE_NAME, 1)); } } /** - * Removes the {@link InferenceMetadataFieldsMapper} content from the {@code _source} if it is present. - * This metadata is regenerated at query or snapshot recovery time using stored fields and doc values. + * Removes the synthetic vector fields (_inference and synthetic vector fields) from the {@code _source} if it is present. + * These fields are regenerated at query or snapshot recovery time using stored fields and doc values. * *

For details on how the metadata is re-added, see:

*
    - *
  • {@link SearchBasedChangesSnapshot#addSourceMetadata(BytesReference, int)}
  • + *
  • {@link SearchBasedChangesSnapshot#addSyntheticFields(Source, int)}
  • *
  • {@link FetchSourcePhase#getProcessor(FetchContext)}
  • *
*/ - private BytesReference removeInferenceMetadataFields( + private BytesReference removeSyntheticVectorFields( MappingLookup mappingLookup, @Nullable BytesReference originalSource, @Nullable XContentType contentType - ) { - if (originalSource != null - && InferenceMetadataFieldsMapper.isEnabled(mappingLookup) - && mappingLookup.inferenceFields().isEmpty() == false) { - return Source.fromBytes(originalSource, contentType) - .filter(new SourceFilter(new String[] {}, new String[] { InferenceMetadataFieldsMapper.NAME })) - .internalSourceRef(); - } else { + ) throws IOException { + if (originalSource == null) { + return null; + } + Set excludes = new HashSet<>(); + if (InferenceMetadataFieldsMapper.isEnabled(mappingLookup) && mappingLookup.inferenceFields().isEmpty() == false) { + excludes.add(InferenceMetadataFieldsMapper.NAME); + } + if (excludes.isEmpty() && mappingLookup.syntheticVectorFields().isEmpty()) { return originalSource; } + BytesStreamOutput streamOutput = new BytesStreamOutput(); + XContentBuilder builder = new XContentBuilder(contentType.xContent(), streamOutput); + try ( + XContentParser parser = XContentHelper.createParserNotCompressed( + XContentParserConfiguration.EMPTY.withFiltering(Set.of(), excludes, true), + originalSource, + contentType + ) + ) { + if ((parser.currentToken() == null) && (parser.nextToken() == null)) { + return originalSource; + } + // Removes synthetic vector fields from the source while preserving empty parent objects, + // ensuring that the fields can later be rehydrated in their original locations. 
+ removeSyntheticVectorFields(builder.generator(), parser, "", mappingLookup.syntheticVectorFields()); + return BytesReference.bytes(builder); + } } @Nullable @@ -552,4 +577,42 @@ public static boolean onOrAfterDeprecateModeVersion(IndexVersion version) { return version.onOrAfter(IndexVersions.DEPRECATE_SOURCE_MODE_MAPPER) || version.between(IndexVersions.V8_DEPRECATE_SOURCE_MODE_MAPPER, IndexVersions.UPGRADE_TO_LUCENE_10_0_0); } + + private static void removeSyntheticVectorFields( + XContentGenerator destination, + XContentParser parser, + String fullPath, + Set patchFullPaths + ) throws IOException { + XContentParser.Token token = parser.currentToken(); + if (token == XContentParser.Token.FIELD_NAME) { + String fieldName = parser.currentName(); + token = parser.nextToken(); + fullPath = fullPath + (fullPath.isEmpty() ? "" : ".") + fieldName; + if (patchFullPaths.contains(fullPath)) { + parser.skipChildren(); + return; + } + destination.writeFieldName(fieldName); + } + + switch (token) { + case START_ARRAY -> { + destination.writeStartArray(); + while (parser.nextToken() != XContentParser.Token.END_ARRAY) { + removeSyntheticVectorFields(destination, parser, fullPath, patchFullPaths); + } + destination.writeEndArray(); + } + case START_OBJECT -> { + destination.writeStartObject(); + while (parser.nextToken() != XContentParser.Token.END_OBJECT) { + removeSyntheticVectorFields(destination, parser, fullPath, patchFullPaths); + } + destination.writeEndObject(); + } + default -> // others are simple: + destination.copyCurrentEvent(parser); + } + } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java index 27b4f4eb0ae76..54d44219231f0 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java @@ -10,7 +10,9 @@ package org.elasticsearch.index.mapper; import 
org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; @@ -111,7 +113,7 @@ public Set requiredStoredFields() { } /** - * Reconstructs {@code _source} from doc values anf stored fields. + * Reconstructs {@code _source} from doc values and stored fields. */ class Synthetic implements SourceLoader { private final SourceFilter filter; @@ -403,4 +405,142 @@ public void reset() { // since DocValuesLoader#advanceToDoc will reset the state anyway. } } + + class SyntheticVectors implements SourceLoader { + final SourceLoader sourceLoader; + final SyntheticVectorsLoader patchLoader; + + SyntheticVectors(@Nullable SourceFilter sourceFilter, SyntheticVectorsLoader patchLoader) { + this.sourceLoader = sourceFilter == null ? 
FROM_STORED_SOURCE : new Stored(sourceFilter); + this.patchLoader = patchLoader; + } + + @Override + public boolean reordersFieldValues() { + return false; + } + + @Override + public Set requiredStoredFields() { + return sourceLoader.requiredStoredFields(); + } + + @Override + public Leaf leaf(LeafReader reader, int[] docIdsInLeaf) throws IOException { + var sourceLeaf = sourceLoader.leaf(reader, docIdsInLeaf); + var patchLeaf = patchLoader.leaf(reader.getContext()); + return new Leaf() { + @Override + public Source source(LeafStoredFieldLoader storedFields, int docId) throws IOException { + Source source = sourceLeaf.source(storedFields, docId); + if (patchLeaf == null) { + return source; + } + List patches = new ArrayList<>(); + patchLeaf.load(docId, patches); + if (patches.size() == 0) { + return source; + } + return applySyntheticVectors(source, patches); + } + + @Override + public void write(LeafStoredFieldLoader storedFields, int docId, XContentBuilder b) throws IOException { + throw new IllegalStateException("This operation is not allowed in the current context"); + } + }; + } + } + + /** + * Applies a list of {@link SyntheticVectorPatch} instances to the given {@link Source}. + * + * @param originalSource the original source object + * @param patches the list of patches to apply + * @return a new {@link Source} with the patches applied + */ + static Source applySyntheticVectors(Source originalSource, List patches) { + Map newMap = originalSource.source(); + applyPatches("", newMap, patches); + return Source.fromMap(newMap, originalSource.sourceContentType()); + } + + /** + * Recursively applies synthetic vector patches to a nested map. 
+ * + * @param rootPath the current root path for nested structures + * @param map the map to apply patches to + * @param patches the list of patches to apply + */ + private static void applyPatches(String rootPath, Map map, List patches) { + for (SyntheticVectorPatch patch : patches) { + if (patch instanceof LeafSyntheticVectorPath leaf) { + String key = extractRelativePath(rootPath, leaf.fullPath()); + map.put(key, leaf.value()); + } else if (patch instanceof NestedSyntheticVectorPath nested) { + String nestedPath = extractRelativePath(rootPath, nested.fullPath()); + List> nestedMaps = XContentMapValues.extractNestedSources(nestedPath, map); + for (SyntheticVectorPatch childPatch : nested.children()) { + if (childPatch instanceof NestedOffsetSyntheticVectorPath offsetPatch) { + Map nestedMap = XContentMapValues.nodeMapValue(nestedMaps.get(offsetPatch.offset()), nestedPath); + applyPatches(nested.fullPath(), nestedMap, offsetPatch.children()); + } else { + throw new IllegalStateException( + "Unexpected child patch type of " + patch.getClass().getSimpleName() + " in nested structure." + ); + } + } + } else { + throw new IllegalStateException("Unknown patch type: " + patch.getClass().getSimpleName()); + } + } + } + + private static String extractRelativePath(String rootPath, String fullPath) { + return rootPath.isEmpty() ? fullPath : fullPath.substring(rootPath.length() + 1); + } + + /** + * Represents a patch to be applied to a source structure. + */ + sealed interface SyntheticVectorPatch permits NestedSyntheticVectorPath, NestedOffsetSyntheticVectorPath, LeafSyntheticVectorPath {} + + /** + * A patch representing a nested path with further child patches. + * + * @param fullPath the full dot-separated path + * @param children the list of child patches + */ + record NestedSyntheticVectorPath(String fullPath, List children) implements SyntheticVectorPatch {} + + /** + * A patch representing an indexed child within a nested structure. 
+ * + * @param offset the index of the nested element + * @param children the list of child patches to apply at this offset + */ + record NestedOffsetSyntheticVectorPath(int offset, List children) implements SyntheticVectorPatch {} + + /** + * A patch representing a leaf field with a value to be applied. + * + * @param fullPath the fully-qualified field name + * @param value the value to assign + */ + record LeafSyntheticVectorPath(String fullPath, Object value) implements SyntheticVectorPatch {} + + interface SyntheticVectorsLoader { + /** + * Returns a leaf loader if the provided context contains patches for the specified field; + * returns null otherwise. + */ + SyntheticVectorsLoader.Leaf leaf(LeafReaderContext context) throws IOException; + + interface Leaf { + /** + * Loads all patches for this field associated with the provided document into the specified {@code acc} list. + */ + void load(int doc, List acc) throws IOException; + } + } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 755435d98d447..db6592e60f0af 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -25,6 +25,7 @@ import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; @@ -100,6 +101,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.time.ZoneId; +import java.util.ArrayList; import java.util.Arrays; import java.util.HexFormat; import java.util.List; @@ -115,6 +117,7 @@ import static 
org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_INDEX_VERSION_CREATED; import static org.elasticsearch.common.Strings.format; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; +import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING; import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MAX_VECTORS_PER_CLUSTER; import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MIN_VECTORS_PER_CLUSTER; @@ -252,8 +255,9 @@ public static class Builder extends FieldMapper.Builder { private final Parameter> meta = Parameter.metaParam(); final IndexVersion indexVersionCreated; + final boolean isSyntheticVector; - public Builder(String name, IndexVersion indexVersionCreated) { + public Builder(String name, IndexVersion indexVersionCreated, boolean isSyntheticVector) { super(name); this.indexVersionCreated = indexVersionCreated; // This is defined as updatable because it can be updated once, from [null] to a valid dim size, @@ -285,6 +289,7 @@ public Builder(String name, IndexVersion indexVersionCreated) { } } }); + this.isSyntheticVector = isSyntheticVector; final boolean indexedByDefault = indexVersionCreated.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION); final boolean defaultInt8Hnsw = indexVersionCreated.onOrAfter(IndexVersions.DEFAULT_DENSE_VECTOR_TO_INT8_HNSW); final boolean defaultBBQ8Hnsw = indexVersionCreated.onOrAfter(IndexVersions.DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW); @@ -426,6 +431,7 @@ public DenseVectorFieldMapper build(MapperBuilderContext context) { // Validate again here because the dimensions or element type could have been set programmatically, // which affects index option validity validate(); + boolean isSyntheticVectorFinal = (context.isSourceSynthetic() == false) && indexed.getValue() && isSyntheticVector; return new DenseVectorFieldMapper( leafName(), new DenseVectorFieldType( @@ -441,7 +447,8 @@ public DenseVectorFieldMapper 
build(MapperBuilderContext context) { ), builderParams(this, context), indexOptions.getValue(), - indexVersionCreated + indexVersionCreated, + isSyntheticVectorFinal ); } } @@ -2372,7 +2379,11 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } public static final TypeParser PARSER = new TypeParser( - (n, c) -> new Builder(n, c.indexVersionCreated()), + (n, c) -> new Builder( + n, + c.getIndexSettings().getIndexVersionCreated(), + INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(c.getIndexSettings().getSettings()) + ), notInMultiFields(CONTENT_TYPE) ); @@ -2826,17 +2837,24 @@ public List fetchValues(Source source, int doc, List ignoredValu private final DenseVectorIndexOptions indexOptions; private final IndexVersion indexCreatedVersion; + private final boolean isSyntheticVector; private DenseVectorFieldMapper( String simpleName, MappedFieldType mappedFieldType, BuilderParams params, DenseVectorIndexOptions indexOptions, - IndexVersion indexCreatedVersion + IndexVersion indexCreatedVersion, + boolean isSyntheticVector ) { super(simpleName, mappedFieldType, params); this.indexOptions = indexOptions; this.indexCreatedVersion = indexCreatedVersion; + this.isSyntheticVector = isSyntheticVector; + } + + public boolean isSyntheticVector() { + return isSyntheticVector; } @Override @@ -2958,7 +2976,7 @@ protected String contentType() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexCreatedVersion).init(this); + return new Builder(leafName(), indexCreatedVersion, isSyntheticVector).init(this); } private static DenseVectorIndexOptions parseIndexOptions(String fieldName, Object propNode, IndexVersion indexVersion) { @@ -3012,6 +3030,13 @@ public String toString() { }; } + @Override + public SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader() { + return isSyntheticVector() + ? 
new SyntheticDenseVectorPatchLoader(new IndexedSyntheticFieldLoader(indexCreatedVersion, fieldType().similarity)) + : null; + } + @Override protected SyntheticSourceSupport syntheticSourceSupport() { return new SyntheticSourceSupport.Native( @@ -3022,15 +3047,16 @@ protected SyntheticSourceSupport syntheticSourceSupport() { } private class IndexedSyntheticFieldLoader extends SourceLoader.DocValuesBasedSyntheticFieldLoader { - private FloatVectorValues values; - private ByteVectorValues byteVectorValues; + private FloatVectorValues floatValues; + private ByteVectorValues byteValues; + private NumericDocValues magnitudeReader; + private boolean hasValue; private boolean hasMagnitude; private int ord; private final IndexVersion indexCreatedVersion; private final VectorSimilarity vectorSimilarity; - private NumericDocValues magnitudeReader; private IndexedSyntheticFieldLoader(IndexVersion indexCreatedVersion, VectorSimilarity vectorSimilarity) { this.indexCreatedVersion = indexCreatedVersion; @@ -3038,44 +3064,43 @@ private IndexedSyntheticFieldLoader(IndexVersion indexCreatedVersion, VectorSimi } @Override - public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { - values = leafReader.getFloatVectorValues(fullPath()); - if (values != null) { - if (indexCreatedVersion.onOrAfter(NORMALIZE_COSINE) && VectorSimilarity.COSINE.equals(vectorSimilarity)) { - magnitudeReader = leafReader.getNumericDocValues(fullPath() + COSINE_MAGNITUDE_FIELD_SUFFIX); - } - KnnVectorValues.DocIndexIterator iterator = values.iterator(); - return docId -> { - if (iterator.docID() > docId) { - return hasValue = false; - } - if (iterator.docID() == docId) { - return hasValue = true; - } - hasValue = docId == iterator.advance(docId); - hasMagnitude = hasValue && magnitudeReader != null && magnitudeReader.advanceExact(docId); - ord = iterator.index(); - return hasValue; - }; + public DocValuesLoader docValuesLoader(LeafReader reader, int[] docIdsInLeaf) 
throws IOException { + floatValues = reader.getFloatVectorValues(fullPath()); + if (floatValues != null) { + if (shouldNormalize()) { + magnitudeReader = reader.getNumericDocValues(fullPath() + COSINE_MAGNITUDE_FIELD_SUFFIX); + } + return createLoader(floatValues.iterator(), true); } - byteVectorValues = leafReader.getByteVectorValues(fullPath()); - if (byteVectorValues != null) { - KnnVectorValues.DocIndexIterator iterator = byteVectorValues.iterator(); - return docId -> { - if (iterator.docID() > docId) { - return hasValue = false; - } - if (iterator.docID() == docId) { - return hasValue = true; - } - hasValue = docId == iterator.advance(docId); - ord = iterator.index(); - return hasValue; - }; + + byteValues = reader.getByteVectorValues(fullPath()); + if (byteValues != null) { + return createLoader(byteValues.iterator(), false); } + return null; } + private boolean shouldNormalize() { + return indexCreatedVersion.onOrAfter(NORMALIZE_COSINE) && VectorSimilarity.COSINE.equals(vectorSimilarity); + } + + private DocValuesLoader createLoader(KnnVectorValues.DocIndexIterator iterator, boolean checkMagnitude) { + return docId -> { + if (iterator.docID() > docId) { + return hasValue = false; + } + if (iterator.docID() == docId || iterator.advance(docId) == docId) { + ord = iterator.index(); + hasValue = true; + hasMagnitude = checkMagnitude && magnitudeReader != null && magnitudeReader.advanceExact(docId); + } else { + hasValue = false; + } + return hasValue; + }; + } + @Override public boolean hasValue() { return hasValue; @@ -3086,28 +3111,55 @@ public void write(XContentBuilder b) throws IOException { if (false == hasValue) { return; } - float magnitude = Float.NaN; - if (hasMagnitude) { - magnitude = Float.intBitsToFloat((int) magnitudeReader.longValue()); - } + float magnitude = hasMagnitude ? 
Float.intBitsToFloat((int) magnitudeReader.longValue()) : Float.NaN; b.startArray(leafName()); - if (values != null) { - for (float v : values.vectorValue(ord)) { - if (hasMagnitude) { - b.value(v * magnitude); - } else { - b.value(v); - } + if (floatValues != null) { + for (float v : floatValues.vectorValue(ord)) { + b.value(hasMagnitude ? v * magnitude : v); } - } else if (byteVectorValues != null) { - byte[] vectorValue = byteVectorValues.vectorValue(ord); - for (byte value : vectorValue) { - b.value(value); + } else if (byteValues != null) { + for (byte v : byteValues.vectorValue(ord)) { + b.value(v); } } b.endArray(); } + /** + * Returns a deep-copied vector for the current document, either as a list of floats + * (with optional cosine normalization) or a list of bytes. + * + * @throws IOException if reading fails + */ + public Object copyVectorAsList() throws IOException { + assert hasValue : "vector is null for ord=" + ord; + if (floatValues != null) { + float[] raw = floatValues.vectorValue(ord); + List copyList = new ArrayList<>(raw.length); + + if (hasMagnitude) { + float mag = Float.intBitsToFloat((int) magnitudeReader.longValue()); + for (int i = 0; i < raw.length; i++) { + copyList.add(raw[i] * mag); + } + } else { + for (int i = 0; i < raw.length; i++) { + copyList.add(raw[i]); + } + } + return copyList; + } else if (byteValues != null) { + byte[] raw = byteValues.vectorValue(ord); + List copyList = new ArrayList<>(raw.length); + for (int i = 0; i < raw.length; i++) { + copyList.add(raw[i]); + } + return copyList; + } + + throw new IllegalStateException("No vector values available to copy."); + } + @Override public String fieldName() { return fullPath(); @@ -3170,6 +3222,30 @@ public String fieldName() { } } + public class SyntheticDenseVectorPatchLoader implements SourceLoader.SyntheticVectorsLoader { + private final IndexedSyntheticFieldLoader syntheticFieldLoader; + + public SyntheticDenseVectorPatchLoader(IndexedSyntheticFieldLoader 
syntheticFieldLoader) { + this.syntheticFieldLoader = syntheticFieldLoader; + } + + public SourceLoader.SyntheticVectorsLoader.Leaf leaf(LeafReaderContext context) throws IOException { + var dvLoader = syntheticFieldLoader.docValuesLoader(context.reader(), null); + return (doc, acc) -> { + if (dvLoader == null) { + return; + } + dvLoader.advanceToDoc(doc); + if (syntheticFieldLoader.hasValue()) { + // add vectors as list since that's how they're parsed from xcontent. + acc.add( + new SourceLoader.LeafSyntheticVectorPath(syntheticFieldLoader.fieldName(), syntheticFieldLoader.copyVectorAsList()) + ); + } + }; + } + } + /** * Interface for a function that takes a int and boolean */ diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java b/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java index f1495e06ef51d..5e215bb75d8bb 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java @@ -12,6 +12,8 @@ import java.util.HashSet; import java.util.Set; +import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS; + /** * A {@link Set} of "capabilities" supported by the {@link RestSearchAction}. 
*/ @@ -53,6 +55,7 @@ private SearchCapabilities() {} private static final String EXCLUDE_VECTORS_PARAM = "exclude_vectors_param"; private static final String DENSE_VECTOR_UPDATABLE_BBQ = "dense_vector_updatable_bbq"; private static final String FIELD_EXISTS_QUERY_FOR_TEXT_FIELDS_NO_INDEX_OR_DV = "field_exists_query_for_text_fields_no_index_or_dv"; + private static final String SYNTHETIC_VECTORS_SETTING = "synthetic_vectors_setting"; public static final Set CAPABILITIES; static { @@ -77,6 +80,9 @@ private SearchCapabilities() {} capabilities.add(EXCLUDE_VECTORS_PARAM); capabilities.add(DENSE_VECTOR_UPDATABLE_BBQ); capabilities.add(FIELD_EXISTS_QUERY_FOR_TEXT_FIELDS_NO_INDEX_OR_DV); + if (SYNTHETIC_VECTORS) { + capabilities.add(SYNTHETIC_VECTORS_SETTING); + } CAPABILITIES = Set.copyOf(capabilities); } } diff --git a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java index 0dd9cc3622fae..fba268b5bd809 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java @@ -13,13 +13,10 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.TotalHits; -import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.elasticsearch.common.bytes.BytesReference; -import org.elasticsearch.common.regex.Regex; import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; import org.elasticsearch.index.mapper.IdLoader; -import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.SourceLoader; import org.elasticsearch.search.LeafNestedDocuments; import org.elasticsearch.search.NestedDocuments; @@ -28,12 +25,10 @@ import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.SearchShardTarget; import 
org.elasticsearch.search.fetch.FetchSubPhase.HitContext; -import org.elasticsearch.search.fetch.subphase.FetchSourceContext; import org.elasticsearch.search.fetch.subphase.InnerHitsContext; import org.elasticsearch.search.fetch.subphase.InnerHitsPhase; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.lookup.Source; -import org.elasticsearch.search.lookup.SourceFilter; import org.elasticsearch.search.lookup.SourceProvider; import org.elasticsearch.search.profile.ProfileResult; import org.elasticsearch.search.profile.Profilers; @@ -50,7 +45,8 @@ import java.util.List; import java.util.Map; import java.util.function.Supplier; -import java.util.stream.Collectors; + +import static org.elasticsearch.index.get.ShardGetService.maybeExcludeSyntheticVectorFields; /** * Fetch phase of a search request, used to fetch the actual top matching documents to be returned to the client, identified @@ -122,8 +118,16 @@ private SearchHits buildSearchHits(SearchContext context, int[] docIdsToLoad, Pr // - Speed up retrieval of the synthetic source // Note: These vectors will no longer be accessible via _source for any sub-fetch processors, // but they are typically accessed through doc values instead (e.g: re-scorer). 
- SourceFilter sourceFilter = maybeExcludeNonSemanticTextVectorFields(context); - SourceLoader sourceLoader = context.newSourceLoader(sourceFilter); + var res = maybeExcludeSyntheticVectorFields( + context.getSearchExecutionContext().getMappingLookup(), + context.getSearchExecutionContext().getIndexSettings(), + context.fetchSourceContext(), + context.fetchFieldsContext() + ); + if (context.fetchSourceContext() != res.v1()) { + context.fetchSourceContext(res.v1()); + } + SourceLoader sourceLoader = context.newSourceLoader(res.v2()); FetchContext fetchContext = new FetchContext(context, sourceLoader); PreloadedSourceProvider sourceProvider = new PreloadedSourceProvider(); @@ -444,70 +448,4 @@ public String toString() { } }; } - - /** - * Determines whether vector fields should be excluded from the source based on the {@link FetchSourceContext}. - * Returns {@code true} if vector fields are explicitly marked to be excluded and {@code false} otherwise. - */ - private static boolean shouldExcludeVectorsFromSource(SearchContext context) { - if (context.fetchSourceContext() == null) { - return false; - } - return context.fetchSourceContext().excludeVectors() != null && context.fetchSourceContext().excludeVectors(); - } - - /** - * Returns a {@link SourceFilter} that excludes vector fields not associated with semantic text fields, - * unless vectors are explicitly requested to be included in the source. - * Returns {@code null} when vectors should not be filtered out. - */ - private static SourceFilter maybeExcludeNonSemanticTextVectorFields(SearchContext context) { - if (shouldExcludeVectorsFromSource(context) == false) { - return null; - } - var lookup = context.getSearchExecutionContext().getMappingLookup(); - var fetchFieldsAut = context.fetchFieldsContext() != null && context.fetchFieldsContext().fields().size() > 0 - ? 
new CharacterRunAutomaton( - Regex.simpleMatchToAutomaton(context.fetchFieldsContext().fields().stream().map(f -> f.field).toArray(String[]::new)) - ) - : null; - var inferenceFieldsAut = lookup.inferenceFields().size() > 0 - ? new CharacterRunAutomaton( - Regex.simpleMatchToAutomaton(lookup.inferenceFields().keySet().stream().map(f -> f + "*").toArray(String[]::new)) - ) - : null; - - List lateExcludes = new ArrayList<>(); - var excludes = lookup.getFullNameToFieldType().values().stream().filter(MappedFieldType::isVectorEmbedding).filter(f -> { - // Exclude the field specified by the `fields` option - if (fetchFieldsAut != null && fetchFieldsAut.run(f.name())) { - lateExcludes.add(f.name()); - return false; - } - // Exclude vectors from semantic text fields, as they are processed separately - return inferenceFieldsAut == null || inferenceFieldsAut.run(f.name()) == false; - }).map(f -> f.name()).collect(Collectors.toList()); - - if (lateExcludes.size() > 0) { - /** - * Adds the vector field specified by the `fields` option to the excludes list of the fetch source context. - * This ensures that vector fields are available to sub-fetch phases, but excluded during the {@link FetchSourcePhase}. - */ - if (context.fetchSourceContext() != null && context.fetchSourceContext().excludes() != null) { - for (var exclude : context.fetchSourceContext().excludes()) { - lateExcludes.add(exclude); - } - } - var fetchSourceContext = context.fetchSourceContext() == null - ? FetchSourceContext.of(true, false, null, lateExcludes.toArray(String[]::new)) - : FetchSourceContext.of( - context.fetchSourceContext().fetchSource(), - context.fetchSourceContext().excludeVectors(), - context.fetchSourceContext().includes(), - lateExcludes.toArray(String[]::new) - ); - context.fetchSourceContext(fetchSourceContext); - } - return excludes.isEmpty() ? 
null : new SourceFilter(new String[] {}, excludes.toArray(String[]::new)); - } } diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourceContext.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourceContext.java index 367635a2d8c3a..f35794c0438c3 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourceContext.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourceContext.java @@ -41,6 +41,8 @@ public class FetchSourceContext implements Writeable, ToXContentObject { public static final ParseField EXCLUDES_FIELD = new ParseField("excludes", "exclude"); public static final FetchSourceContext FETCH_SOURCE = new FetchSourceContext(true, null, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY); + public static final FetchSourceContext FETCH_ALL_SOURCE = new FetchSourceContext(true, false, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY); + public static final FetchSourceContext DO_NOT_FETCH_SOURCE = new FetchSourceContext( false, null, @@ -155,8 +157,10 @@ public static FetchSourceContext parseFromRestRequest(RestRequest request) { sourceExcludes = Strings.splitStringByCommaToArray(sExcludes); } - if (fetchSource != null || sourceIncludes != null || sourceExcludes != null) { - return FetchSourceContext.of(fetchSource == null || fetchSource, sourceIncludes, sourceExcludes); + Boolean excludeVectors = request.paramAsBoolean("_source_exclude_vectors", null); + + if (excludeVectors != null || fetchSource != null || sourceIncludes != null || sourceExcludes != null) { + return FetchSourceContext.of(fetchSource == null || fetchSource, excludeVectors, sourceIncludes, sourceExcludes); } return null; } diff --git a/server/src/main/java/org/elasticsearch/search/vectors/VectorData.java b/server/src/main/java/org/elasticsearch/search/vectors/VectorData.java index 7e330a44afab8..aaddd3499556b 100644 --- a/server/src/main/java/org/elasticsearch/search/vectors/VectorData.java +++ 
b/server/src/main/java/org/elasticsearch/search/vectors/VectorData.java @@ -48,6 +48,10 @@ public VectorData(StreamInput in) throws IOException { } } + public boolean isFloat() { + return floatVector != null; + } + public byte[] asByteVector() { if (byteVector != null) { return byteVector; diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index 0f161d4a1e44f..75b70f228321e 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -11,21 +11,28 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks; +import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.KnnByteVectorField; import org.apache.lucene.document.KnnFloatVectorField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.search.FieldExistsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.VectorUtil; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import 
org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.codec.CodecService; @@ -54,6 +61,7 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.index.IndexVersionUtils; import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentType; import org.junit.AssumptionViolatedException; import java.io.IOException; @@ -66,9 +74,11 @@ import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN; import static org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase.randomNormalizedVector; +import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS; import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.DYNAMIC_NPROBE; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DEFAULT_OVERSAMPLE; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.IVF_FORMAT; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertToXContentEquivalent; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; @@ -2067,9 +2077,9 @@ public void testValidateOnBuild() { int dimensions = randomIntBetween(64, 1024); // Build a dense vector field mapper with float element type, which will trigger int8 HNSW index options - DenseVectorFieldMapper mapper = new DenseVectorFieldMapper.Builder("test", IndexVersion.current()).elementType(ElementType.FLOAT) - .dimensions(dimensions) - .build(context); + DenseVectorFieldMapper mapper = new DenseVectorFieldMapper.Builder("test", IndexVersion.current(), false).elementType( + ElementType.FLOAT + ).dimensions(dimensions).build(context); // Change the element type to byte, which is incompatible with int8 HNSW index options DenseVectorFieldMapper.Builder builder = (DenseVectorFieldMapper.Builder) 
mapper.getMergeBuilder(); @@ -2910,6 +2920,249 @@ public void testInvalidVectorDimensions() { } } + public void testSyntheticVectorsMinimalValidDocument() throws IOException { + assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); + for (XContentType type : XContentType.values()) { + BytesReference source = generateRandomDoc(type, true, true, false, false, false); + assertSyntheticVectors(buildVectorMapping(), source, type); + } + } + + public void testSyntheticVectorsFullDocument() throws IOException { + assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); + for (XContentType type : XContentType.values()) { + BytesReference source = generateRandomDoc(type, true, true, true, true, false); + assertSyntheticVectors(buildVectorMapping(), source, type); + } + } + + public void testSyntheticVectorsWithUnmappedFields() throws IOException { + assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); + for (XContentType type : XContentType.values()) { + BytesReference source = generateRandomDoc(type, true, true, true, true, true); + assertSyntheticVectors(buildVectorMapping(), source, type); + } + } + + public void testSyntheticVectorsMissingRootFields() throws IOException { + assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); + for (XContentType type : XContentType.values()) { + BytesReference source = generateRandomDoc(type, false, false, false, false, false); + assertSyntheticVectors(buildVectorMapping(), source, type); + } + } + + public void testSyntheticVectorsPartialNestedContent() throws IOException { + assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); + for (XContentType type : XContentType.values()) { + BytesReference source = generateRandomDoc(type, true, true, true, false, false); + assertSyntheticVectors(buildVectorMapping(), source, type); + } + } + + public void testFlatPathDocument() throws 
IOException { + assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); + for (XContentType type : XContentType.values()) { + BytesReference source = generateRandomDocWithFlatPath(type); + assertSyntheticVectors(buildVectorMapping(), source, type); + } + } + + private static String buildVectorMapping() throws IOException { + try (XContentBuilder builder = XContentBuilder.builder(XContentType.JSON.xContent())) { + builder.startObject(); // root + builder.startObject("_doc"); + builder.field("dynamic", "false"); + + builder.startObject("properties"); + + // field + builder.startObject("field"); + builder.field("type", "keyword"); + builder.endObject(); + + // emb + builder.startObject("emb"); + builder.field("type", "dense_vector"); + builder.field("dims", 3); + builder.field("similarity", "cosine"); + builder.endObject(); + + // another_field + builder.startObject("another_field"); + builder.field("type", "keyword"); + builder.endObject(); + + // obj + builder.startObject("obj"); + builder.startObject("properties"); + + // nested + builder.startObject("nested"); + builder.field("type", "nested"); + builder.startObject("properties"); + + // nested.field + builder.startObject("field"); + builder.field("type", "keyword"); + builder.endObject(); + + // nested.emb + builder.startObject("emb"); + builder.field("type", "dense_vector"); + builder.field("dims", 3); + builder.field("similarity", "cosine"); + builder.endObject(); + + // double_nested + builder.startObject("double_nested"); + builder.field("type", "nested"); + builder.startObject("properties"); + + // double_nested.field + builder.startObject("field"); + builder.field("type", "keyword"); + builder.endObject(); + + // double_nested.emb + builder.startObject("emb"); + builder.field("type", "dense_vector"); + builder.field("dims", 3); + builder.field("similarity", "cosine"); + builder.endObject(); + + builder.endObject(); // double_nested.properties + builder.endObject(); // 
double_nested + + builder.endObject(); // nested.properties + builder.endObject(); // nested + + builder.endObject(); // obj.properties + builder.endObject(); // obj + + builder.endObject(); // properties + builder.endObject(); // _doc + builder.endObject(); // root + + return Strings.toString(builder); + } + } + + private BytesReference generateRandomDoc( + XContentType xContentType, + boolean includeRootField, + boolean includeVector, + boolean includeNested, + boolean includeDoubleNested, + boolean includeUnmapped + ) throws IOException { + try (var builder = XContentBuilder.builder(xContentType.xContent())) { + builder.startObject(); + + if (includeRootField) { + builder.field("field", randomAlphaOfLengthBetween(1, 2)); + } + + if (includeVector) { + builder.array("emb", new float[] { 1, 2, 3 }); + } + + if (includeUnmapped) { + builder.field("unmapped_field", "extra"); + } + + builder.startObject("obj"); + if (includeNested) { + builder.startArray("nested"); + + // Entry with just a field + builder.startObject(); + builder.field("field", randomAlphaOfLengthBetween(3, 6)); + builder.endObject(); + + // Empty object + builder.startObject(); + builder.endObject(); + + // Entry with emb and double_nested + if (includeDoubleNested) { + builder.startObject(); + builder.array("emb", new float[] { 1, 2, 3 }); + builder.field("field", "nested_val"); + builder.startArray("double_nested"); + for (int i = 0; i < 2; i++) { + builder.startObject(); + builder.array("emb", new float[] { 1, 2, 3 }); + builder.field("field", "dn_field"); + builder.endObject(); + } + builder.endArray(); + builder.endObject(); + } + + builder.endArray(); + } + builder.endObject(); + + builder.endObject(); + return BytesReference.bytes(builder); + } + } + + private BytesReference generateRandomDocWithFlatPath(XContentType xContentType) throws IOException { + try (var builder = XContentBuilder.builder(xContentType.xContent())) { + builder.startObject(); + + // Root-level fields + 
builder.field("field", randomAlphaOfLengthBetween(1, 2)); + builder.array("emb", new float[] { 1, 2, 3 }); + builder.field("another_field", randomAlphaOfLengthBetween(3, 5)); + + // Simulated flattened "obj.nested" + builder.startObject("obj.nested"); + + builder.field("field", randomAlphaOfLengthBetween(4, 8)); + builder.array("emb", new float[] { 1, 2, 3 }); + + builder.startArray("double_nested"); + for (int i = 0; i < randomIntBetween(1, 2); i++) { + builder.startObject(); + builder.field("field", randomAlphaOfLengthBetween(4, 8)); + builder.array("emb", new float[] { 1, 2, 3 }); + builder.endObject(); + } + builder.endArray(); + + builder.endObject(); // end obj.nested + + builder.endObject(); + return BytesReference.bytes(builder); + } + } + + private void assertSyntheticVectors(String mapping, BytesReference source, XContentType xContentType) throws IOException { + var settings = Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build(); + MapperService mapperService = createMapperService(settings, mapping); + var parsedDoc = mapperService.documentMapper().parse(new SourceToParse("0", source, xContentType)); + try (var directory = newDirectory()) { + IndexWriterConfig config = newIndexWriterConfig(random(), new StandardAnalyzer()); + try (var iw = new RandomIndexWriter(random(), directory, config)) { + parsedDoc.updateSeqID(0, 1); + parsedDoc.version().setLongValue(0); + iw.addDocuments(parsedDoc.docs()); + } + try (var indexReader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) { + var provider = SourceProvider.fromLookup( + mapperService.mappingLookup(), + null, + mapperService.getMapperMetrics().sourceFieldMetrics() + ); + var searchSource = provider.getSource(indexReader.leaves().get(0), parsedDoc.docs().size() - 1); + assertToXContentEquivalent(source, searchSource.internalSourceRef(), xContentType); + } + } + } + @Override protected IngestScriptSupport ingestScriptSupport() { throw new 
AssumptionViolatedException("not supported"); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorFieldsRecoveryTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorFieldsRecoveryTests.java new file mode 100644 index 0000000000000..138d138b741e5 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorFieldsRecoveryTests.java @@ -0,0 +1,263 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper.vectors; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.engine.Engine; +import org.elasticsearch.index.engine.EngineTestCase; +import org.elasticsearch.index.engine.LuceneChangesSnapshot; +import org.elasticsearch.index.engine.LuceneSyntheticSourceChangesSnapshot; +import org.elasticsearch.index.engine.SearchBasedChangesSnapshot; +import org.elasticsearch.index.engine.TranslogOperationAsserter; +import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.SourceFieldMapper; +import 
org.elasticsearch.index.mapper.SourceToParse; +import org.elasticsearch.index.translog.Translog; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xcontent.json.JsonXContent; +import org.hamcrest.Matchers; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS; +import static org.hamcrest.Matchers.equalTo; + +public class SyntheticVectorFieldsRecoveryTests extends EngineTestCase { + private final boolean useSynthetic; + private final boolean useSyntheticRecovery; + private final boolean useIncludesExcludes; + + public SyntheticVectorFieldsRecoveryTests(boolean useSynthetic, boolean useSyntheticRecovery, boolean useIncludesExcludes) { + this.useSynthetic = useSynthetic; + this.useSyntheticRecovery = useSyntheticRecovery; + this.useIncludesExcludes = useIncludesExcludes; + } + + @ParametersFactory + public static Iterable parameters() throws Exception { + return List.of( + new Object[] { false, false, false }, + new Object[] { false, false, true }, + new Object[] { true, false, false }, + new Object[] { true, true, false } + ); + } + + @Override + protected Settings indexSettings() { + var builder = Settings.builder().put(super.indexSettings()); + if (useSynthetic) { + builder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), SourceFieldMapper.Mode.SYNTHETIC.name()); + builder.put(IndexSettings.RECOVERY_USE_SYNTHETIC_SOURCE_SETTING.getKey(), useSyntheticRecovery); + } + builder.put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true); + return builder.build(); + } + + @Override + protected String defaultMapping() { + try { + XContentBuilder builder = JsonXContent.contentBuilder().startObject(); + if (useIncludesExcludes) { + builder.startObject(SourceFieldMapper.NAME).array("excludes", "field").endObject(); + } + builder.field("dynamic", false); + 
builder.startObject("properties"); + + builder.startObject("field"); + builder.field("type", "keyword"); + builder.endObject(); + + builder.startObject("emb"); + builder.field("type", "dense_vector"); + builder.field("dims", 10); + builder.field("similarity", "l2_norm"); + builder.endObject(); + + builder.startObject("nested"); + builder.field("type", "nested"); + builder.startObject("properties"); + builder.startObject("emb"); + builder.field("type", "dense_vector"); + builder.field("dims", 10); + builder.field("similarity", "l2_norm"); + builder.endObject(); + builder.endObject(); + builder.endObject(); + + builder.endObject(); + builder.endObject(); + return BytesReference.bytes(builder).utf8ToString(); + } catch (IOException exc) { + throw new RuntimeException(exc); + } + } + + public void testSnapshotRecovery() throws IOException { + assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); + List expectedOperations = new ArrayList<>(); + int size = randomIntBetween(10, 50); + for (int i = 0; i < size; i++) { + var source = randomSource(); + var sourceToParse = new SourceToParse(Integer.toString(i), source, XContentType.JSON, null); + var doc = mapperService.documentMapper().parse(sourceToParse); + assertNull(doc.dynamicMappingsUpdate()); + if (useSynthetic) { + if (useSyntheticRecovery) { + assertNull(doc.rootDoc().getField(SourceFieldMapper.RECOVERY_SOURCE_NAME)); + assertNotNull(doc.rootDoc().getField(SourceFieldMapper.RECOVERY_SOURCE_SIZE_NAME)); + } else { + assertNotNull(doc.rootDoc().getField(SourceFieldMapper.RECOVERY_SOURCE_NAME)); + assertNull(doc.rootDoc().getField(SourceFieldMapper.RECOVERY_SOURCE_SIZE_NAME)); + } + } else { + if (useIncludesExcludes) { + assertNotNull(doc.rootDoc().getField(SourceFieldMapper.RECOVERY_SOURCE_NAME)); + var originalSource = new BytesArray(doc.rootDoc().getField(SourceFieldMapper.RECOVERY_SOURCE_NAME).binaryValue()); + var map = XContentHelper.convertToMap(originalSource, false, 
XContentType.JSON); + assertThat(map.v2().size(), Matchers.anyOf(equalTo(1), equalTo(2))); + assertNull(map.v2().remove(InferenceMetadataFieldsMapper.NAME)); + } else { + assertNull(doc.rootDoc().getField(SourceFieldMapper.RECOVERY_SOURCE_NAME)); + } + } + var op = indexForDoc(doc); + var result = engine.index(op); + expectedOperations.add( + new Translog.Index( + result.getId(), + result.getSeqNo(), + result.getTerm(), + result.getVersion(), + op.source(), + op.routing(), + op.getAutoGeneratedIdTimestamp() + ) + ); + + if (frequently()) { + engine.flush(); + } + } + engine.flush(); + + var searcher = engine.acquireSearcher("test", Engine.SearcherScope.INTERNAL); + try ( + var snapshot = newRandomSnapshot( + engine.config().getMapperService(), + searcher, + SearchBasedChangesSnapshot.DEFAULT_BATCH_SIZE, + 0, + size - 1, + true, + randomBoolean(), + randomBoolean(), + IndexVersion.current() + ) + ) { + var asserter = TranslogOperationAsserter.withEngineConfig(engine.config()); + for (int i = 0; i < size; i++) { + var op = snapshot.next(); + assertThat(op.opType(), equalTo(Translog.Operation.Type.INDEX)); + Translog.Index indexOp = (Translog.Index) op; + asserter.assertSameIndexOperation(indexOp, expectedOperations.get(i)); + } + assertNull(snapshot.next()); + } + } + + private Translog.Snapshot newRandomSnapshot( + MapperService mapperService, + Engine.Searcher engineSearcher, + int searchBatchSize, + long fromSeqNo, + long toSeqNo, + boolean requiredFullRange, + boolean singleConsumer, + boolean accessStats, + IndexVersion indexVersionCreated + ) throws IOException { + if (useSyntheticRecovery) { + return new LuceneSyntheticSourceChangesSnapshot( + mapperService, + engineSearcher, + searchBatchSize, + randomLongBetween(0, ByteSizeValue.ofBytes(Integer.MAX_VALUE).getBytes()), + fromSeqNo, + toSeqNo, + requiredFullRange, + accessStats, + indexVersionCreated + ); + } else { + return new LuceneChangesSnapshot( + mapperService, + engineSearcher, + searchBatchSize, + 
fromSeqNo, + toSeqNo, + requiredFullRange, + singleConsumer, + accessStats, + indexVersionCreated + ); + } + } + + private BytesReference randomSource() throws IOException { + var builder = JsonXContent.contentBuilder().startObject(); + builder.field("field", randomAlphaOfLengthBetween(10, 30)); + if (rarely()) { + return BytesReference.bytes(builder.endObject()); + } + + if (usually()) { + builder.field("emb", randomVector()); + } + + if (randomBoolean()) { + int numNested = randomIntBetween(0, 6); + builder.startArray("nested"); + for (int i = 0; i < numNested; i++) { + builder.startObject(); + if (randomBoolean()) { + builder.field("paragraph_id", i); + } + if (randomBoolean()) { + builder.field("emb", randomVector()); + } + builder.endObject(); + } + builder.endArray(); + } + builder.endObject(); + return BytesReference.bytes(builder); + } + + private static float[] randomVector() { + float[] vector = new float[10]; + for (int i = 0; i < 10; i++) { + vector[i] = randomByte(); + } + return vector; + } +} diff --git a/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java b/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java index feb44261ce3ed..f38b6ef660eff 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java @@ -8,26 +8,35 @@ */ package org.elasticsearch.index.shard; +import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Strings; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.VersionType; import org.elasticsearch.index.engine.Engine; import 
org.elasticsearch.index.engine.EngineTestCase; import org.elasticsearch.index.engine.InternalEngine; import org.elasticsearch.index.engine.LiveVersionMapTestUtils; +import org.elasticsearch.index.engine.TranslogOperationAsserter; import org.elasticsearch.index.engine.VersionConflictEngineException; import org.elasticsearch.index.get.GetResult; import org.elasticsearch.index.mapper.RoutingFieldMapper; +import org.elasticsearch.index.translog.Translog; import org.elasticsearch.search.fetch.subphase.FetchSourceContext; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentType; import java.io.IOException; import java.util.Arrays; import java.util.function.LongSupplier; +import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO; import static org.hamcrest.Matchers.equalTo; @@ -132,6 +141,30 @@ public void testGetFromTranslogWithDenseVector() throws IOException { runGetFromTranslogWithOptions(docToIndex, "\"enabled\": true", null, docToIndex, "\"text\"", "foo", "\"dense_vector\"", false); } + public void testGetFromTranslogWithSyntheticVector() throws IOException { + assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); + float[] vector = new float[2048]; + for (int i = 0; i < vector.length; i++) { + vector[i] = randomByte(); + } + String docToIndex = Strings.format(""" + { + "bar": %s, + "foo": "foo" + } + """, Arrays.toString(vector)); + runGetFromTranslogWithOptions( + docToIndex, + "\"enabled\": true", + Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build(), + docToIndex, + "\"text\"", + "foo", + "\"dense_vector\"", + true + ); + } + private void 
runGetFromTranslogWithOptions( String docToIndex, String sourceOptions, @@ -163,7 +196,6 @@ private void runGetFromTranslogWithOptions( String fieldTypeBar, boolean sourceOnlyFetchCreatesInMemoryReader ) throws IOException { - var indexSettingsBuilder = indexSettings(IndexVersion.current(), 1, 1); if (additionalSettings != null) { indexSettingsBuilder.put(additionalSettings); @@ -191,7 +223,12 @@ private void runGetFromTranslogWithOptions( assertFalse(testGet.getFields().containsKey(RoutingFieldMapper.NAME)); assertFalse(testGet.getFields().containsKey("foo")); assertFalse(testGet.getFields().containsKey("bar")); - assertThat(testGet.sourceRef() == null ? "" : testGet.sourceRef().utf8ToString(), equalTo(expectedResult)); + var asserter = TranslogOperationAsserter.withEngineConfig(primary.getEngine().getEngineConfig()); + if (testGet.sourceRef() == null) { + assertThat("", equalTo(expectedResult)); + } else { + asserter.assertSameIndexOperation(toIndexOp(testGet.sourceRef().utf8ToString()), toIndexOp(expectedResult)); + } try (Engine.Searcher searcher = primary.getEngine().acquireSearcher("test", Engine.SearcherScope.INTERNAL)) { assertEquals(searcher.getIndexReader().maxDoc(), 1); // we refreshed } @@ -199,7 +236,11 @@ private void runGetFromTranslogWithOptions( indexDoc(primary, "1", docToIndex, XContentType.JSON, "foobar"); assertTrue(primary.getEngine().refreshNeeded()); GetResult testGet1 = getForUpdate(primary, "1", UNASSIGNED_SEQ_NO, UNASSIGNED_PRIMARY_TERM); - assertEquals(testGet1.sourceRef() == null ? 
"" : testGet1.sourceRef().utf8ToString(), expectedResult); + if (testGet1.sourceRef() == null) { + assertThat("", equalTo(expectedResult)); + } else { + asserter.assertSameIndexOperation(toIndexOp(testGet1.sourceRef().utf8ToString()), toIndexOp(expectedResult)); + } assertTrue(testGet1.getFields().containsKey(RoutingFieldMapper.NAME)); assertFalse(testGet.getFields().containsKey("foo")); assertFalse(testGet.getFields().containsKey("bar")); @@ -220,7 +261,11 @@ private void runGetFromTranslogWithOptions( assertTrue(primary.getEngine().refreshNeeded()); GetResult testGet2 = primary.getService() .get("2", new String[] { "foo" }, true, 1, VersionType.INTERNAL, FetchSourceContext.FETCH_SOURCE, false); - assertEquals(testGet2.sourceRef() == null ? "" : testGet2.sourceRef().utf8ToString(), expectedResult); + if (testGet2.sourceRef() == null) { + assertThat("", equalTo(expectedResult)); + } else { + asserter.assertSameIndexOperation(toIndexOp(testGet2.sourceRef().utf8ToString()), toIndexOp(expectedResult)); + } assertTrue(testGet2.getFields().containsKey(RoutingFieldMapper.NAME)); assertTrue(testGet2.getFields().containsKey("foo")); assertEquals(expectedFooVal, testGet2.getFields().get("foo").getValue()); @@ -235,7 +280,11 @@ private void runGetFromTranslogWithOptions( testGet2 = primary.getService() .get("2", new String[] { "foo" }, true, 1, VersionType.INTERNAL, FetchSourceContext.FETCH_SOURCE, false); - assertEquals(testGet2.sourceRef() == null ? 
"" : testGet2.sourceRef().utf8ToString(), expectedResult); + if (testGet2.sourceRef() == null) { + assertThat("", equalTo(expectedResult)); + } else { + asserter.assertSameIndexOperation(toIndexOp(testGet2.sourceRef().utf8ToString()), toIndexOp(expectedResult)); + } assertTrue(testGet2.getFields().containsKey(RoutingFieldMapper.NAME)); assertTrue(testGet2.getFields().containsKey("foo")); assertEquals(expectedFooVal, testGet2.getFields().get("foo").getValue()); @@ -354,4 +403,19 @@ public void testGetFromTranslog() throws IOException { closeShards(primary); } + + Translog.Index toIndexOp(String source) throws IOException { + XContentParser parser = createParser(XContentType.JSON.xContent(), source); + XContentBuilder builder = XContentFactory.jsonBuilder(); + builder.copyCurrentStructure(parser); + return new Translog.Index( + "1", + 0, + 1, + 1, + new BytesArray(org.elasticsearch.common.Strings.toString(builder)), + null, + IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP + ); + } } diff --git a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java index 2327ac06b9e81..e31a0391b66ed 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java @@ -1419,7 +1419,8 @@ public static void assertConsistentHistoryBetweenTranslogAndLuceneIndex(Engine e assertThat(luceneOp.opType(), equalTo(translogOp.opType())); if (luceneOp.opType() == Translog.Operation.Type.INDEX) { if (engine.engineConfig.getIndexSettings().isRecoverySourceSyntheticEnabled() - || engine.engineConfig.getMapperService().mappingLookup().inferenceFields().isEmpty() == false) { + || engine.engineConfig.getMapperService().mappingLookup().inferenceFields().isEmpty() == false + || engine.engineConfig.getMapperService().mappingLookup().syntheticVectorFields().isEmpty() == false) { assertTrue( 
"luceneOp=" + luceneOp + " != translogOp=" + translogOp, translogOperationAsserter.assertSameIndexOperation((Translog.Index) luceneOp, (Translog.Index) translogOp) diff --git a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java index a23a4b866998b..d0d6a4825e1d1 100644 --- a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java +++ b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java @@ -22,7 +22,8 @@ public enum FeatureFlag { USE_LUCENE101_POSTINGS_FORMAT("es.use_lucene101_postings_format_feature_flag_enabled=true", Version.fromString("9.1.0"), null), IVF_FORMAT("es.ivf_format_feature_flag_enabled=true", Version.fromString("9.1.0"), null), LOGS_STREAM("es.logs_stream_feature_flag_enabled=true", Version.fromString("9.1.0"), null), - PATTERNED_TEXT("es.patterned_text_feature_flag_enabled=true", Version.fromString("9.1.0"), null); + PATTERNED_TEXT("es.patterned_text_feature_flag_enabled=true", Version.fromString("9.1.0"), null), + SYNTHETIC_VECTORS("es.mapping_synthetic_vectors=true", Version.fromString("9.2.0"), null); public final String systemProperty; public final Version from; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 31e97098e2877..e16b41a28e274 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -1175,7 +1175,8 @@ private static Mapper.Builder createEmbeddingsField( case TEXT_EMBEDDING -> { DenseVectorFieldMapper.Builder denseVectorMapperBuilder = new DenseVectorFieldMapper.Builder( CHUNKED_EMBEDDINGS_FIELD, - 
indexVersionCreated + indexVersionCreated, + false ); SimilarityMeasure similarity = modelSettings.similarity(); diff --git a/x-pack/qa/core-rest-tests-with-security/src/yamlRestTest/java/org/elasticsearch/xpack/security/CoreWithSecurityClientYamlTestSuiteIT.java b/x-pack/qa/core-rest-tests-with-security/src/yamlRestTest/java/org/elasticsearch/xpack/security/CoreWithSecurityClientYamlTestSuiteIT.java index 0a394a63e1e6f..0291e55187278 100644 --- a/x-pack/qa/core-rest-tests-with-security/src/yamlRestTest/java/org/elasticsearch/xpack/security/CoreWithSecurityClientYamlTestSuiteIT.java +++ b/x-pack/qa/core-rest-tests-with-security/src/yamlRestTest/java/org/elasticsearch/xpack/security/CoreWithSecurityClientYamlTestSuiteIT.java @@ -53,6 +53,7 @@ public class CoreWithSecurityClientYamlTestSuiteIT extends ESClientYamlSuiteTest .feature(FeatureFlag.DOC_VALUES_SKIPPER) .feature(FeatureFlag.USE_LUCENE101_POSTINGS_FORMAT) .feature(FeatureFlag.IVF_FORMAT) + .feature(FeatureFlag.SYNTHETIC_VECTORS) .build(); public CoreWithSecurityClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) {