Skip to content

Commit 5a4961b

Browse files
authored
Add synthetic vectors support for rank_vectors (#130715)
This change adds support for synthetic vectors (added in #130382) to the rank_vectors field type.
1 parent dc19728 commit 5a4961b

File tree

7 files changed

+746
-285
lines changed

7 files changed

+746
-285
lines changed

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2853,10 +2853,6 @@ private DenseVectorFieldMapper(
28532853
this.isSyntheticVector = isSyntheticVector;
28542854
}
28552855

2856-
public boolean isSyntheticVector() {
2857-
return isSyntheticVector;
2858-
}
2859-
28602856
@Override
28612857
public DenseVectorFieldType fieldType() {
28622858
return (DenseVectorFieldType) super.fieldType();
@@ -3032,7 +3028,7 @@ public String toString() {
30323028

30333029
@Override
30343030
public SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader() {
3035-
return isSyntheticVector()
3031+
return isSyntheticVector
30363032
? new SyntheticDenseVectorPatchLoader(new IndexedSyntheticFieldLoader(indexCreatedVersion, fieldType().similarity))
30373033
: null;
30383034
}
@@ -3131,7 +3127,7 @@ public void write(XContentBuilder b) throws IOException {
31313127
*
31323128
* @throws IOException if reading fails
31333129
*/
3134-
public Object copyVectorAsList() throws IOException {
3130+
private Object copyVectorAsList() throws IOException {
31353131
assert hasValue : "vector is null for ord=" + ord;
31363132
if (floatValues != null) {
31373133
float[] raw = floatValues.vectorValue(ord);
@@ -3235,8 +3231,7 @@ public SourceLoader.SyntheticVectorsLoader.Leaf leaf(LeafReaderContext context)
32353231
if (dvLoader == null) {
32363232
return;
32373233
}
3238-
dvLoader.advanceToDoc(doc);
3239-
if (syntheticFieldLoader.hasValue()) {
3234+
if (dvLoader.advanceToDoc(doc) && syntheticFieldLoader.hasValue()) {
32403235
// add vectors as list since that's how they're parsed from xcontent.
32413236
acc.add(
32423237
new SourceLoader.LeafSyntheticVectorPath(syntheticFieldLoader.fieldName(), syntheticFieldLoader.copyVectorAsList())

server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java

Lines changed: 12 additions & 258 deletions
Original file line number | Diff line number | Diff line change
@@ -11,28 +11,21 @@
1111

1212
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
1313

14-
import org.apache.lucene.analysis.standard.StandardAnalyzer;
1514
import org.apache.lucene.codecs.Codec;
1615
import org.apache.lucene.codecs.KnnVectorsFormat;
1716
import org.apache.lucene.document.BinaryDocValuesField;
1817
import org.apache.lucene.document.KnnByteVectorField;
1918
import org.apache.lucene.document.KnnFloatVectorField;
20-
import org.apache.lucene.index.DirectoryReader;
21-
import org.apache.lucene.index.IndexWriterConfig;
2219
import org.apache.lucene.index.IndexableField;
2320
import org.apache.lucene.index.VectorEncoding;
2421
import org.apache.lucene.index.VectorSimilarityFunction;
2522
import org.apache.lucene.search.FieldExistsQuery;
2623
import org.apache.lucene.search.Query;
27-
import org.apache.lucene.tests.index.RandomIndexWriter;
2824
import org.apache.lucene.util.BytesRef;
2925
import org.apache.lucene.util.VectorUtil;
30-
import org.elasticsearch.common.Strings;
3126
import org.elasticsearch.common.bytes.BytesReference;
32-
import org.elasticsearch.common.settings.Settings;
3327
import org.elasticsearch.common.util.BigArrays;
3428
import org.elasticsearch.common.xcontent.XContentHelper;
35-
import org.elasticsearch.index.IndexSettings;
3629
import org.elasticsearch.index.IndexVersion;
3730
import org.elasticsearch.index.IndexVersions;
3831
import org.elasticsearch.index.codec.CodecService;
@@ -46,7 +39,6 @@
4639
import org.elasticsearch.index.mapper.MapperBuilderContext;
4740
import org.elasticsearch.index.mapper.MapperParsingException;
4841
import org.elasticsearch.index.mapper.MapperService;
49-
import org.elasticsearch.index.mapper.MapperTestCase;
5042
import org.elasticsearch.index.mapper.ParsedDocument;
5143
import org.elasticsearch.index.mapper.SourceToParse;
5244
import org.elasticsearch.index.mapper.ValueFetcher;
@@ -61,7 +53,6 @@
6153
import org.elasticsearch.test.ESTestCase;
6254
import org.elasticsearch.test.index.IndexVersionUtils;
6355
import org.elasticsearch.xcontent.XContentBuilder;
64-
import org.elasticsearch.xcontent.XContentType;
6556
import org.junit.AssumptionViolatedException;
6657

6758
import java.io.IOException;
@@ -74,18 +65,16 @@
7465
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
7566
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
7667
import static org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase.randomNormalizedVector;
77-
import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS;
7868
import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.DYNAMIC_NPROBE;
7969
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DEFAULT_OVERSAMPLE;
8070
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.IVF_FORMAT;
81-
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertToXContentEquivalent;
8271
import static org.hamcrest.Matchers.containsString;
8372
import static org.hamcrest.Matchers.equalTo;
8473
import static org.hamcrest.Matchers.instanceOf;
8574
import static org.mockito.Mockito.mock;
8675
import static org.mockito.Mockito.when;
8776

88-
public class DenseVectorFieldMapperTests extends MapperTestCase {
77+
public class DenseVectorFieldMapperTests extends SyntheticVectorsMapperTestCase {
8978

9079
private static final IndexVersion INDEXED_BY_DEFAULT_PREVIOUS_INDEX_VERSION = IndexVersions.V_8_10_0;
9180
private final ElementType elementType;
@@ -95,7 +84,7 @@ public class DenseVectorFieldMapperTests extends MapperTestCase {
9584

9685
public DenseVectorFieldMapperTests() {
9786
this.elementType = randomFrom(ElementType.BYTE, ElementType.FLOAT, ElementType.BIT);
98-
this.indexed = randomBoolean();
87+
this.indexed = usually();
9988
this.indexOptionsSet = this.indexed && randomBoolean();
10089
int baseDims = ElementType.BIT == elementType ? 4 * Byte.SIZE : 4;
10190
int randomMultiplier = ElementType.FLOAT == elementType ? randomIntBetween(1, 64) : 1;
@@ -160,17 +149,25 @@ private void indexMapping(XContentBuilder b, IndexVersion indexVersion) throws I
160149
protected Object getSampleValueForDocument() {
161150
return elementType == ElementType.FLOAT
162151
? convertToList(randomNormalizedVector(this.dims))
163-
: List.of((byte) 1, (byte) 1, (byte) 1, (byte) 1);
152+
: convertToList(randomByteArrayOfLength(elementType == ElementType.BIT ? this.dims / Byte.SIZE : dims));
164153
}
165154

166-
private static List<Float> convertToList(float[] vector) {
155+
public static List<Float> convertToList(float[] vector) {
167156
List<Float> list = new ArrayList<>(vector.length);
168157
for (float v : vector) {
169158
list.add(v);
170159
}
171160
return list;
172161
}
173162

163+
public static List<Byte> convertToList(byte[] vector) {
164+
List<Byte> list = new ArrayList<>(vector.length);
165+
for (byte v : vector) {
166+
list.add(v);
167+
}
168+
return list;
169+
}
170+
174171
@Override
175172
protected void registerParameters(ParameterChecker checker) throws IOException {
176173
checker.registerConflictCheck(
@@ -2920,249 +2917,6 @@ public void testInvalidVectorDimensions() {
29202917
}
29212918
}
29222919

2923-
public void testSyntheticVectorsMinimalValidDocument() throws IOException {
2924-
assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS);
2925-
for (XContentType type : XContentType.values()) {
2926-
BytesReference source = generateRandomDoc(type, true, true, false, false, false);
2927-
assertSyntheticVectors(buildVectorMapping(), source, type);
2928-
}
2929-
}
2930-
2931-
public void testSyntheticVectorsFullDocument() throws IOException {
2932-
assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS);
2933-
for (XContentType type : XContentType.values()) {
2934-
BytesReference source = generateRandomDoc(type, true, true, true, true, false);
2935-
assertSyntheticVectors(buildVectorMapping(), source, type);
2936-
}
2937-
}
2938-
2939-
public void testSyntheticVectorsWithUnmappedFields() throws IOException {
2940-
assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS);
2941-
for (XContentType type : XContentType.values()) {
2942-
BytesReference source = generateRandomDoc(type, true, true, true, true, true);
2943-
assertSyntheticVectors(buildVectorMapping(), source, type);
2944-
}
2945-
}
2946-
2947-
public void testSyntheticVectorsMissingRootFields() throws IOException {
2948-
assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS);
2949-
for (XContentType type : XContentType.values()) {
2950-
BytesReference source = generateRandomDoc(type, false, false, false, false, false);
2951-
assertSyntheticVectors(buildVectorMapping(), source, type);
2952-
}
2953-
}
2954-
2955-
public void testSyntheticVectorsPartialNestedContent() throws IOException {
2956-
assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS);
2957-
for (XContentType type : XContentType.values()) {
2958-
BytesReference source = generateRandomDoc(type, true, true, true, false, false);
2959-
assertSyntheticVectors(buildVectorMapping(), source, type);
2960-
}
2961-
}
2962-
2963-
public void testFlatPathDocument() throws IOException {
2964-
assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS);
2965-
for (XContentType type : XContentType.values()) {
2966-
BytesReference source = generateRandomDocWithFlatPath(type);
2967-
assertSyntheticVectors(buildVectorMapping(), source, type);
2968-
}
2969-
}
2970-
2971-
private static String buildVectorMapping() throws IOException {
2972-
try (XContentBuilder builder = XContentBuilder.builder(XContentType.JSON.xContent())) {
2973-
builder.startObject(); // root
2974-
builder.startObject("_doc");
2975-
builder.field("dynamic", "false");
2976-
2977-
builder.startObject("properties");
2978-
2979-
// field
2980-
builder.startObject("field");
2981-
builder.field("type", "keyword");
2982-
builder.endObject();
2983-
2984-
// emb
2985-
builder.startObject("emb");
2986-
builder.field("type", "dense_vector");
2987-
builder.field("dims", 3);
2988-
builder.field("similarity", "cosine");
2989-
builder.endObject();
2990-
2991-
// another_field
2992-
builder.startObject("another_field");
2993-
builder.field("type", "keyword");
2994-
builder.endObject();
2995-
2996-
// obj
2997-
builder.startObject("obj");
2998-
builder.startObject("properties");
2999-
3000-
// nested
3001-
builder.startObject("nested");
3002-
builder.field("type", "nested");
3003-
builder.startObject("properties");
3004-
3005-
// nested.field
3006-
builder.startObject("field");
3007-
builder.field("type", "keyword");
3008-
builder.endObject();
3009-
3010-
// nested.emb
3011-
builder.startObject("emb");
3012-
builder.field("type", "dense_vector");
3013-
builder.field("dims", 3);
3014-
builder.field("similarity", "cosine");
3015-
builder.endObject();
3016-
3017-
// double_nested
3018-
builder.startObject("double_nested");
3019-
builder.field("type", "nested");
3020-
builder.startObject("properties");
3021-
3022-
// double_nested.field
3023-
builder.startObject("field");
3024-
builder.field("type", "keyword");
3025-
builder.endObject();
3026-
3027-
// double_nested.emb
3028-
builder.startObject("emb");
3029-
builder.field("type", "dense_vector");
3030-
builder.field("dims", 3);
3031-
builder.field("similarity", "cosine");
3032-
builder.endObject();
3033-
3034-
builder.endObject(); // double_nested.properties
3035-
builder.endObject(); // double_nested
3036-
3037-
builder.endObject(); // nested.properties
3038-
builder.endObject(); // nested
3039-
3040-
builder.endObject(); // obj.properties
3041-
builder.endObject(); // obj
3042-
3043-
builder.endObject(); // properties
3044-
builder.endObject(); // _doc
3045-
builder.endObject(); // root
3046-
3047-
return Strings.toString(builder);
3048-
}
3049-
}
3050-
3051-
private BytesReference generateRandomDoc(
3052-
XContentType xContentType,
3053-
boolean includeRootField,
3054-
boolean includeVector,
3055-
boolean includeNested,
3056-
boolean includeDoubleNested,
3057-
boolean includeUnmapped
3058-
) throws IOException {
3059-
try (var builder = XContentBuilder.builder(xContentType.xContent())) {
3060-
builder.startObject();
3061-
3062-
if (includeRootField) {
3063-
builder.field("field", randomAlphaOfLengthBetween(1, 2));
3064-
}
3065-
3066-
if (includeVector) {
3067-
builder.array("emb", new float[] { 1, 2, 3 });
3068-
}
3069-
3070-
if (includeUnmapped) {
3071-
builder.field("unmapped_field", "extra");
3072-
}
3073-
3074-
builder.startObject("obj");
3075-
if (includeNested) {
3076-
builder.startArray("nested");
3077-
3078-
// Entry with just a field
3079-
builder.startObject();
3080-
builder.field("field", randomAlphaOfLengthBetween(3, 6));
3081-
builder.endObject();
3082-
3083-
// Empty object
3084-
builder.startObject();
3085-
builder.endObject();
3086-
3087-
// Entry with emb and double_nested
3088-
if (includeDoubleNested) {
3089-
builder.startObject();
3090-
builder.array("emb", new float[] { 1, 2, 3 });
3091-
builder.field("field", "nested_val");
3092-
builder.startArray("double_nested");
3093-
for (int i = 0; i < 2; i++) {
3094-
builder.startObject();
3095-
builder.array("emb", new float[] { 1, 2, 3 });
3096-
builder.field("field", "dn_field");
3097-
builder.endObject();
3098-
}
3099-
builder.endArray();
3100-
builder.endObject();
3101-
}
3102-
3103-
builder.endArray();
3104-
}
3105-
builder.endObject();
3106-
3107-
builder.endObject();
3108-
return BytesReference.bytes(builder);
3109-
}
3110-
}
3111-
3112-
private BytesReference generateRandomDocWithFlatPath(XContentType xContentType) throws IOException {
3113-
try (var builder = XContentBuilder.builder(xContentType.xContent())) {
3114-
builder.startObject();
3115-
3116-
// Root-level fields
3117-
builder.field("field", randomAlphaOfLengthBetween(1, 2));
3118-
builder.array("emb", new float[] { 1, 2, 3 });
3119-
builder.field("another_field", randomAlphaOfLengthBetween(3, 5));
3120-
3121-
// Simulated flattened "obj.nested"
3122-
builder.startObject("obj.nested");
3123-
3124-
builder.field("field", randomAlphaOfLengthBetween(4, 8));
3125-
builder.array("emb", new float[] { 1, 2, 3 });
3126-
3127-
builder.startArray("double_nested");
3128-
for (int i = 0; i < randomIntBetween(1, 2); i++) {
3129-
builder.startObject();
3130-
builder.field("field", randomAlphaOfLengthBetween(4, 8));
3131-
builder.array("emb", new float[] { 1, 2, 3 });
3132-
builder.endObject();
3133-
}
3134-
builder.endArray();
3135-
3136-
builder.endObject(); // end obj.nested
3137-
3138-
builder.endObject();
3139-
return BytesReference.bytes(builder);
3140-
}
3141-
}
3142-
3143-
private void assertSyntheticVectors(String mapping, BytesReference source, XContentType xContentType) throws IOException {
3144-
var settings = Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build();
3145-
MapperService mapperService = createMapperService(settings, mapping);
3146-
var parsedDoc = mapperService.documentMapper().parse(new SourceToParse("0", source, xContentType));
3147-
try (var directory = newDirectory()) {
3148-
IndexWriterConfig config = newIndexWriterConfig(random(), new StandardAnalyzer());
3149-
try (var iw = new RandomIndexWriter(random(), directory, config)) {
3150-
parsedDoc.updateSeqID(0, 1);
3151-
parsedDoc.version().setLongValue(0);
3152-
iw.addDocuments(parsedDoc.docs());
3153-
}
3154-
try (var indexReader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) {
3155-
var provider = SourceProvider.fromLookup(
3156-
mapperService.mappingLookup(),
3157-
null,
3158-
mapperService.getMapperMetrics().sourceFieldMetrics()
3159-
);
3160-
var searchSource = provider.getSource(indexReader.leaves().get(0), parsedDoc.docs().size() - 1);
3161-
assertToXContentEquivalent(source, searchSource.internalSourceRef(), xContentType);
3162-
}
3163-
}
3164-
}
3165-
31662920
@Override
31672921
protected IngestScriptSupport ingestScriptSupport() {
31682922
throw new AssumptionViolatedException("not supported");

0 commit comments

Comments
 (0)