11
11
12
12
import com .carrotsearch .randomizedtesting .generators .RandomPicks ;
13
13
14
- import org .apache .lucene .analysis .standard .StandardAnalyzer ;
15
14
import org .apache .lucene .codecs .Codec ;
16
15
import org .apache .lucene .codecs .KnnVectorsFormat ;
17
16
import org .apache .lucene .document .BinaryDocValuesField ;
18
17
import org .apache .lucene .document .KnnByteVectorField ;
19
18
import org .apache .lucene .document .KnnFloatVectorField ;
20
- import org .apache .lucene .index .DirectoryReader ;
21
- import org .apache .lucene .index .IndexWriterConfig ;
22
19
import org .apache .lucene .index .IndexableField ;
23
20
import org .apache .lucene .index .VectorEncoding ;
24
21
import org .apache .lucene .index .VectorSimilarityFunction ;
25
22
import org .apache .lucene .search .FieldExistsQuery ;
26
23
import org .apache .lucene .search .Query ;
27
- import org .apache .lucene .tests .index .RandomIndexWriter ;
28
24
import org .apache .lucene .util .BytesRef ;
29
25
import org .apache .lucene .util .VectorUtil ;
30
- import org .elasticsearch .common .Strings ;
31
26
import org .elasticsearch .common .bytes .BytesReference ;
32
- import org .elasticsearch .common .settings .Settings ;
33
27
import org .elasticsearch .common .util .BigArrays ;
34
28
import org .elasticsearch .common .xcontent .XContentHelper ;
35
- import org .elasticsearch .index .IndexSettings ;
36
29
import org .elasticsearch .index .IndexVersion ;
37
30
import org .elasticsearch .index .IndexVersions ;
38
31
import org .elasticsearch .index .codec .CodecService ;
46
39
import org .elasticsearch .index .mapper .MapperBuilderContext ;
47
40
import org .elasticsearch .index .mapper .MapperParsingException ;
48
41
import org .elasticsearch .index .mapper .MapperService ;
49
- import org .elasticsearch .index .mapper .MapperTestCase ;
50
42
import org .elasticsearch .index .mapper .ParsedDocument ;
51
43
import org .elasticsearch .index .mapper .SourceToParse ;
52
44
import org .elasticsearch .index .mapper .ValueFetcher ;
61
53
import org .elasticsearch .test .ESTestCase ;
62
54
import org .elasticsearch .test .index .IndexVersionUtils ;
63
55
import org .elasticsearch .xcontent .XContentBuilder ;
64
- import org .elasticsearch .xcontent .XContentType ;
65
56
import org .junit .AssumptionViolatedException ;
66
57
67
58
import java .io .IOException ;
74
65
import static org .apache .lucene .codecs .lucene99 .Lucene99HnswVectorsFormat .DEFAULT_BEAM_WIDTH ;
75
66
import static org .apache .lucene .codecs .lucene99 .Lucene99HnswVectorsFormat .DEFAULT_MAX_CONN ;
76
67
import static org .apache .lucene .tests .index .BaseKnnVectorsFormatTestCase .randomNormalizedVector ;
77
- import static org .elasticsearch .index .IndexSettings .SYNTHETIC_VECTORS ;
78
68
import static org .elasticsearch .index .codec .vectors .IVFVectorsFormat .DYNAMIC_NPROBE ;
79
69
import static org .elasticsearch .index .mapper .vectors .DenseVectorFieldMapper .DEFAULT_OVERSAMPLE ;
80
70
import static org .elasticsearch .index .mapper .vectors .DenseVectorFieldMapper .IVF_FORMAT ;
81
- import static org .elasticsearch .test .hamcrest .ElasticsearchAssertions .assertToXContentEquivalent ;
82
71
import static org .hamcrest .Matchers .containsString ;
83
72
import static org .hamcrest .Matchers .equalTo ;
84
73
import static org .hamcrest .Matchers .instanceOf ;
85
74
import static org .mockito .Mockito .mock ;
86
75
import static org .mockito .Mockito .when ;
87
76
88
- public class DenseVectorFieldMapperTests extends MapperTestCase {
77
+ public class DenseVectorFieldMapperTests extends SyntheticVectorsMapperTestCase {
89
78
90
79
private static final IndexVersion INDEXED_BY_DEFAULT_PREVIOUS_INDEX_VERSION = IndexVersions .V_8_10_0 ;
91
80
private final ElementType elementType ;
@@ -95,7 +84,7 @@ public class DenseVectorFieldMapperTests extends MapperTestCase {
95
84
96
85
public DenseVectorFieldMapperTests () {
97
86
this .elementType = randomFrom (ElementType .BYTE , ElementType .FLOAT , ElementType .BIT );
98
- this .indexed = randomBoolean ();
87
+ this .indexed = usually ();
99
88
this .indexOptionsSet = this .indexed && randomBoolean ();
100
89
int baseDims = ElementType .BIT == elementType ? 4 * Byte .SIZE : 4 ;
101
90
int randomMultiplier = ElementType .FLOAT == elementType ? randomIntBetween (1 , 64 ) : 1 ;
@@ -160,17 +149,25 @@ private void indexMapping(XContentBuilder b, IndexVersion indexVersion) throws I
160
149
protected Object getSampleValueForDocument () {
161
150
return elementType == ElementType .FLOAT
162
151
? convertToList (randomNormalizedVector (this .dims ))
163
- : List . of (( byte ) 1 , ( byte ) 1 , ( byte ) 1 , ( byte ) 1 );
152
+ : convertToList ( randomByteArrayOfLength ( elementType == ElementType . BIT ? this . dims / Byte . SIZE : dims ) );
164
153
}
165
154
166
- private static List <Float > convertToList (float [] vector ) {
155
+ public static List <Float > convertToList (float [] vector ) {
167
156
List <Float > list = new ArrayList <>(vector .length );
168
157
for (float v : vector ) {
169
158
list .add (v );
170
159
}
171
160
return list ;
172
161
}
173
162
163
+ public static List <Byte > convertToList (byte [] vector ) {
164
+ List <Byte > list = new ArrayList <>(vector .length );
165
+ for (byte v : vector ) {
166
+ list .add (v );
167
+ }
168
+ return list ;
169
+ }
170
+
174
171
@ Override
175
172
protected void registerParameters (ParameterChecker checker ) throws IOException {
176
173
checker .registerConflictCheck (
@@ -2920,249 +2917,6 @@ public void testInvalidVectorDimensions() {
2920
2917
}
2921
2918
}
2922
2919
2923
- public void testSyntheticVectorsMinimalValidDocument () throws IOException {
2924
- assumeTrue ("feature flag must be enabled for synthetic vectors" , SYNTHETIC_VECTORS );
2925
- for (XContentType type : XContentType .values ()) {
2926
- BytesReference source = generateRandomDoc (type , true , true , false , false , false );
2927
- assertSyntheticVectors (buildVectorMapping (), source , type );
2928
- }
2929
- }
2930
-
2931
- public void testSyntheticVectorsFullDocument () throws IOException {
2932
- assumeTrue ("feature flag must be enabled for synthetic vectors" , SYNTHETIC_VECTORS );
2933
- for (XContentType type : XContentType .values ()) {
2934
- BytesReference source = generateRandomDoc (type , true , true , true , true , false );
2935
- assertSyntheticVectors (buildVectorMapping (), source , type );
2936
- }
2937
- }
2938
-
2939
- public void testSyntheticVectorsWithUnmappedFields () throws IOException {
2940
- assumeTrue ("feature flag must be enabled for synthetic vectors" , SYNTHETIC_VECTORS );
2941
- for (XContentType type : XContentType .values ()) {
2942
- BytesReference source = generateRandomDoc (type , true , true , true , true , true );
2943
- assertSyntheticVectors (buildVectorMapping (), source , type );
2944
- }
2945
- }
2946
-
2947
- public void testSyntheticVectorsMissingRootFields () throws IOException {
2948
- assumeTrue ("feature flag must be enabled for synthetic vectors" , SYNTHETIC_VECTORS );
2949
- for (XContentType type : XContentType .values ()) {
2950
- BytesReference source = generateRandomDoc (type , false , false , false , false , false );
2951
- assertSyntheticVectors (buildVectorMapping (), source , type );
2952
- }
2953
- }
2954
-
2955
- public void testSyntheticVectorsPartialNestedContent () throws IOException {
2956
- assumeTrue ("feature flag must be enabled for synthetic vectors" , SYNTHETIC_VECTORS );
2957
- for (XContentType type : XContentType .values ()) {
2958
- BytesReference source = generateRandomDoc (type , true , true , true , false , false );
2959
- assertSyntheticVectors (buildVectorMapping (), source , type );
2960
- }
2961
- }
2962
-
2963
- public void testFlatPathDocument () throws IOException {
2964
- assumeTrue ("feature flag must be enabled for synthetic vectors" , SYNTHETIC_VECTORS );
2965
- for (XContentType type : XContentType .values ()) {
2966
- BytesReference source = generateRandomDocWithFlatPath (type );
2967
- assertSyntheticVectors (buildVectorMapping (), source , type );
2968
- }
2969
- }
2970
-
2971
- private static String buildVectorMapping () throws IOException {
2972
- try (XContentBuilder builder = XContentBuilder .builder (XContentType .JSON .xContent ())) {
2973
- builder .startObject (); // root
2974
- builder .startObject ("_doc" );
2975
- builder .field ("dynamic" , "false" );
2976
-
2977
- builder .startObject ("properties" );
2978
-
2979
- // field
2980
- builder .startObject ("field" );
2981
- builder .field ("type" , "keyword" );
2982
- builder .endObject ();
2983
-
2984
- // emb
2985
- builder .startObject ("emb" );
2986
- builder .field ("type" , "dense_vector" );
2987
- builder .field ("dims" , 3 );
2988
- builder .field ("similarity" , "cosine" );
2989
- builder .endObject ();
2990
-
2991
- // another_field
2992
- builder .startObject ("another_field" );
2993
- builder .field ("type" , "keyword" );
2994
- builder .endObject ();
2995
-
2996
- // obj
2997
- builder .startObject ("obj" );
2998
- builder .startObject ("properties" );
2999
-
3000
- // nested
3001
- builder .startObject ("nested" );
3002
- builder .field ("type" , "nested" );
3003
- builder .startObject ("properties" );
3004
-
3005
- // nested.field
3006
- builder .startObject ("field" );
3007
- builder .field ("type" , "keyword" );
3008
- builder .endObject ();
3009
-
3010
- // nested.emb
3011
- builder .startObject ("emb" );
3012
- builder .field ("type" , "dense_vector" );
3013
- builder .field ("dims" , 3 );
3014
- builder .field ("similarity" , "cosine" );
3015
- builder .endObject ();
3016
-
3017
- // double_nested
3018
- builder .startObject ("double_nested" );
3019
- builder .field ("type" , "nested" );
3020
- builder .startObject ("properties" );
3021
-
3022
- // double_nested.field
3023
- builder .startObject ("field" );
3024
- builder .field ("type" , "keyword" );
3025
- builder .endObject ();
3026
-
3027
- // double_nested.emb
3028
- builder .startObject ("emb" );
3029
- builder .field ("type" , "dense_vector" );
3030
- builder .field ("dims" , 3 );
3031
- builder .field ("similarity" , "cosine" );
3032
- builder .endObject ();
3033
-
3034
- builder .endObject (); // double_nested.properties
3035
- builder .endObject (); // double_nested
3036
-
3037
- builder .endObject (); // nested.properties
3038
- builder .endObject (); // nested
3039
-
3040
- builder .endObject (); // obj.properties
3041
- builder .endObject (); // obj
3042
-
3043
- builder .endObject (); // properties
3044
- builder .endObject (); // _doc
3045
- builder .endObject (); // root
3046
-
3047
- return Strings .toString (builder );
3048
- }
3049
- }
3050
-
3051
- private BytesReference generateRandomDoc (
3052
- XContentType xContentType ,
3053
- boolean includeRootField ,
3054
- boolean includeVector ,
3055
- boolean includeNested ,
3056
- boolean includeDoubleNested ,
3057
- boolean includeUnmapped
3058
- ) throws IOException {
3059
- try (var builder = XContentBuilder .builder (xContentType .xContent ())) {
3060
- builder .startObject ();
3061
-
3062
- if (includeRootField ) {
3063
- builder .field ("field" , randomAlphaOfLengthBetween (1 , 2 ));
3064
- }
3065
-
3066
- if (includeVector ) {
3067
- builder .array ("emb" , new float [] { 1 , 2 , 3 });
3068
- }
3069
-
3070
- if (includeUnmapped ) {
3071
- builder .field ("unmapped_field" , "extra" );
3072
- }
3073
-
3074
- builder .startObject ("obj" );
3075
- if (includeNested ) {
3076
- builder .startArray ("nested" );
3077
-
3078
- // Entry with just a field
3079
- builder .startObject ();
3080
- builder .field ("field" , randomAlphaOfLengthBetween (3 , 6 ));
3081
- builder .endObject ();
3082
-
3083
- // Empty object
3084
- builder .startObject ();
3085
- builder .endObject ();
3086
-
3087
- // Entry with emb and double_nested
3088
- if (includeDoubleNested ) {
3089
- builder .startObject ();
3090
- builder .array ("emb" , new float [] { 1 , 2 , 3 });
3091
- builder .field ("field" , "nested_val" );
3092
- builder .startArray ("double_nested" );
3093
- for (int i = 0 ; i < 2 ; i ++) {
3094
- builder .startObject ();
3095
- builder .array ("emb" , new float [] { 1 , 2 , 3 });
3096
- builder .field ("field" , "dn_field" );
3097
- builder .endObject ();
3098
- }
3099
- builder .endArray ();
3100
- builder .endObject ();
3101
- }
3102
-
3103
- builder .endArray ();
3104
- }
3105
- builder .endObject ();
3106
-
3107
- builder .endObject ();
3108
- return BytesReference .bytes (builder );
3109
- }
3110
- }
3111
-
3112
- private BytesReference generateRandomDocWithFlatPath (XContentType xContentType ) throws IOException {
3113
- try (var builder = XContentBuilder .builder (xContentType .xContent ())) {
3114
- builder .startObject ();
3115
-
3116
- // Root-level fields
3117
- builder .field ("field" , randomAlphaOfLengthBetween (1 , 2 ));
3118
- builder .array ("emb" , new float [] { 1 , 2 , 3 });
3119
- builder .field ("another_field" , randomAlphaOfLengthBetween (3 , 5 ));
3120
-
3121
- // Simulated flattened "obj.nested"
3122
- builder .startObject ("obj.nested" );
3123
-
3124
- builder .field ("field" , randomAlphaOfLengthBetween (4 , 8 ));
3125
- builder .array ("emb" , new float [] { 1 , 2 , 3 });
3126
-
3127
- builder .startArray ("double_nested" );
3128
- for (int i = 0 ; i < randomIntBetween (1 , 2 ); i ++) {
3129
- builder .startObject ();
3130
- builder .field ("field" , randomAlphaOfLengthBetween (4 , 8 ));
3131
- builder .array ("emb" , new float [] { 1 , 2 , 3 });
3132
- builder .endObject ();
3133
- }
3134
- builder .endArray ();
3135
-
3136
- builder .endObject (); // end obj.nested
3137
-
3138
- builder .endObject ();
3139
- return BytesReference .bytes (builder );
3140
- }
3141
- }
3142
-
3143
- private void assertSyntheticVectors (String mapping , BytesReference source , XContentType xContentType ) throws IOException {
3144
- var settings = Settings .builder ().put (IndexSettings .INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING .getKey (), true ).build ();
3145
- MapperService mapperService = createMapperService (settings , mapping );
3146
- var parsedDoc = mapperService .documentMapper ().parse (new SourceToParse ("0" , source , xContentType ));
3147
- try (var directory = newDirectory ()) {
3148
- IndexWriterConfig config = newIndexWriterConfig (random (), new StandardAnalyzer ());
3149
- try (var iw = new RandomIndexWriter (random (), directory , config )) {
3150
- parsedDoc .updateSeqID (0 , 1 );
3151
- parsedDoc .version ().setLongValue (0 );
3152
- iw .addDocuments (parsedDoc .docs ());
3153
- }
3154
- try (var indexReader = wrapInMockESDirectoryReader (DirectoryReader .open (directory ))) {
3155
- var provider = SourceProvider .fromLookup (
3156
- mapperService .mappingLookup (),
3157
- null ,
3158
- mapperService .getMapperMetrics ().sourceFieldMetrics ()
3159
- );
3160
- var searchSource = provider .getSource (indexReader .leaves ().get (0 ), parsedDoc .docs ().size () - 1 );
3161
- assertToXContentEquivalent (source , searchSource .internalSourceRef (), xContentType );
3162
- }
3163
- }
3164
- }
3165
-
3166
2920
@ Override
3167
2921
protected IngestScriptSupport ingestScriptSupport () {
3168
2922
throw new AssumptionViolatedException ("not supported" );
0 commit comments