Skip to content

Commit cdfd3dd

Browse files
authored
Remove most usages of XFeatureField as Lucene upgrade provides needed APIs (#130572)
XFeatureField was added in conjunction with a required update for SparseVectorFields. However, with the upgrade to Lucene 10.1+, it has outstayed its usefulness.
1 parent 77c0e76 commit cdfd3dd

File tree

4 files changed

+21
-156
lines changed

4 files changed

+21
-156
lines changed

server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -369,9 +369,9 @@ public void parse(DocumentParserContext context) throws IOException {
369369
// based on recommendations from this paper: https://arxiv.org/pdf/2305.18494.pdf
370370
IndexableField currentField = context.doc().getByKey(key);
371371
if (currentField == null) {
372-
context.doc().addWithKey(key, new XFeatureField(fullPath(), feature, value, fieldType().isStored()));
373-
} else if (currentField instanceof XFeatureField && ((XFeatureField) currentField).getFeatureValue() < value) {
374-
((XFeatureField) currentField).setFeatureValue(value);
372+
context.doc().addWithKey(key, new FeatureField(fullPath(), feature, value, fieldType().isStored()));
373+
} else if (currentField instanceof FeatureField ff && ff.getFeatureValue() < value) {
374+
ff.setFeatureValue(value);
375375
}
376376
} else {
377377
throw new IllegalArgumentException(

server/src/main/java/org/elasticsearch/index/mapper/vectors/XFeatureField.java

Lines changed: 2 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -18,149 +18,13 @@
1818

1919
package org.elasticsearch.index.mapper.vectors;
2020

21-
import org.apache.lucene.analysis.Analyzer;
22-
import org.apache.lucene.analysis.TokenStream;
23-
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
24-
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
2521
import org.apache.lucene.document.FeatureField;
26-
import org.apache.lucene.document.Field;
27-
import org.apache.lucene.document.FieldType;
28-
import org.apache.lucene.index.IndexOptions;
2922

3023
/**
3124
* This class is forked from the Lucene {@link FeatureField} implementation to enable support for storing term vectors.
32-
* It should be removed once apache/lucene#14034 becomes available.
25+
* Its purpose is to allow decoding the feature value from the term frequency
3326
*/
34-
public final class XFeatureField extends Field {
35-
private static final FieldType FIELD_TYPE = new FieldType();
36-
private static final FieldType FIELD_TYPE_STORE_TERM_VECTORS = new FieldType();
37-
38-
static {
39-
FIELD_TYPE.setTokenized(false);
40-
FIELD_TYPE.setOmitNorms(true);
41-
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
42-
43-
FIELD_TYPE_STORE_TERM_VECTORS.setTokenized(false);
44-
FIELD_TYPE_STORE_TERM_VECTORS.setOmitNorms(true);
45-
FIELD_TYPE_STORE_TERM_VECTORS.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
46-
FIELD_TYPE_STORE_TERM_VECTORS.setStoreTermVectors(true);
47-
}
48-
49-
private float featureValue;
50-
51-
/**
52-
* Create a feature.
53-
*
54-
* @param fieldName The name of the field to store the information into. All features may be
55-
* stored in the same field.
56-
* @param featureName The name of the feature, eg. 'pagerank`. It will be indexed as a term.
57-
* @param featureValue The value of the feature, must be a positive, finite, normal float.
58-
*/
59-
public XFeatureField(String fieldName, String featureName, float featureValue) {
60-
this(fieldName, featureName, featureValue, false);
61-
}
62-
63-
/**
64-
* Create a feature.
65-
*
66-
* @param fieldName The name of the field to store the information into. All features may be
67-
* stored in the same field.
68-
* @param featureName The name of the feature, eg. 'pagerank`. It will be indexed as a term.
69-
* @param featureValue The value of the feature, must be a positive, finite, normal float.
70-
*/
71-
public XFeatureField(String fieldName, String featureName, float featureValue, boolean storeTermVectors) {
72-
super(fieldName, featureName, storeTermVectors ? FIELD_TYPE_STORE_TERM_VECTORS : FIELD_TYPE);
73-
setFeatureValue(featureValue);
74-
}
75-
76-
/**
77-
* Update the feature value of this field.
78-
*/
79-
public void setFeatureValue(float featureValue) {
80-
if (Float.isFinite(featureValue) == false) {
81-
throw new IllegalArgumentException(
82-
"featureValue must be finite, got: " + featureValue + " for feature " + fieldsData + " on field " + name
83-
);
84-
}
85-
if (featureValue < Float.MIN_NORMAL) {
86-
throw new IllegalArgumentException(
87-
"featureValue must be a positive normal float, got: "
88-
+ featureValue
89-
+ " for feature "
90-
+ fieldsData
91-
+ " on field "
92-
+ name
93-
+ " which is less than the minimum positive normal float: "
94-
+ Float.MIN_NORMAL
95-
);
96-
}
97-
this.featureValue = featureValue;
98-
}
99-
100-
@Override
101-
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
102-
FeatureTokenStream stream;
103-
if (reuse instanceof FeatureTokenStream) {
104-
stream = (FeatureTokenStream) reuse;
105-
} else {
106-
stream = new FeatureTokenStream();
107-
}
108-
109-
int freqBits = Float.floatToIntBits(featureValue);
110-
stream.setValues((String) fieldsData, freqBits >>> 15);
111-
return stream;
112-
}
113-
114-
/**
115-
* This is useful if you have multiple features sharing a name and you want to take action to
116-
* deduplicate them.
117-
*
118-
* @return the feature value of this field.
119-
*/
120-
public float getFeatureValue() {
121-
return featureValue;
122-
}
123-
124-
private static final class FeatureTokenStream extends TokenStream {
125-
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
126-
private final TermFrequencyAttribute freqAttribute = addAttribute(TermFrequencyAttribute.class);
127-
private boolean used = true;
128-
private String value = null;
129-
private int freq = 0;
130-
131-
private FeatureTokenStream() {}
132-
133-
/**
134-
* Sets the values
135-
*/
136-
void setValues(String value, int freq) {
137-
this.value = value;
138-
this.freq = freq;
139-
}
140-
141-
@Override
142-
public boolean incrementToken() {
143-
if (used) {
144-
return false;
145-
}
146-
clearAttributes();
147-
termAttribute.append(value);
148-
freqAttribute.setTermFrequency(freq);
149-
used = true;
150-
return true;
151-
}
152-
153-
@Override
154-
public void reset() {
155-
used = false;
156-
}
157-
158-
@Override
159-
public void close() {
160-
value = null;
161-
}
162-
}
163-
27+
public final class XFeatureField {
16428
static final int MAX_FREQ = Float.floatToIntBits(Float.MAX_VALUE) >>> 15;
16529

16630
static float decodeFeatureValue(float freq) {

server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import org.apache.lucene.analysis.TokenStream;
1313
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
14+
import org.apache.lucene.document.FeatureField;
1415
import org.apache.lucene.index.DirectoryReader;
1516
import org.apache.lucene.index.IndexableField;
1617
import org.apache.lucene.index.LeafReader;
@@ -212,14 +213,14 @@ public void testDefaults() throws Exception {
212213

213214
List<IndexableField> fields = doc1.rootDoc().getFields("field");
214215
assertEquals(2, fields.size());
215-
assertThat(fields.get(0), Matchers.instanceOf(XFeatureField.class));
216-
XFeatureField featureField1 = null;
217-
XFeatureField featureField2 = null;
216+
assertThat(fields.get(0), Matchers.instanceOf(FeatureField.class));
217+
FeatureField featureField1 = null;
218+
FeatureField featureField2 = null;
218219
for (IndexableField field : fields) {
219220
if (field.stringValue().equals("ten")) {
220-
featureField1 = (XFeatureField) field;
221+
featureField1 = (FeatureField) field;
221222
} else if (field.stringValue().equals("twenty")) {
222-
featureField2 = (XFeatureField) field;
223+
featureField2 = (FeatureField) field;
223224
} else {
224225
throw new UnsupportedOperationException();
225226
}
@@ -314,14 +315,14 @@ public void testDotInFieldName() throws Exception {
314315

315316
List<IndexableField> fields = parsedDocument.rootDoc().getFields("field");
316317
assertEquals(2, fields.size());
317-
assertThat(fields.get(0), Matchers.instanceOf(XFeatureField.class));
318-
XFeatureField featureField1 = null;
319-
XFeatureField featureField2 = null;
318+
assertThat(fields.get(0), Matchers.instanceOf(FeatureField.class));
319+
FeatureField featureField1 = null;
320+
FeatureField featureField2 = null;
320321
for (IndexableField field : fields) {
321322
if (field.stringValue().equals("foo.bar")) {
322-
featureField1 = (XFeatureField) field;
323+
featureField1 = (FeatureField) field;
323324
} else if (field.stringValue().equals("foobar")) {
324-
featureField2 = (XFeatureField) field;
325+
featureField2 = (FeatureField) field;
325326
} else {
326327
throw new UnsupportedOperationException();
327328
}
@@ -369,13 +370,13 @@ public void testHandlesMultiValuedFields() throws MapperParsingException, IOExce
369370
}));
370371

371372
// then validate that the generate document stored both values appropriately and we have only the max value stored
372-
XFeatureField barField = ((XFeatureField) doc1.rootDoc().getByKey("foo.field\\.bar"));
373+
FeatureField barField = ((FeatureField) doc1.rootDoc().getByKey("foo.field\\.bar"));
373374
assertEquals(20, barField.getFeatureValue(), 1);
374375

375-
XFeatureField storedBarField = ((XFeatureField) doc1.rootDoc().getFields("foo.field").get(1));
376+
FeatureField storedBarField = ((FeatureField) doc1.rootDoc().getFields("foo.field").get(1));
376377
assertEquals(20, storedBarField.getFeatureValue(), 1);
377378

378-
assertEquals(3, doc1.rootDoc().getFields().stream().filter((f) -> f instanceof XFeatureField).count());
379+
assertEquals(3, doc1.rootDoc().getFields().stream().filter((f) -> f instanceof FeatureField).count());
379380
}
380381

381382
public void testCannotBeUsedInMultiFields() {

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
1111

1212
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
13+
import org.apache.lucene.document.FeatureField;
1314
import org.apache.lucene.index.FieldInfo;
1415
import org.apache.lucene.index.FieldInfos;
1516
import org.apache.lucene.index.IndexableField;
@@ -55,7 +56,6 @@
5556
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
5657
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldTypeTests;
5758
import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper;
58-
import org.elasticsearch.index.mapper.vectors.XFeatureField;
5959
import org.elasticsearch.index.query.SearchExecutionContext;
6060
import org.elasticsearch.index.search.ESToParentBlockJoinQuery;
6161
import org.elasticsearch.inference.ChunkingSettings;
@@ -1561,7 +1561,7 @@ private static void assertChildLeafNestedDocument(
15611561
private static void assertSparseFeatures(LuceneDocument doc, String fieldName, int expectedCount) {
15621562
int count = 0;
15631563
for (IndexableField field : doc.getFields()) {
1564-
if (field instanceof XFeatureField featureField) {
1564+
if (field instanceof FeatureField featureField) {
15651565
assertThat(featureField.name(), equalTo(fieldName));
15661566
++count;
15671567
}

0 commit comments

Comments
 (0)