|
18 | 18 |
|
19 | 19 | package org.elasticsearch.index.mapper.vectors;
|
20 | 20 |
|
21 |
| -import org.apache.lucene.analysis.Analyzer; |
22 |
| -import org.apache.lucene.analysis.TokenStream; |
23 |
| -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
24 |
| -import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute; |
25 | 21 | import org.apache.lucene.document.FeatureField;
|
26 |
| -import org.apache.lucene.document.Field; |
27 |
| -import org.apache.lucene.document.FieldType; |
28 |
| -import org.apache.lucene.index.IndexOptions; |
29 | 22 |
|
30 | 23 | /**
|
31 | 24 | * This class is forked from the Lucene {@link FeatureField} implementation to enable support for storing term vectors.
|
32 |
| - * It should be removed once apache/lucene#14034 becomes available. |
| 25 | + * Its purpose is to allow decoding the feature value from the term frequency. |
33 | 26 | */
|
34 |
| -public final class XFeatureField extends Field { |
35 |
| - private static final FieldType FIELD_TYPE = new FieldType(); |
36 |
| - private static final FieldType FIELD_TYPE_STORE_TERM_VECTORS = new FieldType(); |
37 |
| - |
38 |
| - static { |
39 |
| - FIELD_TYPE.setTokenized(false); |
40 |
| - FIELD_TYPE.setOmitNorms(true); |
41 |
| - FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS); |
42 |
| - |
43 |
| - FIELD_TYPE_STORE_TERM_VECTORS.setTokenized(false); |
44 |
| - FIELD_TYPE_STORE_TERM_VECTORS.setOmitNorms(true); |
45 |
| - FIELD_TYPE_STORE_TERM_VECTORS.setIndexOptions(IndexOptions.DOCS_AND_FREQS); |
46 |
| - FIELD_TYPE_STORE_TERM_VECTORS.setStoreTermVectors(true); |
47 |
| - } |
48 |
| - |
49 |
| - private float featureValue; |
50 |
| - |
51 |
| - /** |
52 |
| - * Create a feature. |
53 |
| - * |
54 |
| - * @param fieldName The name of the field to store the information into. All features may be |
55 |
| - * stored in the same field. |
56 |
| - * @param featureName The name of the feature, e.g. 'pagerank'. It will be indexed as a term. |
57 |
| - * @param featureValue The value of the feature, must be a positive, finite, normal float. |
58 |
| - */ |
59 |
| - public XFeatureField(String fieldName, String featureName, float featureValue) { |
60 |
| - this(fieldName, featureName, featureValue, false); |
61 |
| - } |
62 |
| - |
63 |
| - /** |
64 |
| - * Create a feature. |
65 |
| - * |
66 |
| - * @param fieldName The name of the field to store the information into. All features may be |
67 |
| - * stored in the same field. |
68 |
| - * @param featureName The name of the feature, e.g. 'pagerank'. It will be indexed as a term. |
69 |
| - * @param featureValue The value of the feature, must be a positive, finite, normal float. |
70 |
| - */ |
71 |
| - public XFeatureField(String fieldName, String featureName, float featureValue, boolean storeTermVectors) { |
72 |
| - super(fieldName, featureName, storeTermVectors ? FIELD_TYPE_STORE_TERM_VECTORS : FIELD_TYPE); |
73 |
| - setFeatureValue(featureValue); |
74 |
| - } |
75 |
| - |
76 |
| - /** |
77 |
| - * Update the feature value of this field. |
78 |
| - */ |
79 |
| - public void setFeatureValue(float featureValue) { |
80 |
| - if (Float.isFinite(featureValue) == false) { |
81 |
| - throw new IllegalArgumentException( |
82 |
| - "featureValue must be finite, got: " + featureValue + " for feature " + fieldsData + " on field " + name |
83 |
| - ); |
84 |
| - } |
85 |
| - if (featureValue < Float.MIN_NORMAL) { |
86 |
| - throw new IllegalArgumentException( |
87 |
| - "featureValue must be a positive normal float, got: " |
88 |
| - + featureValue |
89 |
| - + " for feature " |
90 |
| - + fieldsData |
91 |
| - + " on field " |
92 |
| - + name |
93 |
| - + " which is less than the minimum positive normal float: " |
94 |
| - + Float.MIN_NORMAL |
95 |
| - ); |
96 |
| - } |
97 |
| - this.featureValue = featureValue; |
98 |
| - } |
99 |
| - |
100 |
| - @Override |
101 |
| - public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) { |
102 |
| - FeatureTokenStream stream; |
103 |
| - if (reuse instanceof FeatureTokenStream) { |
104 |
| - stream = (FeatureTokenStream) reuse; |
105 |
| - } else { |
106 |
| - stream = new FeatureTokenStream(); |
107 |
| - } |
108 |
| - |
109 |
| - int freqBits = Float.floatToIntBits(featureValue); |
110 |
| - stream.setValues((String) fieldsData, freqBits >>> 15); |
111 |
| - return stream; |
112 |
| - } |
113 |
| - |
114 |
| - /** |
115 |
| - * This is useful if you have multiple features sharing a name and you want to take action to |
116 |
| - * deduplicate them. |
117 |
| - * |
118 |
| - * @return the feature value of this field. |
119 |
| - */ |
120 |
| - public float getFeatureValue() { |
121 |
| - return featureValue; |
122 |
| - } |
123 |
| - |
124 |
| - private static final class FeatureTokenStream extends TokenStream { |
125 |
| - private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); |
126 |
| - private final TermFrequencyAttribute freqAttribute = addAttribute(TermFrequencyAttribute.class); |
127 |
| - private boolean used = true; |
128 |
| - private String value = null; |
129 |
| - private int freq = 0; |
130 |
| - |
131 |
| - private FeatureTokenStream() {} |
132 |
| - |
133 |
| - /** |
134 |
| - * Sets the values |
135 |
| - */ |
136 |
| - void setValues(String value, int freq) { |
137 |
| - this.value = value; |
138 |
| - this.freq = freq; |
139 |
| - } |
140 |
| - |
141 |
| - @Override |
142 |
| - public boolean incrementToken() { |
143 |
| - if (used) { |
144 |
| - return false; |
145 |
| - } |
146 |
| - clearAttributes(); |
147 |
| - termAttribute.append(value); |
148 |
| - freqAttribute.setTermFrequency(freq); |
149 |
| - used = true; |
150 |
| - return true; |
151 |
| - } |
152 |
| - |
153 |
| - @Override |
154 |
| - public void reset() { |
155 |
| - used = false; |
156 |
| - } |
157 |
| - |
158 |
| - @Override |
159 |
| - public void close() { |
160 |
| - value = null; |
161 |
| - } |
162 |
| - } |
163 |
| - |
| 27 | +public final class XFeatureField { |
164 | 28 | static final int MAX_FREQ = Float.floatToIntBits(Float.MAX_VALUE) >>> 15;
|
165 | 29 |
|
166 | 30 | static float decodeFeatureValue(float freq) {
|
|
0 commit comments