Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,36 +25,14 @@
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper;
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

/**
* Class that encapsulates the logic of figuring out the most appropriate file format for a given field, across postings, doc values and
* vectors.
*/
public class PerFieldFormatSupplier {

/**
 * Names of the meta fields (fields whose name starts with an underscore) that are not excluded from the tsdb doc values codec.
 * Any other underscore-prefixed field is excluded.
 */
private static final Set<String> INCLUDE_META_FIELDS;

static {
    // TODO: should we just allow all fields to use tsdb doc values codec?
    // Only the time series id, time series routing hash and sequence number fields are listed here.
    // The _recovery_source_size and _recovery_source fields are deliberately left out, since their values can be trimmed away in
    // RecoverySourcePruneMergePolicy, which leads to inconsistencies between merge stats and actual values.
    final Set<String> allowedMetaFields = new HashSet<>(3);
    Collections.addAll(
        allowedMetaFields,
        TimeSeriesIdFieldMapper.NAME,
        TimeSeriesRoutingHashFieldMapper.NAME,
        SeqNoFieldMapper.NAME
    );
    INCLUDE_META_FIELDS = Collections.unmodifiableSet(allowedMetaFields);
}

private static final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat();
private static final KnnVectorsFormat knnVectorsFormat = new Lucene99HnswVectorsFormat();
private static final ES819TSDBDocValuesFormat tsdbDocValuesFormat = new ES819TSDBDocValuesFormat();
Expand Down Expand Up @@ -134,19 +112,11 @@ public DocValuesFormat getDocValuesFormatForField(String field) {
}

/**
 * Decides whether the tsdb doc values format should be used for the given field.
 *
 * @param field name of the field to check
 * @return {@code true} only when the field is not excluded, a mapper service is available, the index is in time series or logs
 *         mode, and the index settings report the tsdb codec as enabled
 */
boolean useTSDBDocValuesFormat(final String field) {
    // The field exclusion check runs first, exactly as before, ahead of any mapper service access.
    if (excludeFields(field) || mapperService == null) {
        return false;
    }

    final boolean tsdbCapableMode = isTimeSeriesModeIndex() || isLogsModeIndex();
    return tsdbCapableMode && mapperService.getIndexSettings().isES87TSDBCodecEnabled();
}

/**
 * Tells whether a field must be kept away from the tsdb doc values format.
 *
 * @param fieldName name of the field to check
 * @return {@code true} when the name starts with an underscore and is not listed in {@code INCLUDE_META_FIELDS}
 */
private boolean excludeFields(String fieldName) {
    // Names without the underscore prefix are never excluded.
    if (fieldName.startsWith("_") == false) {
        return false;
    }
    return INCLUDE_META_FIELDS.contains(fieldName) == false;
}

/**
 * @return {@code true} when a mapper service is available and its index settings report the {@code TIME_SERIES} index mode
 */
private boolean isTimeSeriesModeIndex() {
    if (mapperService == null) {
        return false;
    }
    return mapperService.getIndexSettings().getMode() == IndexMode.TIME_SERIES;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
import org.elasticsearch.index.codec.FilterDocValuesProducer;
import org.elasticsearch.index.codec.perfield.XPerFieldDocValuesFormat;

/**
Expand Down Expand Up @@ -47,10 +46,8 @@ static MergeStats compatibleWithOptimizedMerge(boolean optimizedMergeEnabled, Me
continue;
}
DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
if (docValuesProducer instanceof FilterDocValuesProducer filterDocValuesProducer) {
docValuesProducer = filterDocValuesProducer.getIn();
}

// Don't handle producers that are wrapped as these could alter the values, which makes returned merge stats incorrect.
// For example, SourcePruningFilterCodecReader doc values producers will at some point remove values.
if (docValuesProducer instanceof XPerFieldDocValuesFormat.FieldsReader perFieldReader) {
var wrapped = perFieldReader.getDocValuesProducer(fieldInfo);
if (wrapped == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,8 @@ public void testMetaFields() throws IOException {
assertThat((perFieldMapperCodec.useTSDBDocValuesFormat(TimeSeriesIdFieldMapper.NAME)), is(true));
assertThat((perFieldMapperCodec.useTSDBDocValuesFormat(TimeSeriesRoutingHashFieldMapper.NAME)), is(true));
// See PerFieldFormatSupplier for why these fields shouldn't use the tsdb codec
assertThat((perFieldMapperCodec.useTSDBDocValuesFormat(SourceFieldMapper.RECOVERY_SOURCE_NAME)), is(false));
assertThat((perFieldMapperCodec.useTSDBDocValuesFormat(SourceFieldMapper.RECOVERY_SOURCE_SIZE_NAME)), is(false));
assertThat((perFieldMapperCodec.useTSDBDocValuesFormat(SourceFieldMapper.RECOVERY_SOURCE_NAME)), is(true));
assertThat((perFieldMapperCodec.useTSDBDocValuesFormat(SourceFieldMapper.RECOVERY_SOURCE_SIZE_NAME)), is(true));
}

public void testSeqnoField() throws IOException {
Expand Down