Skip to content

Commit a2eeea4

Browse files
Include bytes for live docs in ShardFieldStats (#132232)
Co-authored-by: Jordan Powers <[email protected]>
1 parent 0dc4030 commit a2eeea4

File tree

3 files changed

+93
-12
lines changed

3 files changed

+93
-12
lines changed

server/src/main/java/org/elasticsearch/index/engine/Engine.java

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@
3232
import org.apache.lucene.search.ReferenceManager;
3333
import org.apache.lucene.search.similarities.Similarity;
3434
import org.apache.lucene.store.AlreadyClosedException;
35+
import org.apache.lucene.util.Bits;
3536
import org.apache.lucene.util.BytesRef;
37+
import org.apache.lucene.util.RamUsageEstimator;
3638
import org.apache.lucene.util.SetOnce;
3739
import org.elasticsearch.ExceptionsHelper;
3840
import org.elasticsearch.action.ActionListener;
@@ -280,6 +282,7 @@ protected static ShardFieldStats shardFieldStats(List<LeafReaderContext> leaves)
280282
int totalFields = 0;
281283
long usages = 0;
282284
long totalPostingBytes = 0;
285+
long liveDocsBytes = 0;
283286
for (LeafReaderContext leaf : leaves) {
284287
numSegments++;
285288
var fieldInfos = leaf.reader().getFieldInfos();
@@ -291,19 +294,44 @@ protected static ShardFieldStats shardFieldStats(List<LeafReaderContext> leaves)
291294
} else {
292295
usages = -1;
293296
}
294-
if (TrackingPostingsInMemoryBytesCodec.TRACK_POSTINGS_IN_MEMORY_BYTES.isEnabled()) {
297+
boolean trackPostingsMemoryEnabled = TrackingPostingsInMemoryBytesCodec.TRACK_POSTINGS_IN_MEMORY_BYTES.isEnabled();
298+
boolean trackLiveDocsMemoryEnabled = ShardFieldStats.TRACK_LIVE_DOCS_IN_MEMORY_BYTES.isEnabled();
299+
if (trackLiveDocsMemoryEnabled || trackPostingsMemoryEnabled) {
295300
SegmentReader segmentReader = Lucene.tryUnwrapSegmentReader(leaf.reader());
296301
if (segmentReader != null) {
297-
String postingBytes = segmentReader.getSegmentInfo().info.getAttribute(
298-
TrackingPostingsInMemoryBytesCodec.IN_MEMORY_POSTINGS_BYTES_KEY
299-
);
300-
if (postingBytes != null) {
301-
totalPostingBytes += Long.parseLong(postingBytes);
302+
if (trackPostingsMemoryEnabled) {
303+
String postingBytes = segmentReader.getSegmentInfo().info.getAttribute(
304+
TrackingPostingsInMemoryBytesCodec.IN_MEMORY_POSTINGS_BYTES_KEY
305+
);
306+
if (postingBytes != null) {
307+
totalPostingBytes += Long.parseLong(postingBytes);
308+
}
309+
}
310+
if (trackLiveDocsMemoryEnabled) {
311+
var liveDocs = segmentReader.getLiveDocs();
312+
if (liveDocs != null) {
313+
assert validateLiveDocsClass(liveDocs);
314+
// Would prefer to use FixedBitSet#ramBytesUsed() however FixedBits / Bits interface don't expose that.
315+
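// This approximates what FixedBitSet#ramBytesUsed() does: liveDocs.length() returns the number of bits, so dividing it
316+
// by 8 estimates the size in bytes of the backing long array (integer division, so the estimate is rounded down).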
317+
liveDocsBytes += RamUsageEstimator.alignObjectSize(
318+
(long) RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (liveDocs.length() / 8L)
319+
);
320+
}
302321
}
303322
}
304323
}
305324
}
306-
return new ShardFieldStats(numSegments, totalFields, usages, totalPostingBytes);
325+
return new ShardFieldStats(numSegments, totalFields, usages, totalPostingBytes, liveDocsBytes);
326+
}
327+
328+
private static boolean validateLiveDocsClass(Bits liveDocs) {
329+
// These classes are package-private in Lucene and therefore we compare fully qualified class names as strings here:
330+
String fullClassName = liveDocs.getClass().getName();
331+
assert fullClassName.equals("org.apache.lucene.util.FixedBits")
332+
|| fullClassName.equals("org.apache.lucene.tests.codecs.asserting.AssertingLiveDocsFormat$AssertingBits")
333+
: "unexpected class [" + fullClassName + "]";
334+
return true;
307335
}
308336

309337
/**

server/src/main/java/org/elasticsearch/index/shard/ShardFieldStats.java

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,21 @@
99

1010
package org.elasticsearch.index.shard;
1111

12+
import org.elasticsearch.common.util.FeatureFlag;
13+
1214
/**
1315
* Per-shard stats, including the number of segments and the total number of fields across those segments.
1416
* These stats should be recomputed whenever the shard is refreshed.
1517
*
16-
* @param numSegments the number of segments
17-
* @param totalFields the total number of fields across the segments
18-
* @param fieldUsages the number of usages for segment-level fields (e.g., doc_values, postings, norms, points)
19-
* -1 if unavailable
18+
* @param numSegments the number of segments
19+
* @param totalFields the total number of fields across the segments
20+
* @param fieldUsages the number of usages for segment-level fields (e.g., doc_values, postings, norms, points)
21+
* -1 if unavailable
2022
* @param postingsInMemoryBytes the total bytes in memory used for postings across all fields
23+
* @param liveDocsBytes the total bytes in memory used for live docs
2124
*/
22-
public record ShardFieldStats(int numSegments, int totalFields, long fieldUsages, long postingsInMemoryBytes) {
25+
public record ShardFieldStats(int numSegments, int totalFields, long fieldUsages, long postingsInMemoryBytes, long liveDocsBytes) {
26+
27+
public static final FeatureFlag TRACK_LIVE_DOCS_IN_MEMORY_BYTES = new FeatureFlag("track_live_docs_in_memory_bytes");
2328

2429
}

server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1980,6 +1980,54 @@ public void testShardFieldStats() throws IOException {
19801980
closeShards(shard);
19811981
}
19821982

1983+
public void testShardFieldStatsWithDeletes() throws IOException {
1984+
Settings settings = Settings.builder().put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), TimeValue.MINUS_ONE).build();
1985+
IndexShard shard = newShard(true, settings);
1986+
assertNull(shard.getShardFieldStats());
1987+
recoverShardFromStore(shard);
1988+
boolean liveDocsTrackingEnabled = ShardFieldStats.TRACK_LIVE_DOCS_IN_MEMORY_BYTES.isEnabled();
1989+
1990+
// index some documents
1991+
int numDocs = 10;
1992+
for (int i = 0; i < numDocs; i++) {
1993+
indexDoc(shard, "_doc", "first_" + i, """
1994+
{
1995+
"f1": "foo",
1996+
"f2": "bar"
1997+
}
1998+
""");
1999+
}
2000+
shard.refresh("test");
2001+
var stats = shard.getShardFieldStats();
2002+
assertThat(stats.numSegments(), equalTo(1));
2003+
assertThat(stats.liveDocsBytes(), equalTo(0L));
2004+
2005+
// delete a doc
2006+
deleteDoc(shard, "first_0");
2007+
2008+
// Refresh and fetch new stats:
2009+
shard.refresh("test");
2010+
stats = shard.getShardFieldStats();
2011+
// More segments because delete operation is stored in the new segment for replication purposes.
2012+
assertThat(stats.numSegments(), equalTo(2));
2013+
// Delete op is stored in new segment, but marked as deleted. All segments have live docs:
2014+
assertThat(stats.liveDocsBytes(), equalTo(liveDocsTrackingEnabled ? 40L : 0L));
2015+
2016+
// delete another doc:
2017+
deleteDoc(shard, "first_1");
2018+
shard.getMinRetainedSeqNo();
2019+
2020+
// Refresh and fetch new stats:
2021+
shard.refresh("test");
2022+
stats = shard.getShardFieldStats();
2023+
// More segments because delete operation is stored in the new segment for replication purposes.
2024+
assertThat(stats.numSegments(), equalTo(3));
2025+
// Delete op is stored in new segment, but marked as deleted. All segments have live docs:
2026+
assertThat(stats.liveDocsBytes(), equalTo(liveDocsTrackingEnabled ? 56L : 0L));
2027+
2028+
closeShards(shard);
2029+
}
2030+
19832031
public void testIndexingOperationsListeners() throws IOException {
19842032
IndexShard shard = newStartedShard(true);
19852033
indexDoc(shard, "_doc", "0", "{\"foo\" : \"bar\"}");

0 commit comments

Comments
 (0)