2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
@@ -81,6 +81,8 @@ Bug Fixes
* GITHUB#14847: Allow Faiss vector format to index >2GB of vectors per-field per-segment by using MemorySegment APIs
(instead of ByteBuffer) to copy bytes to native memory. (Kaival Parikh)

* GITHUB#15120: Add estimated byte sizes to merges kicked off by IndexWriter.addIndexes(CodecReader[]). (Craig Perkins)

Changes in Runtime Behavior
---------------------
* GITHUB#14187: The query cache is now disabled by default. (Adrien Grand)
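For context on the CHANGES entry above, a minimal sketch of the API path it refers to. The Directory variables, the no-arg IndexWriterConfig, and the cast of each leaf to CodecReader are illustrative assumptions, not part of the patch; only the behavior noted in the final comment comes from this change.

// Hedged sketch: open an existing index and feed its segments to another writer via
// IndexWriter.addIndexes(CodecReader...). "sourceDir" and "destDir" are assumed
// org.apache.lucene.store.Directory instances; sourceDir already contains an index.
try (DirectoryReader reader = DirectoryReader.open(sourceDir);
    IndexWriter writer = new IndexWriter(destDir, new IndexWriterConfig())) {
  // Leaves of a DirectoryReader opened on a regular index are SegmentReaders,
  // which extend CodecReader, so this cast is safe for that reader type.
  CodecReader[] codecReaders =
      reader.leaves().stream()
          .map(ctx -> (CodecReader) ctx.reader())
          .toArray(CodecReader[]::new);
  // Merges created for these readers are registered through AddIndexesMergeSource.registerMerge,
  // which with this change computes estimatedMergeBytes and totalMergeBytes up front
  // (see the IndexWriter diff below).
  writer.addIndexes(codecReaders);
}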
33 changes: 22 additions & 11 deletions lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -21,6 +21,7 @@

import java.io.Closeable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.time.Instant;
import java.util.ArrayDeque;
import java.util.ArrayList;
@@ -3285,6 +3286,11 @@ public AddIndexesMergeSource(IndexWriter writer) {
}

public void registerMerge(MergePolicy.OneMerge merge) {
try {
addEstimatedBytesToMerge(merge);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
synchronized (IndexWriter.this) {
pendingAddIndexesMerges.add(merge);
}
@@ -4777,6 +4783,21 @@ private void abortOneMerge(MergePolicy.OneMerge merge) throws IOException {
closeMergeReaders(merge, true, false);
}

/** Compute {@code estimatedMergeBytes} and {@code totalMergeBytes} for a merge. */
void addEstimatedBytesToMerge(MergePolicy.OneMerge merge) throws IOException {
assert merge.estimatedMergeBytes == 0;
assert merge.totalMergeBytes == 0;
for (SegmentCommitInfo info : merge.segments) {
if (info.info.maxDoc() > 0) {
final int delCount = numDeletedDocs(info);
assert delCount <= info.info.maxDoc();
final double delRatio = ((double) delCount) / info.info.maxDoc();
merge.estimatedMergeBytes += (long) (info.sizeInBytes() * (1.0 - delRatio));
merge.totalMergeBytes += info.sizeInBytes();
}
}
}

/**
* Checks whether this merge involves any segments already participating in a merge. If not, this
* merge is "registered", meaning we record that its segments are now participating in a merge,
@@ -4868,17 +4889,7 @@ private synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws IO
mergingSegments.add(info);
}

assert merge.estimatedMergeBytes == 0;
assert merge.totalMergeBytes == 0;
for (SegmentCommitInfo info : merge.segments) {
if (info.info.maxDoc() > 0) {
final int delCount = numDeletedDocs(info);
assert delCount <= info.info.maxDoc();
final double delRatio = ((double) delCount) / info.info.maxDoc();
merge.estimatedMergeBytes += (long) (info.sizeInBytes() * (1.0 - delRatio));
merge.totalMergeBytes += info.sizeInBytes();
}
}
addEstimatedBytesToMerge(merge);

// Merge is now registered
merge.registerDone = true;
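To make the new addEstimatedBytesToMerge logic concrete, here is a worked example of the per-segment arithmetic with made-up numbers (one 100 MB segment whose documents are 25% deleted); none of these values come from the patch.

// Illustrative values only.
long sizeInBytes = 100L * 1024 * 1024; // info.sizeInBytes(): 100 MB of segment files
int maxDoc = 1000;                     // info.info.maxDoc()
int delCount = 250;                    // numDeletedDocs(info)

double delRatio = ((double) delCount) / maxDoc;                     // 0.25
long estimatedMergeBytes = (long) (sizeInBytes * (1.0 - delRatio)); // ~75 MB expected to survive the merge
long totalMergeBytes = sizeInBytes;                                 // 100 MB of merge input

// Because 0 <= delRatio <= 1, estimatedMergeBytes <= totalMergeBytes for every segment,
// which is exactly what the new test below asserts.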
@@ -461,4 +461,35 @@ public void run() {

directory.close();
}

public void testAddEstimatedBytesToMerge() throws IOException {
try (Directory dir = newDirectory();
IndexWriter writer =
new IndexWriter(
dir,
newIndexWriterConfig(new MockAnalyzer(random()))
.setMergePolicy(NoMergePolicy.INSTANCE))) {

Document doc = new Document();
doc.add(newTextField("field", "content", Field.Store.YES));

for (int i = 0; i < 10; i++) {
writer.addDocument(doc);
}
writer.flush();

// Create a merge with the segments
SegmentInfos segmentInfos = writer.cloneSegmentInfos();
MergePolicy.OneMerge merge = new MergePolicy.OneMerge(segmentInfos.asList());

writer.addEstimatedBytesToMerge(merge);

assertTrue(merge.estimatedMergeBytes > 0);
assertTrue(merge.totalMergeBytes > 0);
assertTrue(merge.estimatedMergeBytes <= merge.totalMergeBytes);
}
}
}