Remove maxMergeAtOnce option from TieredMergePolicy. #14165

Open · wants to merge 3 commits into base: main
4 changes: 3 additions & 1 deletion lucene/CHANGES.txt
@@ -9,14 +9,16 @@ API Changes
 ---------------------
 * GITHUB#11023: Removing deprecated parameters from CheckIndex. (Jakub Slowinski)
 
+* GITHUB#14165: TieredMergePolicy's maxMergeAtOnce parameter was removed. (Adrien Grand)
+
 New Features
 ---------------------
 * GITHUB#14097: Binary partitioning merge policy over float-valued vector field. (Mike Sokolov)
 
 Improvements
 ---------------------
 
-* GITHUB#266: TieredMergePolicy's maxMergeAtOnce default value was changed from 10 to 30. (Adrien Grand)
+(No changes)
 
 Optimizations
 ---------------------
7 changes: 7 additions & 0 deletions lucene/MIGRATE.md
@@ -17,6 +17,13 @@
 
 # Apache Lucene Migration Guide
 
+## Migration from Lucene 10.x to Lucene 11.0
+
+### TieredMergePolicy#setMaxMergeAtOnce removed
+
+This parameter has no replacement; TieredMergePolicy no longer bounds the
+number of segments that may be merged together.
+
 ## Migration from Lucene 9.x to Lucene 10.0
 
 ### DataInput#readVLong() may now read negative vlongs
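In practice the migration is just deleting the setter call; any remaining tuning of merge width goes through segmentsPerTier. A minimal before/after sketch (the class name and surrounding writer setup are illustrative, not part of this PR):

```java
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TieredMergePolicy;

class TieredMergePolicyMigration {
  static IndexWriterConfig newConfig() {
    TieredMergePolicy tmp = new TieredMergePolicy();
    // Lucene 10.x and earlier:
    // tmp.setMaxMergeAtOnce(10); // removed in 11.0, no longer compiles
    // Lucene 11.0: merge width above the floor size is governed solely by
    // segmentsPerTier (and by the maximum merged segment size).
    tmp.setSegmentsPerTier(10);
    return new IndexWriterConfig().setMergePolicy(tmp);
  }
}
```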
lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java
@@ -30,9 +30,7 @@
 /**
  * Merges segments of approximately equal size, subject to an allowed number of segments per tier.
  * This is similar to {@link LogByteSizeMergePolicy}, except this merge policy is able to merge
- * non-adjacent segment, and separates how many segments are merged at once ({@link
- * #setMaxMergeAtOnce}) from how many segments are allowed per tier ({@link #setSegmentsPerTier}).
- * This merge policy also does not over-merge (i.e. cascade merges).
+ * non-adjacent segment. This merge policy also does not over-merge (i.e. cascade merges).
  *
  * <p>For normal merging, this policy first computes a "budget" of how many segments are allowed to
  * be in the index. If the index is over-budget, then the policy sorts segments by decreasing size
@@ -84,9 +82,6 @@ public class TieredMergePolicy extends MergePolicy {
    */
   public static final double DEFAULT_NO_CFS_RATIO = 0.1;
 
-  // User-specified maxMergeAtOnce. In practice we always take the min of its
-  // value and segsPerTier for segments above the floor size to avoid suboptimal merging.
-  private int maxMergeAtOnce = 30;
   private long maxMergedSegmentBytes = 5 * 1024 * 1024 * 1024L;
 
   private long floorSegmentBytes = 2 * 1024 * 1024L;
@@ -100,36 +95,12 @@ public TieredMergePolicy() {
     super(DEFAULT_NO_CFS_RATIO, MergePolicy.DEFAULT_MAX_CFS_SEGMENT_SIZE);
   }
 
-  /**
-   * Maximum number of segments to be merged at a time during "normal" merging. Default is 30.
-   *
-   * <p><b>NOTE</b>: Merges above the {@link #setFloorSegmentMB(double) floor segment size} also
-   * bound the number of merged segments by {@link #setSegmentsPerTier(double) the number of
-   * segments per tier}.
-   */
-  public TieredMergePolicy setMaxMergeAtOnce(int v) {
-    if (v < 2) {
-      throw new IllegalArgumentException("maxMergeAtOnce must be > 1 (got " + v + ")");
-    }
-    maxMergeAtOnce = v;
-    return this;
-  }
-
   private enum MERGE_TYPE {
     NATURAL,
     FORCE_MERGE,
     FORCE_MERGE_DELETES
   }
 
-  /**
-   * Returns the current maxMergeAtOnce setting.
-   *
-   * @see #setMaxMergeAtOnce
-   */
-  public int getMaxMergeAtOnce() {
-    return maxMergeAtOnce;
-  }
-
   // TODO: should addIndexes do explicit merging, too? And,
   // if user calls IW.maybeMerge "explicitly"
 
@@ -429,7 +400,7 @@ public MergeSpecification findMerges(
     }
     allowedDelCount = Math.max(0, allowedDelCount);
 
-    final int mergeFactor = (int) Math.min(maxMergeAtOnce, segsPerTier);
+    final int mergeFactor = (int) segsPerTier;
     // Compute max allowed segments for the remainder of the index
     long levelSize = Math.max(minSegmentBytes, floorSegmentBytes);
     long bytesLeft = totIndexBytes;
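The collapsed lines after this hunk turn mergeFactor into an index-wide segment budget. A sketch of that tiered computation, paraphrased from TieredMergePolicy#findMerges rather than copied from this diff (variable names follow the hunk above):

```java
// Paraphrase of TieredMergePolicy's allowed-segment budget, not the exact
// Lucene source: each tier holds segsPerTier segments, and segments one
// tier up are mergeFactor times larger, capped at the max merged size.
static double allowedSegmentCount(
    long totIndexBytes,
    long minSegmentBytes,
    long floorSegmentBytes,
    long maxMergedSegmentBytes,
    double segsPerTier) {
  final int mergeFactor = (int) segsPerTier; // as in the hunk above
  long levelSize = Math.max(minSegmentBytes, floorSegmentBytes);
  long bytesLeft = totIndexBytes;
  double allowedSegCount = 0;
  while (true) {
    final double segCountLevel = bytesLeft / (double) levelSize;
    if (segCountLevel < segsPerTier || levelSize == maxMergedSegmentBytes) {
      // Whatever remains fits on the top tier.
      allowedSegCount += Math.ceil(segCountLevel);
      break;
    }
    allowedSegCount += segsPerTier; // one full tier at this segment size
    bytesLeft -= (long) (segsPerTier * levelSize);
    levelSize = Math.min(maxMergedSegmentBytes, levelSize * mergeFactor);
  }
  return allowedSegCount;
}
```

Since the policy already took min(maxMergeAtOnce, segsPerTier) here (see the removed field comment above), maxMergeAtOnce only had an effect when set below segmentsPerTier; removing it simplifies the policy without changing the default index shape.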
@@ -570,7 +541,6 @@ private MergeSpecification doFindMerges(
       long docCountThisMerge = 0;
       for (int idx = startIdx;
           idx < sortedEligible.size()
-              && candidate.size() < maxMergeAtOnce
               // We allow merging more than mergeFactor segments together if the merged segment
               // would be less than the floor segment size. This is important because segments
               // below the floor segment size are more aggressively merged by this policy, so we
@@ -733,7 +703,7 @@ protected MergeScore score(
       // matter in this case because this merge will not
       // "cascade" and so it cannot lead to N^2 merge cost
       // over time:
-      final int mergeFactor = (int) Math.min(maxMergeAtOnce, segsPerTier);
+      int mergeFactor = (int) segsPerTier;
       skew = 1.0 / mergeFactor;
     } else {
       skew =
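For intuition: skew measures how lopsided a candidate merge is, as the fraction of the merged size contributed by its largest segment, so a balanced merge of k equal segments scores 1/k. A simplified sketch of that term (illustrative only; the real TieredMergePolicy#score also floors segment sizes and combines skew with size and deletes):

```java
// Simplified skew term from TieredMergePolicy#score (illustrative only).
// Lower is better: balanced merges approach 1/k, lopsided ones approach 1.
static double skew(long[] segmentSizes, double segsPerTier, boolean hitMaxSegmentSize) {
  if (hitMaxSegmentSize) {
    // A max-sized merge cannot cascade, so pretend perfect skew;
    // after this PR, mergeFactor is simply (int) segsPerTier.
    return 1.0 / (int) segsPerTier;
  }
  long total = 0;
  long largest = 0;
  for (long size : segmentSizes) {
    total += size;
    largest = Math.max(largest, size);
  }
  return (double) largest / total;
}
```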
@@ -1021,7 +991,6 @@ private long floorSize(long bytes) {
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder("[" + getClass().getSimpleName() + ": ");
-    sb.append("maxMergeAtOnce=").append(maxMergeAtOnce).append(", ");
     sb.append("maxMergedSegmentMB=").append(maxMergedSegmentBytes / 1024. / 1024.).append(", ");
     sb.append("floorSegmentMB=").append(floorSegmentBytes / 1024. / 1024.).append(", ");
     sb.append("forceMergeDeletesPctAllowed=").append(forceMergeDeletesPctAllowed).append(", ");
@@ -43,7 +43,6 @@ public void testLargeSegment() throws Exception {
             128)); // Make sure to use the ConfigurableMCodec instead of a random one
     iwc.setRAMBufferSizeMB(64); // Use a 64MB buffer to create larger initial segments
     TieredMergePolicy mp = new TieredMergePolicy();
-    mp.setMaxMergeAtOnce(256); // avoid intermediate merges (waste of time with HNSW?)
     mp.setSegmentsPerTier(256); // only merge once at the end when we ask
     iwc.setMergePolicy(mp);
     String fieldName = "field";
@@ -109,7 +109,7 @@ public void runTest(Directory directory) throws Exception {
 
     IndexWriterConfig conf =
         new IndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(7);
-    ((TieredMergePolicy) conf.getMergePolicy()).setMaxMergeAtOnce(3);
+    ((TieredMergePolicy) conf.getMergePolicy()).setSegmentsPerTier(3);
     IndexWriter writer = RandomIndexWriter.mockIndexWriter(directory, conf, random());
 
     // Establish a base index of 100 docs:
@@ -375,7 +375,6 @@ protected void doMerge(MergeSource mergeSource, MergePolicy.OneMerge merge)
 
     TieredMergePolicy tmp = new TieredMergePolicy();
     iwc.setMergePolicy(tmp);
-    tmp.setMaxMergeAtOnce(2);
     tmp.setSegmentsPerTier(2);
 
     IndexWriter w = new IndexWriter(dir, iwc);
lucene/core/src/test/org/apache/lucene/index/TestForTooMuchCloning.java
@@ -36,7 +36,7 @@ public class TestForTooMuchCloning extends LuceneTestCase {
   public void test() throws Exception {
     final MockDirectoryWrapper dir = newMockDirectory();
     final TieredMergePolicy tmp = new TieredMergePolicy();
-    tmp.setMaxMergeAtOnce(2);
+    tmp.setSegmentsPerTier(2);
     final RandomIndexWriter w =
         new RandomIndexWriter(
             random(),
@@ -70,7 +70,7 @@ public void test() throws Exception {
     MergePolicy mp = w.getConfig().getMergePolicy();
     final int mergeAtOnce = 1 + w.cloneSegmentInfos().size();
     if (mp instanceof TieredMergePolicy) {
-      ((TieredMergePolicy) mp).setMaxMergeAtOnce(mergeAtOnce);
+      ((TieredMergePolicy) mp).setSegmentsPerTier(mergeAtOnce);
     } else if (mp instanceof LogMergePolicy) {
       ((LogMergePolicy) mp).setMergeFactor(mergeAtOnce);
     } else {
lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java
@@ -104,7 +104,7 @@ protected void assertSegmentInfos(MergePolicy policy, SegmentInfos infos) throws
 
     // below we make the assumption that segments that reached the max segment
     // size divided by 2 don't need merging anymore
-    int mergeFactor = (int) Math.min(tmp.getSegmentsPerTier(), tmp.getMaxMergeAtOnce());
+    int mergeFactor = (int) tmp.getSegmentsPerTier();
     while (true) {
       final double segCountLevel = bytesLeft / (double) levelSizeBytes;
       if (segCountLevel <= tmp.getSegmentsPerTier()
@@ -145,12 +145,11 @@ protected void assertSegmentInfos(MergePolicy policy, SegmentInfos infos) throws
     assertTrue(
         String.format(
             Locale.ROOT,
-            "mergeFactor=%d minSegmentBytes=%,d maxMergedSegmentBytes=%,d segmentsPerTier=%g maxMergeAtOnce=%d numSegments=%d allowed=%g totalBytes=%,d delPercentage=%g deletesPctAllowed=%g targetNumSegments=%d",
+            "mergeFactor=%d minSegmentBytes=%,d maxMergedSegmentBytes=%,d segmentsPerTier=%g numSegments=%d allowed=%g totalBytes=%,d delPercentage=%g deletesPctAllowed=%g targetNumSegments=%d",
             mergeFactor,
             minSegmentBytes,
             maxMergedSegmentBytes,
             tmp.getSegmentsPerTier(),
-            tmp.getMaxMergeAtOnce(),
             numSegments,
             allowedSegCount,
             totalBytes,
@@ -162,10 +161,7 @@ protected void assertSegmentInfos(MergePolicy policy, SegmentInfos infos) throws
 
   @Override
   protected void assertMerge(MergePolicy policy, MergeSpecification merges) {
-    TieredMergePolicy tmp = (TieredMergePolicy) policy;
-    for (OneMerge merge : merges.merges) {
-      assertTrue(merge.segments.size() <= tmp.getMaxMergeAtOnce());
-    }
+    // anything to assert?
   }
 
   public void testForceMergeDeletes() throws Exception {
@@ -174,7 +170,6 @@
     TieredMergePolicy tmp = newTieredMergePolicy();
     conf.setMergePolicy(tmp);
     conf.setMaxBufferedDocs(4);
-    tmp.setMaxMergeAtOnce(100);
     tmp.setSegmentsPerTier(100);
     tmp.setDeletesPctAllowed(50.0);
     tmp.setForceMergeDeletesPctAllowed(30.0);
@@ -219,8 +214,8 @@ public void testPartialMerge() throws Exception {
     TieredMergePolicy tmp = newTieredMergePolicy();
     conf.setMergePolicy(tmp);
     conf.setMaxBufferedDocs(2);
-    tmp.setMaxMergeAtOnce(3);
     tmp.setSegmentsPerTier(6);
+    tmp.setFloorSegmentMB(Double.MIN_VALUE);
 
     IndexWriter w = new IndexWriter(dir, conf);
     int maxCount = 0;
@@ -231,7 +226,7 @@
       w.addDocument(doc);
       int count = w.getSegmentCount();
       maxCount = Math.max(count, maxCount);
-      assertTrue("count=" + count + " maxCount=" + maxCount, count >= maxCount - 3);
+      assertTrue("count=" + count + " maxCount=" + maxCount, count >= maxCount - 6);
     }
 
     w.flush(true, true);
@@ -973,15 +968,13 @@ public void testMergeSizeIsLessThanFloorSize() throws IOException {
     assertEquals(15, oneMerge.segments.size());
   }
 
-    // Segments are below the floor segment size and we'd need to merge more than maxMergeAtOnce
-    // segments to go above the minimum segment size. We get 1 merge of maxMergeAtOnce=30 segments
-    // and 1 merge of 50-30=20 segments.
+    // Segments are below the floor segment size. We get one merge that merges the 50 segments
+    // together.
     mergePolicy.setFloorSegmentMB(60);
     mergeSpec = mergePolicy.findMerges(MergeTrigger.FULL_FLUSH, infos, mergeContext);
     assertNotNull(mergeSpec);
-    assertEquals(2, mergeSpec.merges.size());
-    assertEquals(30, mergeSpec.merges.get(0).segments.size());
-    assertEquals(20, mergeSpec.merges.get(1).segments.size());
+    assertEquals(1, mergeSpec.merges.size());
+    assertEquals(50, mergeSpec.merges.get(0).segments.size());
   }
 
   public void testFullFlushMerges() throws IOException {
@@ -1008,6 +1001,6 @@
       segmentInfos =
          applyMerge(segmentInfos, merge, "_" + segNameGenerator.getAndIncrement(), stats);
     }
-    assertEquals(2, segmentInfos.size());
+    assertEquals(1, segmentInfos.size());
   }
 }
@@ -164,7 +164,6 @@ private static IndexWriter initWriter(
     if (mp instanceof TieredMergePolicy) {
       TieredMergePolicy tmp = (TieredMergePolicy) mp;
       tmp.setSegmentsPerTier(3);
-      tmp.setMaxMergeAtOnce(3);
     } else if (mp instanceof LogMergePolicy) {
       LogMergePolicy lmp = (LogMergePolicy) mp;
       lmp.setMergeFactor(3);
lucene/test-framework/src/java/org/apache/lucene/tests/util/LuceneTestCase.java
@@ -1096,11 +1096,6 @@ private static void configureRandom(Random r, MergePolicy mergePolicy) {
 
   public static TieredMergePolicy newTieredMergePolicy(Random r) {
     TieredMergePolicy tmp = new TieredMergePolicy();
-    if (rarely(r)) {
-      tmp.setMaxMergeAtOnce(TestUtil.nextInt(r, 2, 9));
-    } else {
-      tmp.setMaxMergeAtOnce(TestUtil.nextInt(r, 10, 50));
-    }
     if (rarely(r)) {
       tmp.setMaxMergedSegmentMB(0.2 + r.nextDouble() * 2.0);
     } else {
@@ -1235,11 +1230,6 @@ public static void maybeChangeLiveIndexWriterConfig(Random r, LiveIndexWriterCon
       }
     } else if (mp instanceof TieredMergePolicy) {
       TieredMergePolicy tmp = (TieredMergePolicy) mp;
-      if (rarely(r)) {
-        tmp.setMaxMergeAtOnce(TestUtil.nextInt(r, 2, 9));
-      } else {
-        tmp.setMaxMergeAtOnce(TestUtil.nextInt(r, 10, 50));
-      }
       if (rarely(r)) {
         tmp.setMaxMergedSegmentMB(0.2 + r.nextDouble() * 2.0);
       } else {
lucene/test-framework/src/java/org/apache/lucene/tests/util/TestUtil.java
@@ -1453,7 +1453,6 @@ public static void reduceOpenFiles(IndexWriter w) {
       lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor()));
     } else if (mp instanceof TieredMergePolicy) {
       TieredMergePolicy tmp = (TieredMergePolicy) mp;
-      tmp.setMaxMergeAtOnce(Math.min(5, tmp.getMaxMergeAtOnce()));
       tmp.setSegmentsPerTier(Math.min(5, tmp.getSegmentsPerTier()));
     }
     MergeScheduler ms = w.getConfig().getMergeScheduler();