diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 39d7403a11c4..9c10d17452a5 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -9,6 +9,8 @@ API Changes --------------------- * GITHUB#11023: Removing deprecated parameters from CheckIndex. (Jakub Slowinski) +* GITHUB#14165: TieredMergePolicy's maxMergeAtOnce parameter was removed. (Adrien Grand) + New Features --------------------- * GITHUB#14097: Binary partitioning merge policy over float-valued vector field. (Mike Sokolov) @@ -16,7 +18,7 @@ New Features Improvements --------------------- -* GITHUB#266: TieredMergePolicy's maxMergeAtOnce default value was changed from 10 to 30. (Adrien Grand) +(No changes) Optimizations --------------------- diff --git a/lucene/MIGRATE.md b/lucene/MIGRATE.md index 1db50b7fdd89..62a30c2444cb 100644 --- a/lucene/MIGRATE.md +++ b/lucene/MIGRATE.md @@ -17,6 +17,13 @@ # Apache Lucene Migration Guide +## Migration from Lucene 10.x to Lucene 11.0 + +### TieredMergePolicy#setMaxMergeAtOnce removed + +This parameter has no replacement, TieredMergePolicy no longer bounds the +number of segments that may be merged together. + ## Migration from Lucene 9.x to Lucene 10.0 ### DataInput#readVLong() may now read negative vlongs diff --git a/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java index 7b56471c8bf4..70036ce9acb6 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java @@ -30,9 +30,7 @@ /** * Merges segments of approximately equal size, subject to an allowed number of segments per tier. * This is similar to {@link LogByteSizeMergePolicy}, except this merge policy is able to merge - * non-adjacent segment, and separates how many segments are merged at once ({@link - * #setMaxMergeAtOnce}) from how many segments are allowed per tier ({@link #setSegmentsPerTier}). - * This merge policy also does not over-merge (i.e. cascade merges). + * non-adjacent segment. This merge policy also does not over-merge (i.e. cascade merges). * *

For normal merging, this policy first computes a "budget" of how many segments are allowed to * be in the index. If the index is over-budget, then the policy sorts segments by decreasing size @@ -84,9 +82,6 @@ public class TieredMergePolicy extends MergePolicy { */ public static final double DEFAULT_NO_CFS_RATIO = 0.1; - // User-specified maxMergeAtOnce. In practice we always take the min of its - // value and segsPerTier for segments above the floor size to avoid suboptimal merging. - private int maxMergeAtOnce = 30; private long maxMergedSegmentBytes = 5 * 1024 * 1024 * 1024L; private long floorSegmentBytes = 2 * 1024 * 1024L; @@ -100,36 +95,12 @@ public TieredMergePolicy() { super(DEFAULT_NO_CFS_RATIO, MergePolicy.DEFAULT_MAX_CFS_SEGMENT_SIZE); } - /** - * Maximum number of segments to be merged at a time during "normal" merging. Default is 30. - * - *

NOTE: Merges above the {@link #setFloorSegmentMB(double) floor segment size} also - * bound the number of merged segments by {@link #setSegmentsPerTier(double) the number of - * segments per tier}. - */ - public TieredMergePolicy setMaxMergeAtOnce(int v) { - if (v < 2) { - throw new IllegalArgumentException("maxMergeAtOnce must be > 1 (got " + v + ")"); - } - maxMergeAtOnce = v; - return this; - } - private enum MERGE_TYPE { NATURAL, FORCE_MERGE, FORCE_MERGE_DELETES } - /** - * Returns the current maxMergeAtOnce setting. - * - * @see #setMaxMergeAtOnce - */ - public int getMaxMergeAtOnce() { - return maxMergeAtOnce; - } - // TODO: should addIndexes do explicit merging, too? And, // if user calls IW.maybeMerge "explicitly" @@ -429,7 +400,7 @@ public MergeSpecification findMerges( } allowedDelCount = Math.max(0, allowedDelCount); - final int mergeFactor = (int) Math.min(maxMergeAtOnce, segsPerTier); + final int mergeFactor = (int) segsPerTier; // Compute max allowed segments for the remainder of the index long levelSize = Math.max(minSegmentBytes, floorSegmentBytes); long bytesLeft = totIndexBytes; @@ -570,7 +541,6 @@ private MergeSpecification doFindMerges( long docCountThisMerge = 0; for (int idx = startIdx; idx < sortedEligible.size() - && candidate.size() < maxMergeAtOnce // We allow merging more than mergeFactor segments together if the merged segment // would be less than the floor segment size. This is important because segments // below the floor segment size are more aggressively merged by this policy, so we @@ -733,7 +703,7 @@ protected MergeScore score( // matter in this case because this merge will not // "cascade" and so it cannot lead to N^2 merge cost // over time: - final int mergeFactor = (int) Math.min(maxMergeAtOnce, segsPerTier); + int mergeFactor = (int) segsPerTier; skew = 1.0 / mergeFactor; } else { skew = @@ -1021,7 +991,6 @@ private long floorSize(long bytes) { @Override public String toString() { StringBuilder sb = new StringBuilder("[" + getClass().getSimpleName() + ": "); - sb.append("maxMergeAtOnce=").append(maxMergeAtOnce).append(", "); sb.append("maxMergedSegmentMB=").append(maxMergedSegmentBytes / 1024. / 1024.).append(", "); sb.append("floorSegmentMB=").append(floorSegmentBytes / 1024. / 1024.).append(", "); sb.append("forceMergeDeletesPctAllowed=").append(forceMergeDeletesPctAllowed).append(", "); diff --git a/lucene/core/src/test/org/apache/lucene/document/TestManyKnnDocs.java b/lucene/core/src/test/org/apache/lucene/document/TestManyKnnDocs.java index 2023ee73391d..7db87e231ff0 100644 --- a/lucene/core/src/test/org/apache/lucene/document/TestManyKnnDocs.java +++ b/lucene/core/src/test/org/apache/lucene/document/TestManyKnnDocs.java @@ -43,7 +43,6 @@ public void testLargeSegment() throws Exception { 128)); // Make sure to use the ConfigurableMCodec instead of a random one iwc.setRAMBufferSizeMB(64); // Use a 64MB buffer to create larger initial segments TieredMergePolicy mp = new TieredMergePolicy(); - mp.setMaxMergeAtOnce(256); // avoid intermediate merges (waste of time with HNSW?) mp.setSegmentsPerTier(256); // only merge once at the end when we ask iwc.setMergePolicy(mp); String fieldName = "field"; diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAtomicUpdate.java b/lucene/core/src/test/org/apache/lucene/index/TestAtomicUpdate.java index b951a565d623..5dc8327c7997 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestAtomicUpdate.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestAtomicUpdate.java @@ -109,7 +109,7 @@ public void runTest(Directory directory) throws Exception { IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(7); - ((TieredMergePolicy) conf.getMergePolicy()).setMaxMergeAtOnce(3); + ((TieredMergePolicy) conf.getMergePolicy()).setSegmentsPerTier(3); IndexWriter writer = RandomIndexWriter.mockIndexWriter(directory, conf, random()); // Establish a base index of 100 docs: diff --git a/lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java b/lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java index fcf42177570e..0d52481b908e 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java @@ -375,7 +375,6 @@ protected void doMerge(MergeSource mergeSource, MergePolicy.OneMerge merge) TieredMergePolicy tmp = new TieredMergePolicy(); iwc.setMergePolicy(tmp); - tmp.setMaxMergeAtOnce(2); tmp.setSegmentsPerTier(2); IndexWriter w = new IndexWriter(dir, iwc); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestForTooMuchCloning.java b/lucene/core/src/test/org/apache/lucene/index/TestForTooMuchCloning.java index d06330c29269..924700835403 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestForTooMuchCloning.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestForTooMuchCloning.java @@ -36,7 +36,7 @@ public class TestForTooMuchCloning extends LuceneTestCase { public void test() throws Exception { final MockDirectoryWrapper dir = newMockDirectory(); final TieredMergePolicy tmp = new TieredMergePolicy(); - tmp.setMaxMergeAtOnce(2); + tmp.setSegmentsPerTier(2); final RandomIndexWriter w = new RandomIndexWriter( random(), diff --git a/lucene/core/src/test/org/apache/lucene/index/TestForceMergeForever.java b/lucene/core/src/test/org/apache/lucene/index/TestForceMergeForever.java index 78514b2c3ed5..a5cf2b67f488 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestForceMergeForever.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestForceMergeForever.java @@ -70,7 +70,7 @@ public void test() throws Exception { MergePolicy mp = w.getConfig().getMergePolicy(); final int mergeAtOnce = 1 + w.cloneSegmentInfos().size(); if (mp instanceof TieredMergePolicy) { - ((TieredMergePolicy) mp).setMaxMergeAtOnce(mergeAtOnce); + ((TieredMergePolicy) mp).setSegmentsPerTier(mergeAtOnce); } else if (mp instanceof LogMergePolicy) { ((LogMergePolicy) mp).setMergeFactor(mergeAtOnce); } else { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java index 58e33cb7648e..6f11f71f3a99 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java @@ -104,7 +104,7 @@ protected void assertSegmentInfos(MergePolicy policy, SegmentInfos infos) throws // below we make the assumption that segments that reached the max segment // size divided by 2 don't need merging anymore - int mergeFactor = (int) Math.min(tmp.getSegmentsPerTier(), tmp.getMaxMergeAtOnce()); + int mergeFactor = (int) tmp.getSegmentsPerTier(); while (true) { final double segCountLevel = bytesLeft / (double) levelSizeBytes; if (segCountLevel <= tmp.getSegmentsPerTier() @@ -145,12 +145,11 @@ protected void assertSegmentInfos(MergePolicy policy, SegmentInfos infos) throws assertTrue( String.format( Locale.ROOT, - "mergeFactor=%d minSegmentBytes=%,d maxMergedSegmentBytes=%,d segmentsPerTier=%g maxMergeAtOnce=%d numSegments=%d allowed=%g totalBytes=%,d delPercentage=%g deletesPctAllowed=%g targetNumSegments=%d", + "mergeFactor=%d minSegmentBytes=%,d maxMergedSegmentBytes=%,d segmentsPerTier=%g numSegments=%d allowed=%g totalBytes=%,d delPercentage=%g deletesPctAllowed=%g targetNumSegments=%d", mergeFactor, minSegmentBytes, maxMergedSegmentBytes, tmp.getSegmentsPerTier(), - tmp.getMaxMergeAtOnce(), numSegments, allowedSegCount, totalBytes, @@ -162,10 +161,7 @@ protected void assertSegmentInfos(MergePolicy policy, SegmentInfos infos) throws @Override protected void assertMerge(MergePolicy policy, MergeSpecification merges) { - TieredMergePolicy tmp = (TieredMergePolicy) policy; - for (OneMerge merge : merges.merges) { - assertTrue(merge.segments.size() <= tmp.getMaxMergeAtOnce()); - } + // anything to assert? } public void testForceMergeDeletes() throws Exception { @@ -174,7 +170,6 @@ public void testForceMergeDeletes() throws Exception { TieredMergePolicy tmp = newTieredMergePolicy(); conf.setMergePolicy(tmp); conf.setMaxBufferedDocs(4); - tmp.setMaxMergeAtOnce(100); tmp.setSegmentsPerTier(100); tmp.setDeletesPctAllowed(50.0); tmp.setForceMergeDeletesPctAllowed(30.0); @@ -219,8 +214,8 @@ public void testPartialMerge() throws Exception { TieredMergePolicy tmp = newTieredMergePolicy(); conf.setMergePolicy(tmp); conf.setMaxBufferedDocs(2); - tmp.setMaxMergeAtOnce(3); tmp.setSegmentsPerTier(6); + tmp.setFloorSegmentMB(Double.MIN_VALUE); IndexWriter w = new IndexWriter(dir, conf); int maxCount = 0; @@ -231,7 +226,7 @@ public void testPartialMerge() throws Exception { w.addDocument(doc); int count = w.getSegmentCount(); maxCount = Math.max(count, maxCount); - assertTrue("count=" + count + " maxCount=" + maxCount, count >= maxCount - 3); + assertTrue("count=" + count + " maxCount=" + maxCount, count >= maxCount - 6); } w.flush(true, true); @@ -973,15 +968,13 @@ public void testMergeSizeIsLessThanFloorSize() throws IOException { assertEquals(15, oneMerge.segments.size()); } - // Segments are below the floor segment size and we'd need to merge more than maxMergeAtOnce - // segments to go above the minimum segment size. We get 1 merge of maxMergeAtOnce=30 segments - // and 1 merge of 50-30=20 segments. + // Segments are below the floor segment size. We get one merge that merges the 50 segments + // together. mergePolicy.setFloorSegmentMB(60); mergeSpec = mergePolicy.findMerges(MergeTrigger.FULL_FLUSH, infos, mergeContext); assertNotNull(mergeSpec); - assertEquals(2, mergeSpec.merges.size()); - assertEquals(30, mergeSpec.merges.get(0).segments.size()); - assertEquals(20, mergeSpec.merges.get(1).segments.size()); + assertEquals(1, mergeSpec.merges.size()); + assertEquals(50, mergeSpec.merges.get(0).segments.size()); } public void testFullFlushMerges() throws IOException { @@ -1008,6 +1001,6 @@ public void testFullFlushMerges() throws IOException { segmentInfos = applyMerge(segmentInfos, merge, "_" + segNameGenerator.getAndIncrement(), stats); } - assertEquals(2, segmentInfos.size()); + assertEquals(1, segmentInfos.size()); } } diff --git a/lucene/replicator/src/test/org/apache/lucene/replicator/nrt/SimplePrimaryNode.java b/lucene/replicator/src/test/org/apache/lucene/replicator/nrt/SimplePrimaryNode.java index 8c1f5fd71e3a..c05f5e028a08 100644 --- a/lucene/replicator/src/test/org/apache/lucene/replicator/nrt/SimplePrimaryNode.java +++ b/lucene/replicator/src/test/org/apache/lucene/replicator/nrt/SimplePrimaryNode.java @@ -164,7 +164,6 @@ private static IndexWriter initWriter( if (mp instanceof TieredMergePolicy) { TieredMergePolicy tmp = (TieredMergePolicy) mp; tmp.setSegmentsPerTier(3); - tmp.setMaxMergeAtOnce(3); } else if (mp instanceof LogMergePolicy) { LogMergePolicy lmp = (LogMergePolicy) mp; lmp.setMergeFactor(3); diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/util/LuceneTestCase.java index 84fa120b88b1..ff4eb908e9b0 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/util/LuceneTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/util/LuceneTestCase.java @@ -1096,11 +1096,6 @@ private static void configureRandom(Random r, MergePolicy mergePolicy) { public static TieredMergePolicy newTieredMergePolicy(Random r) { TieredMergePolicy tmp = new TieredMergePolicy(); - if (rarely(r)) { - tmp.setMaxMergeAtOnce(TestUtil.nextInt(r, 2, 9)); - } else { - tmp.setMaxMergeAtOnce(TestUtil.nextInt(r, 10, 50)); - } if (rarely(r)) { tmp.setMaxMergedSegmentMB(0.2 + r.nextDouble() * 2.0); } else { @@ -1235,11 +1230,6 @@ public static void maybeChangeLiveIndexWriterConfig(Random r, LiveIndexWriterCon } } else if (mp instanceof TieredMergePolicy) { TieredMergePolicy tmp = (TieredMergePolicy) mp; - if (rarely(r)) { - tmp.setMaxMergeAtOnce(TestUtil.nextInt(r, 2, 9)); - } else { - tmp.setMaxMergeAtOnce(TestUtil.nextInt(r, 10, 50)); - } if (rarely(r)) { tmp.setMaxMergedSegmentMB(0.2 + r.nextDouble() * 2.0); } else { diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/util/TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/tests/util/TestUtil.java index 6715edecc166..c2f5d886e3c0 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/util/TestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/util/TestUtil.java @@ -1453,7 +1453,6 @@ public static void reduceOpenFiles(IndexWriter w) { lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor())); } else if (mp instanceof TieredMergePolicy) { TieredMergePolicy tmp = (TieredMergePolicy) mp; - tmp.setMaxMergeAtOnce(Math.min(5, tmp.getMaxMergeAtOnce())); tmp.setSegmentsPerTier(Math.min(5, tmp.getSegmentsPerTier())); } MergeScheduler ms = w.getConfig().getMergeScheduler();