@@ -697,7 +697,8 @@ private IndexWriter createFastIndexWriter(Directory dir, int maxBufferedDocs) th
     IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
     conf.setMaxBufferedDocs(maxBufferedDocs);
     conf.setRAMBufferSizeMB(-1);
-    conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
+    conf.setMergePolicy(newLogMergePolicy());
+    conf.getCodec().compoundFormat().setShouldUseCompoundFile(random().nextBoolean());
     return new IndexWriter(dir, conf);
   }

@@ -727,7 +728,8 @@ private void doTestSortedNumericBlocksOfVariousBitsPerValue(LongSupplier counts)
     conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
     conf.setRAMBufferSizeMB(-1);
     // so Lucene docids are predictable / stay in order
-    conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
+    conf.setMergePolicy(newLogMergePolicy());
+    conf.getCodec().compoundFormat().setShouldUseCompoundFile(random().nextBoolean());
     IndexWriter writer = new IndexWriter(dir, conf);

     final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE * 3);
@@ -797,7 +799,8 @@ private void doTestSparseNumericBlocksOfVariousBitsPerValue(double density) thro
     IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
     conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
     conf.setRAMBufferSizeMB(-1);
-    conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
+    conf.setMergePolicy(newLogMergePolicy());
+    conf.getCodec().compoundFormat().setShouldUseCompoundFile(random().nextBoolean());
     IndexWriter writer = new IndexWriter(dir, conf);
     Document doc = new Document();
     Field storedField = newStringField("stored", "", Field.Store.YES);
@@ -53,7 +53,6 @@
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.KnnVectorValues;
 import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.LogByteSizeMergePolicy;
 import org.apache.lucene.index.MultiBits;
 import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.MultiTerms;
@@ -125,15 +124,14 @@ protected void createIndex(Directory directory) throws IOException {
   }

   static void createIndex(Directory dir, boolean doCFS, boolean fullyMerged) throws IOException {
-    LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
-    mp.setNoCFSRatio(doCFS ? 1.0 : 0.0);
-    mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
     // TODO: remove randomness
     IndexWriterConfig conf =
         new IndexWriterConfig(new MockAnalyzer(random()))
             .setMaxBufferedDocs(10)
             .setCodec(TestUtil.getDefaultCodec())
             .setMergePolicy(NoMergePolicy.INSTANCE);
+    conf.getCodec().compoundFormat().setShouldUseCompoundFile(doCFS);
+    conf.getCodec().compoundFormat().setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
     IndexWriter writer = new IndexWriter(dir, conf);

     for (int i = 0; i < DOCS_COUNT; i++) {
@@ -147,14 +145,13 @@ static void createIndex(Directory dir, boolean doCFS, boolean fullyMerged) throw

     if (!fullyMerged) {
       // open fresh writer so we get no prx file in the added segment
-      mp = new LogByteSizeMergePolicy();
-      mp.setNoCFSRatio(doCFS ? 1.0 : 0.0);
       // TODO: remove randomness
       conf =
           new IndexWriterConfig(new MockAnalyzer(random()))
               .setMaxBufferedDocs(10)
               .setCodec(TestUtil.getDefaultCodec())
               .setMergePolicy(NoMergePolicy.INSTANCE);
+      conf.getCodec().compoundFormat().setShouldUseCompoundFile(doCFS);
       writer = new IndexWriter(dir, conf);
       addNoProxDoc(writer);
       writer.close();
@@ -153,15 +153,15 @@ public void testSortedIndex() throws Exception {
   @Override
   protected void createIndex(Directory directory) throws IOException {
     LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
-    mp.setNoCFSRatio(1.0);
-    mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
     MockAnalyzer analyzer = new MockAnalyzer(random());

     // Don't filter out tokens that are too short because we use those tokens in assertions (#14344)
     analyzer.setMaxTokenLength(RandomizedTest.randomIntBetween(5, IndexWriter.MAX_TERM_LENGTH));

     // TODO: remove randomness
     IndexWriterConfig conf = new IndexWriterConfig(analyzer);
+    conf.getCodec().compoundFormat().setShouldUseCompoundFile(true);
+    conf.getCodec().compoundFormat().setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
     conf.setMergePolicy(mp);
     conf.setUseCompoundFile(false);
     conf.setCodec(TestUtil.getDefaultCodec());
@@ -57,8 +57,6 @@ public static Iterable<Object[]> testVersionsFactory() {
   @Override
   protected void createIndex(Directory directory) throws IOException {
     LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
-    mp.setNoCFSRatio(1.0);
-    mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
     MockAnalyzer analyzer = new MockAnalyzer(random());
     analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
@@ -67,6 +65,8 @@ protected void createIndex(Directory directory) throws IOException {
             .setMergePolicy(mp)
             .setCodec(TestUtil.getDefaultCodec())
             .setUseCompoundFile(false);
+    conf.getCodec().compoundFormat().setShouldUseCompoundFile(true);
+    conf.getCodec().compoundFormat().setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
     IndexWriter writer = new IndexWriter(directory, conf);
     LineFileDocs docs = new LineFileDocs(new Random(0));
     for (int i = 0; i < 50; i++) {
@@ -180,7 +180,7 @@ public PostingsFormat postingsFormat() {
       throw new RuntimeException(
           "unable to instantiate class '" + mergePolicy + "' as merge policy", e);
     }
-    iwConf.getMergePolicy().setNoCFSRatio(isCompound ? 1.0 : 0.0);
+    iwConf.getCodec().compoundFormat().setShouldUseCompoundFile(isCompound);
     if (iwConf.getMergePolicy() instanceof LogMergePolicy) {
       LogMergePolicy logMergePolicy = (LogMergePolicy) iwConf.getMergePolicy();
       logMergePolicy.setMergeFactor(
@@ -642,7 +642,7 @@ public void testIndexWriterSettings() throws Exception {
     assertEquals(
         IndexWriterConfig.DISABLE_AUTO_FLUSH, (int) writer.getConfig().getRAMBufferSizeMB());
     assertEquals(3, ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor());
-    assertEquals(0.0d, writer.getConfig().getMergePolicy().getNoCFSRatio(), 0.0);
+    assertFalse(writer.getConfig().getCodec().compoundFormat().getShouldUseCompoundFile());
     writer.close();
     Directory dir = benchmark.getRunData().getDirectory();
     IndexReader reader = DirectoryReader.open(dir);
148 changes: 148 additions & 0 deletions lucene/core/src/java/org/apache/lucene/codecs/CompoundFormat.java
@@ -17,6 +17,8 @@
 package org.apache.lucene.codecs;

 import java.io.IOException;
+import org.apache.lucene.index.LogDocMergePolicy;
+import org.apache.lucene.index.MergePolicy;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
@@ -34,6 +36,152 @@ protected CompoundFormat() {}
  // TODO: this is very minimal. If we need more methods,
  // we can add 'producer' classes.

  /** Default document count threshold for using compound files with LogDocMergePolicy */
  static final int DEFAULT_CFS_THRESHOLD_DOC_SIZE = 65536; // docs

  /** Default byte size threshold for using compound files with other merge policies (64MB) */
  static final long DEFAULT_CFS_THRESHOLD_BYTE_SIZE = 64L * 1024 * 1024; // 64MB

  /** Default maximum segment size allowed for compound files (no limit) */
  static final long DEFAULT_MAX_CFS_SEGMENT_SIZE = Long.MAX_VALUE;

  /** Document count threshold for LogDocMergePolicy */
  private int cfsThresholdDocSize = DEFAULT_CFS_THRESHOLD_DOC_SIZE;

  /** Byte size threshold for other merge policies */
  private long cfsThresholdByteSize = DEFAULT_CFS_THRESHOLD_BYTE_SIZE;

  /** Whether compound files should be used at all */
  private boolean shouldUseCompoundFile = true;

  /** Maximum segment size that can be stored as a compound file */
  private long maxCFSSegmentSize = DEFAULT_MAX_CFS_SEGMENT_SIZE;

  /**
   * Sets the document count threshold for using compound files with LogDocMergePolicy. Segments
   * with a document count less than or equal to this threshold will use compound files.
   *
   * @param threshold the document count threshold
   */
  public void setCfsThresholdDocSize(int threshold) {
    this.cfsThresholdDocSize = threshold;
  }

  /**
   * Sets the byte size threshold for using compound files with merge policies other than
   * LogDocMergePolicy. Segments with a size less than or equal to this threshold will use compound
   * files.
   *
   * @param thresholdBytes the byte size threshold in bytes
   */
  public void setCfsThresholdByteSize(long thresholdBytes) {
    this.cfsThresholdByteSize = thresholdBytes;
  }

  /**
   * Returns the current document count threshold for compound files.
   *
   * @return the document count threshold
   */
  public int getCfsThresholdDocSize() {
    return this.cfsThresholdDocSize;
  }

  /**
   * Returns the current byte size threshold for compound files.
   *
   * @return the byte size threshold in bytes
   */
  public long getCfsThresholdByteSize() {
    return this.cfsThresholdByteSize;
  }

  /**
   * Enables or disables the use of compound files entirely. When disabled, no segments will use
   * compound files regardless of other settings.
   *
   * @param useCompoundFile true to enable compound files, false to disable
   */
  public void setShouldUseCompoundFile(boolean useCompoundFile) {
    this.shouldUseCompoundFile = useCompoundFile;
  }

  /**
   * Returns whether compound files are enabled.
   *
   * @return true if compound files are enabled, false otherwise
   */
  public boolean getShouldUseCompoundFile() {
    return this.shouldUseCompoundFile;
  }

  /**
   * Returns the largest size allowed for a compound file segment, in megabytes. Segments larger
   * than this size will not use compound files even if otherwise eligible.
   *
   * @return the maximum compound file segment size in MB
   */
  public double getMaxCFSSegmentSizeMB() {
    return maxCFSSegmentSize / 1024. / 1024.;
  }

  /**
   * Sets the maximum size limit for compound file segments, in megabytes. If a merged segment will
   * be larger than this value, it is left as a non-compound file even if compound files are
   * enabled. Set this to Double.POSITIVE_INFINITY (default) to always use CFS when the other
   * conditions are met.
   *
   * @param v the maximum segment size in MB (must be >= 0)
   * @throws IllegalArgumentException if v is negative
   */
  public void setMaxCFSSegmentSizeMB(double v) {
    if (v < 0.0) {
      throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")");
    }
    v *= 1024 * 1024; // convert MB to bytes
    this.maxCFSSegmentSize = v > Long.MAX_VALUE ? Long.MAX_VALUE : (long) v;
  }

  /**
   * Determines whether a segment should use the compound file format based on its size and merge
   * policy.
   *
   * <p>The decision logic is as follows:
   *
   * <ol>
   *   <li>If compound files are disabled globally, return false
   *   <li>If the segment size exceeds the maximum CFS segment size, return false
   *   <li>For LogDocMergePolicy: use CFS if document count ≤ document threshold
   *   <li>For other merge policies: use CFS if byte size ≤ byte threshold
   * </ol>
   *
   * @param mergedInfoSize the size of the segment (document count for LogDocMergePolicy, bytes for
   *     others)
   * @param mergePolicy the merge policy being used
   * @return true if the segment should use the compound file format, false otherwise
   * @throws IOException if an I/O error occurs
   */
  public boolean useCompoundFile(long mergedInfoSize, MergePolicy mergePolicy) throws IOException {
    // Check if compound files are globally disabled
    if (this.shouldUseCompoundFile == false) {
      return false;
    }

    // Check if the segment exceeds the maximum allowed size for CFS
    if (mergedInfoSize > maxCFSSegmentSize) {
      return false;
    }

    // Apply the appropriate threshold based on the merge policy type
    if (mergePolicy instanceof LogDocMergePolicy) {
Review comment (Contributor), on the LogDocMergePolicy check above:

It would be great if we could avoid customizing this for specific policies; otherwise it might be tricky to maintain in the future if, e.g., there is another policy that is based on doc count rather than bytes.

Maybe we can add an enum and a method to MergePolicy which returns its unit (bytes/docs), and use it here to decide which threshold to use?

Or do we want to always choose compound format based on size in bytes, even for LogDocMergePolicy? In that case we might be able to use merge.getMergeInfo().sizeInBytes() when we call this method and avoid relying on MergePolicy#size altogether.

Reply (Contributor Author):

+1 to the idea of enums. I will wait to see if anyone else has other suggestions here, but having an enum makes the most sense to me. (A sketch of the enum idea appears after this file's diff.)

      // For LogDocMergePolicy, mergedInfoSize represents a document count
      return mergedInfoSize <= this.cfsThresholdDocSize;
    } else {
      // For other policies, mergedInfoSize represents a byte size
      return mergedInfoSize <= this.cfsThresholdByteSize;
    }
  }

  /** Returns a Directory view (read-only) for the compound files in this segment */
  public abstract CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si)
      throws IOException;
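For orientation, here is a minimal sketch of how an application might drive these new knobs. It assumes the setters this PR adds to CompoundFormat, plus standard Lucene classes (StandardAnalyzer, ByteBuffersDirectory); the threshold values are arbitrary:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class CompoundFormatConfigSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new ByteBuffersDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());

    // Merge-time CFS decisions now live on the codec's CompoundFormat
    // (previously MergePolicy.setNoCFSRatio / setMaxCFSSegmentSizeMB):
    conf.getCodec().compoundFormat().setShouldUseCompoundFile(true);
    conf.getCodec().compoundFormat().setMaxCFSSegmentSizeMB(512.0);
    conf.getCodec().compoundFormat().setCfsThresholdByteSize(64L * 1024 * 1024);

    // Flush-time CFS for newly created segments is still a writer-level setting:
    conf.setUseCompoundFile(true);

    try (IndexWriter writer = new IndexWriter(dir, conf)) {
      // ... index documents ...
    }
  }
}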
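And since the review thread above floats a unit enum, here is a rough standalone illustration of that idea. None of this is in the PR; SizeUnit, sizeUnit(), and the class names are invented for the sketch:

// Hypothetical sketch of the review suggestion; not part of this PR.
enum SizeUnit {
  BYTES,
  DOCS
}

abstract class MergePolicySketch {
  // Each policy declares the unit of the sizes it reports; a doc-count-based
  // policy such as LogDocMergePolicy would override this to return DOCS.
  SizeUnit sizeUnit() {
    return SizeUnit.BYTES;
  }
}

class CompoundFormatSketch {
  private boolean shouldUseCompoundFile = true;
  private long maxCFSSegmentSize = Long.MAX_VALUE;
  private int cfsThresholdDocSize = 65536;
  private long cfsThresholdByteSize = 64L * 1024 * 1024;

  // The unit-driven threshold choice replaces the instanceof check.
  boolean useCompoundFile(long mergedInfoSize, MergePolicySketch mergePolicy) {
    if (shouldUseCompoundFile == false || mergedInfoSize > maxCFSSegmentSize) {
      return false;
    }
    return switch (mergePolicy.sizeUnit()) {
      case DOCS -> mergedInfoSize <= cfsThresholdDocSize;
      case BYTES -> mergedInfoSize <= cfsThresholdByteSize;
    };
  }
}

Whether the thresholds should stay unit-specific at all is the open question in the thread; a bytes-only rule would remove the need for the enum as well.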
@@ -75,38 +75,11 @@ public MergeSpecification findFullFlushMerges(
     return in.findFullFlushMerges(mergeTrigger, segmentInfos, mergeContext);
   }

-  @Override
-  public boolean useCompoundFile(
-      SegmentInfos infos, SegmentCommitInfo mergedInfo, MergeContext mergeContext)
-      throws IOException {
-    return in.useCompoundFile(infos, mergedInfo, mergeContext);
-  }
-
   @Override
   protected long size(SegmentCommitInfo info, MergeContext context) throws IOException {
     return in.size(info, context);
   }

-  @Override
-  public double getNoCFSRatio() {
-    return in.getNoCFSRatio();
-  }
-
-  @Override
-  public final void setNoCFSRatio(double noCFSRatio) {
-    in.setNoCFSRatio(noCFSRatio);
-  }
-
-  @Override
-  public final void setMaxCFSSegmentSizeMB(double v) {
-    in.setMaxCFSSegmentSizeMB(v);
-  }
-
-  @Override
-  public final double getMaxCFSSegmentSizeMB() {
-    return in.getMaxCFSSegmentSizeMB();
-  }
-
   @Override
   public String toString() {
     return getClass().getSimpleName() + "(" + in + ")";
16 changes: 14 additions & 2 deletions lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -3480,7 +3480,13 @@ public void addIndexesReaderMerge(MergePolicy.OneMerge merge) throws IOException
     boolean useCompoundFile;
     synchronized (this) {
       merge.checkAborted();
-      useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, merge.getMergeInfo(), this);
+      useCompoundFile =
+          merge
+              .getMergeInfo()
+              .info
+              .getCodec()
+              .compoundFormat()
+              .useCompoundFile(mergePolicy.size(merge.getMergeInfo(), this), mergePolicy);
     }

     // Now create the compound file if needed
@@ -5336,7 +5342,13 @@ public int length() {
     // this segment:
     boolean useCompoundFile;
     synchronized (this) { // Guard segmentInfos
-      useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, merge.info, this);
+      useCompoundFile =
+          merge
+              .getMergeInfo()
+              .info
+              .getCodec()
+              .compoundFormat()
+              .useCompoundFile(mergePolicy.size(merge.info, this), mergePolicy);
     }

     if (useCompoundFile) {
@@ -368,9 +368,7 @@ public InfoStream getInfoStream() {
    *
    * <p>Use <code>false</code> for batch indexing with very large ram buffer settings.
    *
-   * <p><b>Note: To control compound file usage during segment merges see {@link
-   * MergePolicy#setNoCFSRatio(double)} and {@link MergePolicy#setMaxCFSSegmentSizeMB(double)}. This
-   * setting only applies to newly created segments.</b>
+   * <p><b>Note: To control compound file usage during segment merges, see {@link org.apache.lucene.codecs.CompoundFormat#setShouldUseCompoundFile(boolean)} and {@link org.apache.lucene.codecs.CompoundFormat#setMaxCFSSegmentSizeMB(double)}. This setting only applies to newly created segments.</b>
    */
   public LiveIndexWriterConfig setUseCompoundFile(boolean useCompoundFile) {
     this.useCompoundFile = useCompoundFile;