grimmory-tools · balazs-szucs · Apr 1, 2026 · Apr 1, 2026 · Apr 1, 2026
diff --git a/build.gradle.kts b/build.gradle.kts
@@ -13,7 +13,7 @@ plugins {
 
 allprojects {
     group = "org.grimmory"
-    version = "0.13.0"
+    version = "0.14.0"
 
     repositories {
         mavenCentral()

diff --git a/src/main/java/org/grimmory/pdfium4j/PdfDocument.java b/src/main/java/org/grimmory/pdfium4j/PdfDocument.java
@@ -51,6 +51,7 @@ public final class PdfDocument implements AutoCloseable {
   private final Thread ownerThread;
   private final List<PdfPage> openPages;
   private volatile boolean closed = false;
+  private volatile boolean structurallyModified = false; // set by page delete/insert/import
   private final Map<MetadataTag, String> pendingMetadata = new LinkedHashMap<>();
   private String pendingXmpMetadata = null;
 
@@ -685,6 +686,7 @@ public void deletePage(int pageIndex) {
     }
     try {
       EditBindings.FPDFPage_Delete.invokeExact(handle, pageIndex);
+      structurallyModified = true;
     } catch (Throwable t) {
       throw new PdfiumException("Failed to delete page " + pageIndex, t);
     }
@@ -712,6 +714,7 @@ public void insertBlankPage(int pageIndex, PageSize size) {
         if (generated == 0) {
           throw new PdfiumException("FPDFPage_GenerateContent failed for index " + pageIndex);
         }
+        structurallyModified = true;
       } finally {
         ViewBindings.FPDF_ClosePage.invokeExact(pageSeg);
       }
@@ -751,6 +754,7 @@ public void importPages(PdfDocument source, String pageRange, int insertIndex) {
       if (ok == 0) {
         throw new PdfiumException("FPDF_ImportPages failed for range: " + pageRange);
       }
+      structurallyModified = true;
     } catch (PdfiumException e) {
       throw e;
     } catch (Throwable t) {
@@ -1278,8 +1282,36 @@ public byte[] saveToBytes() {
   public byte[] saveToBytes(SaveOptions options) {
     ensureOpen();
     Map<MetadataTag, String> mergedMetadata = buildMergedMetadata();
+    boolean infoPending = mergedMetadata != null && !mergedMetadata.isEmpty();
+    boolean xmpPending = pendingXmpMetadata != null && !pendingXmpMetadata.isEmpty();
+    boolean metadataOnlySave = !structurallyModified && (infoPending || xmpPending);
+    byte[] originalBytes = metadataOnlySave ? getOriginalBytes() : null;
     return PdfSaver.saveToBytes(
-        handle, mergedMetadata, pendingXmpMetadata, options.skipValidation());
+        handle, mergedMetadata, pendingXmpMetadata, options.skipValidation(), originalBytes);
+  }
+
+  /**
+   * Get the original PDF bytes for this document, if available. Used for metadata-only saves to
+   * avoid re-serializing through PDFium (which unpacks Object Streams and causes bloating).
+   *
+   * @return original bytes, or {@code null} if there is no source or the file cannot be read
+   */
+  private byte[] getOriginalBytes() {
+    if (sourceBytes != null) {
+      return sourceBytes;
+    }
+    if (sourcePath != null) {
+      try {
+        return Files.readAllBytes(sourcePath);
+      } catch (IOException e) {
+        // Log the cause too; falling through returns null, which selects the native save.
+        LOG.log(
+            System.Logger.Level.WARNING,
+            "Could not read original bytes from " + sourcePath + "; falling back to native save",
+            e);
+      }
+    }
+    return null;
   }
 
   /**

diff --git a/src/main/java/org/grimmory/pdfium4j/PdfSaver.java b/src/main/java/org/grimmory/pdfium4j/PdfSaver.java
@@ -73,7 +73,7 @@ private PdfSaver() {}
    */
   static byte[] saveToBytes(
       MemorySegment docHandle, Map<MetadataTag, String> pendingMetadata, String pendingXmp) {
-    return saveToBytes(docHandle, pendingMetadata, pendingXmp, false);
+    return saveToBytes(docHandle, pendingMetadata, pendingXmp, false, null); // null: native save
   }
 
   /**
@@ -82,16 +82,29 @@ static byte[] saveToBytes(
    * @param skipValidation when {@code true}, skip the re-parse validation step after appending an
    *     incremental update. Eliminates a full PDF re-open (~30-40% of save time). Safe for
    *     metadata-only changes.
+   * @param originalBytes when non-null and an Info or XMP update is pending, these bytes are used
+   *     as the base PDF instead of calling FPDF_SaveAsCopy. This avoids re-serializing through
+   *     PDFium, which unpacks Object Streams and causes massive file bloating on complex PDFs.
    */
   static byte[] saveToBytes(
       MemorySegment docHandle,
       Map<MetadataTag, String> pendingMetadata,
       String pendingXmp,
-      boolean skipValidation) {
-    byte[] baseBytes = nativeSave(docHandle);
-
+      boolean skipValidation,
+      byte[] originalBytes) {
     boolean hasInfoUpdate = pendingMetadata != null && !pendingMetadata.isEmpty();
     boolean hasXmpUpdate = pendingXmp != null && !pendingXmp.isEmpty();
+
+    // When original bytes are available and we have metadata to write,
+    // skip native save entirely — append incremental update directly to the
+    // original file bytes. This preserves Object Streams and prevents bloating.
+    byte[] baseBytes;
+    if (originalBytes != null && (hasInfoUpdate || hasXmpUpdate)) {
+      baseBytes = originalBytes;
+    } else {
+      baseBytes = nativeSave(docHandle);
+    }
+
     if (!hasInfoUpdate && !hasXmpUpdate) {
       return baseBytes;
     }

diff --git a/src/test/java/org/grimmory/pdfium4j/PdfDocumentTest.java b/src/test/java/org/grimmory/pdfium4j/PdfDocumentTest.java
@@ -752,6 +752,31 @@ void insertAndDeletePage(@TempDir Path tempDir) throws IOException {
     }
   }
 
+  @Test
+  @EnabledIf("pdfiumAvailable")
+  void importPages(@TempDir Path tempDir) throws IOException {
+    Path fixture = getTestPdf();
+    if (fixture == null) return;
+
+    try (PdfDocument target = PdfDocument.open(fixture);
+        PdfDocument donor = PdfDocument.open(fixture)) {
+      int pagesBefore = target.pageCount();
+      target.importPages(donor, "1", pagesBefore); // insert range "1" after the last page
+      assertEquals(pagesBefore + 1, target.pageCount());
+
+      target.importAllPages(donor); // donor is the same file, so it adds pagesBefore pages
+      assertEquals(2 * pagesBefore + 1, target.pageCount());
+
+      Path mergedFile = tempDir.resolve("merged.pdf");
+      target.save(mergedFile);
+      assertTrue(Files.exists(mergedFile));
+
+      try (PdfDocument reopened = PdfDocument.open(mergedFile)) {
+        assertEquals(target.pageCount(), reopened.pageCount()); // count survives a round trip
+      }
+    }
+  }
+
   @Test
   @EnabledIf("pdfiumAvailable")
   void deletePageOutOfRange() throws IOException {
@@ -1667,4 +1692,107 @@ void xrefStreamPdfDoubleMetadataWrite(@TempDir Path tempDir) throws IOException
       assertEquals(List.of("Second Author"), parsed.creators());
     }
   }
+
+  @Test
+  @EnabledIf("pdfiumAvailable")
+  void metadataOnlySaveDoesNotBloatFile(@TempDir Path tempDir) throws IOException {
+    Path fixture = getTestPdf();
+    if (fixture == null) return;
+
+    long originalSize = Files.size(fixture);
+    Path rewritten = tempDir.resolve("metadata-only.pdf");
+    try (PdfDocument editable = PdfDocument.open(fixture)) {
+      editable.setMetadata(MetadataTag.TITLE, "New Title");
+      editable.setMetadata(MetadataTag.AUTHOR, "New Author");
+      editable.setMetadata(MetadataTag.KEYWORDS, "keyword1; keyword2");
+      editable.save(rewritten);
+    }
+
+    // An incremental update appends only the metadata objects and a new xref, so the
+    // output may exceed the input by at most 5% plus 4 KiB of fixed slack.
+    long savedSize = Files.size(rewritten);
+    double sizeLimit = originalSize * 1.05 + 4096;
+    assertTrue(
+        savedSize <= sizeLimit,
+        "Metadata-only save bloated file from " + originalSize + " to " + savedSize + " bytes");
+
+    // Re-open the result: it must still parse and expose the new Info values.
+    try (PdfDocument reopened = PdfDocument.open(rewritten)) {
+      assertEquals("New Title", reopened.metadata(MetadataTag.TITLE).orElse(""));
+      assertEquals("New Author", reopened.metadata(MetadataTag.AUTHOR).orElse(""));
+      assertTrue(reopened.pageCount() > 0, "Saved PDF must have pages");
+    }
+  }
+
+  @Test
+  @EnabledIf("pdfiumAvailable")
+  void metadataAndXmpSaveDoesNotBloatFile(@TempDir Path tempDir) throws IOException {
+    Path fixture = getTestPdf();
+    if (fixture == null) return;
+
+    long originalSize = Files.size(fixture);
+    Path rewritten = tempDir.resolve("xmp-metadata.pdf");
+    // Queue an Info update plus an XMP packet built from the same title and author.
+    try (PdfDocument editable = PdfDocument.open(fixture)) {
+      editable.setMetadata(MetadataTag.TITLE, "XMP Title");
+      editable.setMetadata(MetadataTag.AUTHOR, "XMP Author");
+      editable.setXmpMetadata(buildBookloreXmp("XMP Title", "XMP Author"));
+      editable.save(rewritten);
+    }
+
+    long savedSize = Files.size(rewritten);
+    assertTrue(
+        savedSize <= originalSize * 1.05 + 8192,
+        "Metadata+XMP save bloated file from " + originalSize + " to " + savedSize + " bytes");
+
+    try (PdfDocument reopened = PdfDocument.open(rewritten)) {
+      assertEquals("XMP Title", reopened.metadata(MetadataTag.TITLE).orElse(""));
+      XmpMetadata xmp = XmpMetadataParser.parse(reopened.xmpMetadata());
+      assertEquals("XMP Title", xmp.title().orElse(""));
+      assertTrue(reopened.pageCount() > 0, "Saved PDF must have pages");
+    }
+  }
+
+  @Test
+  @EnabledIf("pdfiumAvailable")
+  void structuralChangeStillUsesNativeSave(@TempDir Path tempDir) throws IOException {
+    Path fixture = getTestPdf();
+    if (fixture == null) return;
+
+    Path rewritten = tempDir.resolve("structural.pdf");
+    try (PdfDocument editable = PdfDocument.open(fixture)) {
+      int pagesBefore = editable.pageCount();
+      editable.insertBlankPage(pagesBefore, PageSize.A4);
+      editable.setMetadata(MetadataTag.TITLE, "Structural Change");
+      editable.save(rewritten);
+
+      // Both the inserted page and the new title must be present in the saved file.
+      try (PdfDocument reopened = PdfDocument.open(rewritten)) {
+        assertEquals(pagesBefore + 1, reopened.pageCount());
+        assertEquals("Structural Change", reopened.metadata(MetadataTag.TITLE).orElse(""));
+      }
+    }
+  }
+
+  @Test
+  @EnabledIf("pdfiumAvailable")
+  void metadataOnlySaveFromBytesDoesNotBloat() {
+    byte[] input = minimalPdfWithText();
+    int originalSize = input.length;
+
+    byte[] saved;
+    try (PdfDocument editable = PdfDocument.open(input)) {
+      editable.setMetadata(MetadataTag.TITLE, "From Bytes Title");
+      saved = editable.saveToBytes();
+    }
+
+    // Loose bound: at most 1.5x the input plus 4 KiB of fixed slack.
+    assertTrue(
+        saved.length <= originalSize * 1.5 + 4096,
+        "Metadata-only save from bytes bloated from " + originalSize + " to " + saved.length);
+
+    try (PdfDocument reopened = PdfDocument.open(saved)) {
+      assertEquals("From Bytes Title", reopened.metadata(MetadataTag.TITLE).orElse(""));
+    }
+  }
 }