diff --git a/build.gradle.kts b/build.gradle.kts
index cc898cb..f382283 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -13,7 +13,7 @@ plugins {
 
 allprojects {
     group = "org.grimmory"
-    version = "0.13.0"
+    version = "0.14.0"
 
     repositories {
         mavenCentral()
diff --git a/src/main/java/org/grimmory/pdfium4j/PdfDocument.java b/src/main/java/org/grimmory/pdfium4j/PdfDocument.java
index 77e9313..609a0dd 100644
--- a/src/main/java/org/grimmory/pdfium4j/PdfDocument.java
+++ b/src/main/java/org/grimmory/pdfium4j/PdfDocument.java
@@ -51,6 +51,7 @@ public final class PdfDocument implements AutoCloseable {
   private final Thread ownerThread;
   private final List openPages;
   private volatile boolean closed = false;
+  private volatile boolean structurallyModified = false;
 
   private final Map pendingMetadata = new LinkedHashMap<>();
   private String pendingXmpMetadata = null;
@@ -685,6 +686,7 @@ public void deletePage(int pageIndex) {
     }
     try {
       EditBindings.FPDFPage_Delete.invokeExact(handle, pageIndex);
+      structurallyModified = true;
     } catch (Throwable t) {
       throw new PdfiumException("Failed to delete page " + pageIndex, t);
     }
@@ -712,6 +714,7 @@ public void insertBlankPage(int pageIndex, PageSize size) {
         if (generated == 0) {
           throw new PdfiumException("FPDFPage_GenerateContent failed for index " + pageIndex);
         }
+        structurallyModified = true;
       } finally {
         ViewBindings.FPDF_ClosePage.invokeExact(pageSeg);
       }
@@ -751,6 +754,7 @@ public void importPages(PdfDocument source, String pageRange, int insertIndex) {
       if (ok == 0) {
         throw new PdfiumException("FPDF_ImportPages failed for range: " + pageRange);
       }
+      structurallyModified = true;
     } catch (PdfiumException e) {
       throw e;
     } catch (Throwable t) {
@@ -1278,8 +1282,42 @@ public byte[] saveToBytes() {
   public byte[] saveToBytes(SaveOptions options) {
     ensureOpen();
     Map mergedMetadata = buildMergedMetadata();
+    boolean hasMetadataUpdate =
+        (mergedMetadata != null && !mergedMetadata.isEmpty())
+            || (pendingXmpMetadata != null && !pendingXmpMetadata.isEmpty());
+    // Metadata-only save: hand PdfSaver the original bytes so it uses them as the base instead of
+    // calling nativeSave. NOTE(review): in this patch only deletePage/insertBlankPage/importPages
+    // set structurallyModified; confirm no other in-memory edit is lost on this path.
+    byte[] originalBytes = (!structurallyModified && hasMetadataUpdate) ? getOriginalBytes() : null;
     return PdfSaver.saveToBytes(
-        handle, mergedMetadata, pendingXmpMetadata, options.skipValidation());
+        handle, mergedMetadata, pendingXmpMetadata, options.skipValidation(), originalBytes);
+  }
+
+  /**
+   * Get the original PDF bytes for this document, if available. Used for metadata-only saves to
+   * avoid re-serializing through PDFium (which unpacks Object Streams and causes bloating).
+   *
+   * <p>Returns {@code sourceBytes} when set; otherwise reads {@code sourcePath} from disk at call
+   * time. A read failure is logged at WARNING with its cause and reported as {@code null}.
+   *
+   * @return original bytes, or {@code null} if not available (e.g. document was created new)
+   */
+  private byte[] getOriginalBytes() {
+    if (sourceBytes != null) {
+      return sourceBytes;
+    }
+    if (sourcePath != null) {
+      try {
+        return Files.readAllBytes(sourcePath);
+      } catch (IOException e) {
+        LOG.log(
+            System.Logger.Level.WARNING,
+            "Could not read original bytes from " + sourcePath + "; falling back to native save",
+            e);
+        return null;
+      }
+    }
+    return null;
   }
 
   /**
diff --git a/src/main/java/org/grimmory/pdfium4j/PdfSaver.java b/src/main/java/org/grimmory/pdfium4j/PdfSaver.java
index 8adac2b..e3a2918 100644
--- a/src/main/java/org/grimmory/pdfium4j/PdfSaver.java
+++ b/src/main/java/org/grimmory/pdfium4j/PdfSaver.java
@@ -73,7 +73,7 @@ private PdfSaver() {}
    */
   static byte[] saveToBytes(
       MemorySegment docHandle, Map pendingMetadata, String pendingXmp) {
-    return saveToBytes(docHandle, pendingMetadata, pendingXmp, false);
+    return saveToBytes(docHandle, pendingMetadata, pendingXmp, false, null);
   }
 
   /**
@@ -82,16 +82,29 @@ static byte[] saveToBytes(
    * @param skipValidation when {@code true}, skip the re-parse validation step after appending an
    *     incremental update. Eliminates a full PDF re-open (~30-40% of save time). Safe for
    *     metadata-only changes.
+   * @param originalBytes when non-null, use these as the base PDF bytes instead of calling
+   *     FPDF_SaveAsCopy. This avoids re-serializing through PDFium which unpacks Object Streams and
+   *     causes massive file bloating on complex PDFs.
    */
   static byte[] saveToBytes(
       MemorySegment docHandle,
       Map pendingMetadata,
       String pendingXmp,
-      boolean skipValidation) {
-    byte[] baseBytes = nativeSave(docHandle);
-
+      boolean skipValidation,
+      byte[] originalBytes) {
     boolean hasInfoUpdate = pendingMetadata != null && !pendingMetadata.isEmpty();
     boolean hasXmpUpdate = pendingXmp != null && !pendingXmp.isEmpty();
+
+    // When original bytes are available and we have metadata to write,
+    // skip native save entirely — append incremental update directly to the
+    // original file bytes. This preserves Object Streams and prevents bloating.
+    byte[] baseBytes;
+    if (originalBytes != null && (hasInfoUpdate || hasXmpUpdate)) {
+      baseBytes = originalBytes;
+    } else {
+      baseBytes = nativeSave(docHandle);
+    }
+
     if (!hasInfoUpdate && !hasXmpUpdate) {
       return baseBytes;
     }
diff --git a/src/test/java/org/grimmory/pdfium4j/PdfDocumentTest.java b/src/test/java/org/grimmory/pdfium4j/PdfDocumentTest.java
index b661e8e..4cbf871 100644
--- a/src/test/java/org/grimmory/pdfium4j/PdfDocumentTest.java
+++ b/src/test/java/org/grimmory/pdfium4j/PdfDocumentTest.java
@@ -752,6 +752,31 @@ void insertAndDeletePage(@TempDir Path tempDir) throws IOException {
     }
   }
 
+  @Test
+  @EnabledIf("pdfiumAvailable")
+  void importPages(@TempDir Path tempDir) throws IOException {
+    Path testPdf = getTestPdf();
+    if (testPdf == null) return;
+
+    try (PdfDocument doc1 = PdfDocument.open(testPdf);
+        PdfDocument doc2 = PdfDocument.open(testPdf)) {
+      int initialCount = doc1.pageCount();
+      doc1.importPages(doc2, "1", initialCount);
+      assertEquals(initialCount + 1, doc1.pageCount());
+
+      doc1.importAllPages(doc2);
+      assertEquals(initialCount + 1 + initialCount, doc1.pageCount());
+
+      Path out = tempDir.resolve("merged.pdf");
+      doc1.save(out);
+      assertTrue(Files.exists(out));
+
+      try (PdfDocument merged = PdfDocument.open(out)) {
+        assertEquals(doc1.pageCount(), merged.pageCount());
+      }
+    }
+  }
+
   @Test
   @EnabledIf("pdfiumAvailable")
   void deletePageOutOfRange() throws IOException {
@@ -1667,4 +1692,107 @@ void xrefStreamPdfDoubleMetadataWrite(@TempDir Path tempDir) throws IOException
       assertEquals(List.of("Second Author"), parsed.creators());
     }
   }
+
+  @Test
+  @EnabledIf("pdfiumAvailable")
+  void metadataOnlySaveDoesNotBloatFile(@TempDir Path tempDir) throws IOException {
+    Path testPdf = getTestPdf();
+    if (testPdf == null) return;
+
+    long originalSize = Files.size(testPdf);
+
+    Path output = tempDir.resolve("metadata-only.pdf");
+    try (PdfDocument doc = PdfDocument.open(testPdf)) {
+      doc.setMetadata(MetadataTag.TITLE, "New Title");
+      doc.setMetadata(MetadataTag.AUTHOR, "New Author");
+      doc.setMetadata(MetadataTag.KEYWORDS, "keyword1; keyword2");
+      doc.save(output);
+    }
+
+    long savedSize = Files.size(output);
+    // Incremental update should add only a few KB for metadata objects + xref,
+    // not re-serialize the entire PDF. Allow 5% overhead.
+    assertTrue(
+        savedSize <= originalSize * 1.05 + 4096,
+        "Metadata-only save bloated file from " + originalSize + " to " + savedSize + " bytes");
+
+    // Verify the saved PDF is valid and metadata is readable
+    try (PdfDocument doc = PdfDocument.open(output)) {
+      assertEquals("New Title", doc.metadata(MetadataTag.TITLE).orElse(""));
+      assertEquals("New Author", doc.metadata(MetadataTag.AUTHOR).orElse(""));
+      assertTrue(doc.pageCount() > 0, "Saved PDF must have pages");
+    }
+  }
+
+  @Test
+  @EnabledIf("pdfiumAvailable")
+  void metadataAndXmpSaveDoesNotBloatFile(@TempDir Path tempDir) throws IOException {
+    Path testPdf = getTestPdf();
+    if (testPdf == null) return;
+
+    long originalSize = Files.size(testPdf);
+
+    Path output = tempDir.resolve("xmp-metadata.pdf");
+    try (PdfDocument doc = PdfDocument.open(testPdf)) {
+      doc.setMetadata(MetadataTag.TITLE, "XMP Title");
+      doc.setMetadata(MetadataTag.AUTHOR, "XMP Author");
+      doc.setXmpMetadata(buildBookloreXmp("XMP Title", "XMP Author"));
+      doc.save(output);
+    }
+
+    long savedSize = Files.size(output);
+    assertTrue(
+        savedSize <= originalSize * 1.05 + 8192,
+        "Metadata+XMP save bloated file from " + originalSize + " to " + savedSize + " bytes");
+
+    try (PdfDocument doc = PdfDocument.open(output)) {
+      assertEquals("XMP Title", doc.metadata(MetadataTag.TITLE).orElse(""));
+      XmpMetadata parsed = XmpMetadataParser.parse(doc.xmpMetadata());
+      assertEquals("XMP Title", parsed.title().orElse(""));
+      assertTrue(doc.pageCount() > 0, "Saved PDF must have pages");
+    }
+  }
+
+  @Test
+  @EnabledIf("pdfiumAvailable")
+  void structuralChangeStillUsesNativeSave(@TempDir Path tempDir) throws IOException {
+    Path testPdf = getTestPdf();
+    if (testPdf == null) return;
+
+    Path output = tempDir.resolve("structural.pdf");
+    try (PdfDocument doc = PdfDocument.open(testPdf)) {
+      int originalCount = doc.pageCount();
+      doc.insertBlankPage(originalCount, PageSize.A4);
+      doc.setMetadata(MetadataTag.TITLE, "Structural Change");
+      doc.save(output);
+
+      // Re-open and verify the structural change persisted
+      try (PdfDocument saved = PdfDocument.open(output)) {
+        assertEquals(originalCount + 1, saved.pageCount());
+        assertEquals("Structural Change", saved.metadata(MetadataTag.TITLE).orElse(""));
+      }
+    }
+  }
+
+  @Test
+  @EnabledIf("pdfiumAvailable")
+  void metadataOnlySaveFromBytesDoesNotBloat() {
+    byte[] pdf = minimalPdfWithText();
+    int originalSize = pdf.length;
+
+    byte[] saved;
+    try (PdfDocument doc = PdfDocument.open(pdf)) {
+      doc.setMetadata(MetadataTag.TITLE, "From Bytes Title");
+      saved = doc.saveToBytes();
+    }
+
+    // Should not be dramatically larger than original
+    assertTrue(
+        saved.length <= originalSize * 1.5 + 4096,
+        "Metadata-only save from bytes bloated from " + originalSize + " to " + saved.length);
+
+    try (PdfDocument doc = PdfDocument.open(saved)) {
+      assertEquals("From Bytes Title", doc.metadata(MetadataTag.TITLE).orElse(""));
+    }
+  }
 }