Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ plugins {

allprojects {
group = "org.grimmory"
version = "0.13.0"
version = "0.14.0"

repositories {
mavenCentral()
Expand Down
34 changes: 33 additions & 1 deletion src/main/java/org/grimmory/pdfium4j/PdfDocument.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ public final class PdfDocument implements AutoCloseable {
private final Thread ownerThread;
private final List<PdfPage> openPages;
private volatile boolean closed = false;
private volatile boolean structurallyModified = false;
private final Map<MetadataTag, String> pendingMetadata = new LinkedHashMap<>();
private String pendingXmpMetadata = null;

Expand Down Expand Up @@ -685,6 +686,7 @@ public void deletePage(int pageIndex) {
}
try {
EditBindings.FPDFPage_Delete.invokeExact(handle, pageIndex);
structurallyModified = true;
} catch (Throwable t) {
throw new PdfiumException("Failed to delete page " + pageIndex, t);
}
Expand Down Expand Up @@ -712,6 +714,7 @@ public void insertBlankPage(int pageIndex, PageSize size) {
if (generated == 0) {
throw new PdfiumException("FPDFPage_GenerateContent failed for index " + pageIndex);
}
structurallyModified = true;
} finally {
ViewBindings.FPDF_ClosePage.invokeExact(pageSeg);
}
Expand Down Expand Up @@ -751,6 +754,7 @@ public void importPages(PdfDocument source, String pageRange, int insertIndex) {
if (ok == 0) {
throw new PdfiumException("FPDF_ImportPages failed for range: " + pageRange);
}
structurallyModified = true;
} catch (PdfiumException e) {
throw e;
} catch (Throwable t) {
Expand Down Expand Up @@ -1278,8 +1282,36 @@ public byte[] saveToBytes() {
public byte[] saveToBytes(SaveOptions options) {
// Serializes this document to a byte array, handing the merged metadata and any pending XMP
// packet to PdfSaver. Fails via ensureOpen() if the document is no longer usable.
ensureOpen();
Map<MetadataTag, String> mergedMetadata = buildMergedMetadata();
// True when there is at least one merged metadata entry or a non-empty pending XMP packet.
boolean hasMetadataUpdate =
(mergedMetadata != null && !mergedMetadata.isEmpty())
|| (pendingXmpMetadata != null && !pendingXmpMetadata.isEmpty());
// Original bytes are requested only for a metadata-only save, i.e. when structurallyModified
// is still false. The visible hunks set that flag in deletePage, insertBlankPage and
// importPages. getOriginalBytes() may return null, in which case null is passed on to
// PdfSaver.
// NOTE(review): other kinds of in-memory edits are not visible here — confirm that they either
// set structurallyModified or are safe to drop when saving from the original bytes.
byte[] originalBytes = (!structurallyModified && hasMetadataUpdate) ? getOriginalBytes() : null;
return PdfSaver.saveToBytes(
// NOTE(review): this span is rendered from a diff. The next line appears to be the pre-change
// argument list (the file header reports exactly 1 deletion) and the line after it the
// replacement that additionally passes originalBytes — only one of them belongs in the code.
handle, mergedMetadata, pendingXmpMetadata, options.skipValidation());
handle, mergedMetadata, pendingXmpMetadata, options.skipValidation(), originalBytes);
}

/**
 * Get the original PDF bytes for this document, if available. Used for metadata-only saves to
 * avoid re-serializing through PDFium (which unpacks Object Streams and causes bloating).
 *
 * <p>When {@code sourceBytes} is set, that array is returned as-is (no defensive copy), so
 * callers must treat it as read-only. Otherwise, when {@code sourcePath} is set, the file is read
 * at call time.
 *
 * <p>NOTE(review): reading {@code sourcePath} at save time presumably assumes the file has not
 * been modified since the document was opened — confirm against the callers that populate it.
 *
 * @return original bytes, or {@code null} if not available (e.g. document was created new) or
 *     if the source file could not be read
 */
private byte[] getOriginalBytes() {
  if (sourceBytes != null) {
    return sourceBytes;
  }
  if (sourcePath == null) {
    return null;
  }
  try {
    return Files.readAllBytes(sourcePath);
  } catch (IOException e) {
    // Best-effort: a failed read is not fatal because the caller falls back to a native save.
    // The exception is handed to the logger so the cause of the fallback is not lost.
    LOG.log(
        System.Logger.Level.WARNING,
        "Could not read original bytes from " + sourcePath + "; falling back to native save",
        e);
    return null;
  }
}

/**
Expand Down
21 changes: 17 additions & 4 deletions src/main/java/org/grimmory/pdfium4j/PdfSaver.java
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ private PdfSaver() {}
*/
static byte[] saveToBytes(
MemorySegment docHandle, Map<MetadataTag, String> pendingMetadata, String pendingXmp) {
// Convenience overload: delegates to the longer overload with skipValidation = false.
// NOTE(review): this span is rendered from a diff. The first return below appears to be the
// pre-change call (4 arguments) and the second its replacement, which additionally passes
// null for originalBytes — only one of them belongs in the code.
return saveToBytes(docHandle, pendingMetadata, pendingXmp, false);
return saveToBytes(docHandle, pendingMetadata, pendingXmp, false, null);
}

/**
Expand All @@ -82,16 +82,29 @@ static byte[] saveToBytes(
* @param skipValidation when {@code true}, skip the re-parse validation step after appending an
* incremental update. Eliminates a full PDF re-open (~30-40% of save time). Safe for
* metadata-only changes.
* @param originalBytes when non-null, use these as the base PDF bytes instead of calling
* FPDF_SaveAsCopy. This avoids re-serializing through PDFium which unpacks Object Streams and
* causes massive file bloating on complex PDFs.
*/
static byte[] saveToBytes(
MemorySegment docHandle,
Map<MetadataTag, String> pendingMetadata,
String pendingXmp,
boolean skipValidation) {
byte[] baseBytes = nativeSave(docHandle);

boolean skipValidation,
byte[] originalBytes) {
boolean hasInfoUpdate = pendingMetadata != null && !pendingMetadata.isEmpty();
boolean hasXmpUpdate = pendingXmp != null && !pendingXmp.isEmpty();

// When original bytes are available and we have metadata to write,
// skip native save entirely — append incremental update directly to the
// original file bytes. This preserves Object Streams and prevents bloating.
byte[] baseBytes;
if (originalBytes != null && (hasInfoUpdate || hasXmpUpdate)) {
baseBytes = originalBytes;
} else {
baseBytes = nativeSave(docHandle);
}

if (!hasInfoUpdate && !hasXmpUpdate) {
return baseBytes;
}
Expand Down
128 changes: 128 additions & 0 deletions src/test/java/org/grimmory/pdfium4j/PdfDocumentTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,31 @@ void insertAndDeletePage(@TempDir Path tempDir) throws IOException {
}
}

@Test
@EnabledIf("pdfiumAvailable")
void importPages(@TempDir Path tempDir) throws IOException {
  // Without a fixture PDF there is nothing to exercise.
  Path fixture = getTestPdf();
  if (fixture == null) {
    return;
  }

  try (PdfDocument target = PdfDocument.open(fixture);
      PdfDocument donor = PdfDocument.open(fixture)) {
    int pagesBefore = target.pageCount();

    // Import page range "1" from the donor, inserting at index pagesBefore.
    target.importPages(donor, "1", pagesBefore);
    assertEquals(pagesBefore + 1, target.pageCount());

    // Importing all donor pages is expected to add another pagesBefore pages.
    target.importAllPages(donor);
    assertEquals(pagesBefore + 1 + pagesBefore, target.pageCount());

    // The merged document must keep its page count across a save / re-open round trip.
    Path mergedFile = tempDir.resolve("merged.pdf");
    target.save(mergedFile);
    assertTrue(Files.exists(mergedFile));

    try (PdfDocument reopened = PdfDocument.open(mergedFile)) {
      assertEquals(target.pageCount(), reopened.pageCount());
    }
  }
}

@Test
@EnabledIf("pdfiumAvailable")
void deletePageOutOfRange() throws IOException {
Expand Down Expand Up @@ -1667,4 +1692,107 @@ void xrefStreamPdfDoubleMetadataWrite(@TempDir Path tempDir) throws IOException
assertEquals(List.of("Second Author"), parsed.creators());
}
}

@Test
@EnabledIf("pdfiumAvailable")
void metadataOnlySaveDoesNotBloatFile(@TempDir Path tempDir) throws IOException {
  Path fixture = getTestPdf();
  if (fixture == null) {
    return;
  }

  long sizeBefore = Files.size(fixture);
  Path target = tempDir.resolve("metadata-only.pdf");

  // Change metadata only, then save to a fresh file.
  try (PdfDocument editable = PdfDocument.open(fixture)) {
    editable.setMetadata(MetadataTag.TITLE, "New Title");
    editable.setMetadata(MetadataTag.AUTHOR, "New Author");
    editable.setMetadata(MetadataTag.KEYWORDS, "keyword1; keyword2");
    editable.save(target);
  }

  long sizeAfter = Files.size(target);
  // Size budget: 5% over the original plus 4 KiB for the appended metadata objects and xref.
  double sizeBudget = sizeBefore * 1.05 + 4096;
  String bloatMessage =
      "Metadata-only save bloated file from " + sizeBefore + " to " + sizeAfter + " bytes";
  assertTrue(sizeAfter <= sizeBudget, bloatMessage);

  // The result must still open, expose the new metadata and contain pages.
  try (PdfDocument reopened = PdfDocument.open(target)) {
    assertEquals("New Title", reopened.metadata(MetadataTag.TITLE).orElse(""));
    assertEquals("New Author", reopened.metadata(MetadataTag.AUTHOR).orElse(""));
    assertTrue(reopened.pageCount() > 0, "Saved PDF must have pages");
  }
}

@Test
@EnabledIf("pdfiumAvailable")
void metadataAndXmpSaveDoesNotBloatFile(@TempDir Path tempDir) throws IOException {
  Path fixture = getTestPdf();
  if (fixture == null) {
    return;
  }

  long sizeBefore = Files.size(fixture);
  Path target = tempDir.resolve("xmp-metadata.pdf");

  // Update both the metadata tags and the XMP packet before saving.
  try (PdfDocument editable = PdfDocument.open(fixture)) {
    editable.setMetadata(MetadataTag.TITLE, "XMP Title");
    editable.setMetadata(MetadataTag.AUTHOR, "XMP Author");
    String xmpPacket = buildBookloreXmp("XMP Title", "XMP Author");
    editable.setXmpMetadata(xmpPacket);
    editable.save(target);
  }

  long sizeAfter = Files.size(target);
  // Size budget: 5% over the original plus 8 KiB to leave room for the XMP packet.
  double sizeBudget = sizeBefore * 1.05 + 8192;
  String bloatMessage =
      "Metadata+XMP save bloated file from " + sizeBefore + " to " + sizeAfter + " bytes";
  assertTrue(sizeAfter <= sizeBudget, bloatMessage);

  // Both the metadata title and the parsed XMP title must carry the new value.
  try (PdfDocument reopened = PdfDocument.open(target)) {
    assertEquals("XMP Title", reopened.metadata(MetadataTag.TITLE).orElse(""));
    XmpMetadata xmp = XmpMetadataParser.parse(reopened.xmpMetadata());
    assertEquals("XMP Title", xmp.title().orElse(""));
    assertTrue(reopened.pageCount() > 0, "Saved PDF must have pages");
  }
}

@Test
@EnabledIf("pdfiumAvailable")
void structuralChangeStillUsesNativeSave(@TempDir Path tempDir) throws IOException {
  Path fixture = getTestPdf();
  if (fixture == null) {
    return;
  }

  Path target = tempDir.resolve("structural.pdf");
  try (PdfDocument editable = PdfDocument.open(fixture)) {
    int pagesBefore = editable.pageCount();

    // Combine a structural edit (blank A4 page inserted at index pagesBefore) with a
    // metadata edit.
    editable.insertBlankPage(pagesBefore, PageSize.A4);
    editable.setMetadata(MetadataTag.TITLE, "Structural Change");
    editable.save(target);

    // Both the extra page and the new title must be present after re-opening.
    try (PdfDocument reopened = PdfDocument.open(target)) {
      assertEquals(pagesBefore + 1, reopened.pageCount());
      assertEquals("Structural Change", reopened.metadata(MetadataTag.TITLE).orElse(""));
    }
  }
}

@Test
@EnabledIf("pdfiumAvailable")
void metadataOnlySaveFromBytesDoesNotBloat() {
  byte[] input = minimalPdfWithText();
  int inputLength = input.length;

  // Open from an in-memory byte array, change one metadata tag, and save back to bytes.
  byte[] output;
  try (PdfDocument editable = PdfDocument.open(input)) {
    editable.setMetadata(MetadataTag.TITLE, "From Bytes Title");
    output = editable.saveToBytes();
  }

  // Size budget: 1.5x the input plus 4 KiB; the result must not be dramatically larger.
  double sizeBudget = inputLength * 1.5 + 4096;
  String bloatMessage =
      "Metadata-only save from bytes bloated from " + inputLength + " to " + output.length;
  assertTrue(output.length <= sizeBudget, bloatMessage);

  // The new title must be readable from the saved bytes.
  try (PdfDocument reopened = PdfDocument.open(output)) {
    assertEquals("From Bytes Title", reopened.metadata(MetadataTag.TITLE).orElse(""));
  }
}
}
Loading