Skip to content

Commit 0ee1f5e

Browse files
authored
feat: add new PDF document creation and image rendering capabilities (#20)
* feat: add new PDF document creation and image rendering capabilities
* refactor: rename regex patterns for clarity and improve PDF metadata extraction logic
* refactor: fix formatting
1 parent e51a68c commit 0ee1f5e

9 files changed

Lines changed: 715 additions & 36 deletions

File tree

build.gradle.kts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ plugins {
1313

1414
allprojects {
1515
group = "org.grimmory"
16-
version = "0.6.0"
16+
version = "0.9.0"
1717

1818
repositories {
1919
mavenCentral()

src/main/java/org/grimmory/pdfium4j/PdfDocument.java

Lines changed: 150 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import java.nio.file.Files;
1010
import java.nio.file.Path;
1111
import java.util.*;
12-
import java.util.stream.IntStream;
1312
import org.grimmory.pdfium4j.exception.PdfCorruptException;
1413
import org.grimmory.pdfium4j.exception.PdfPasswordException;
1514
import org.grimmory.pdfium4j.exception.PdfiumException;
@@ -227,6 +226,27 @@ public static Optional<String> koReaderPartialMd5(byte[] data) {
227226
return KoReaderChecksum.calculate(data);
228227
}
229228

229+
/**
230+
* Create a new empty PDF document.
231+
*
232+
* @throws PdfiumException if the document cannot be created
233+
*/
234+
public static PdfDocument create() {
235+
PdfiumLibrary.ensureInitialized();
236+
try {
237+
MemorySegment handle = (MemorySegment) EditBindings.FPDF_CreateNewDocument.invokeExact();
238+
if (handle.equals(MemorySegment.NULL)) {
239+
throw new PdfiumException("Failed to create new PDF document");
240+
}
241+
return new PdfDocument(
242+
handle, null, null, null, PdfProcessingPolicy.defaultPolicy(), Thread.currentThread());
243+
} catch (PdfiumException e) {
244+
throw e;
245+
} catch (Throwable t) {
246+
throw new PdfiumException("Failed to create new PDF document", t);
247+
}
248+
}
249+
230250
/**
231251
* Open a PDF from a file path.
232252
*
@@ -405,6 +425,7 @@ public PdfPage page(int index) {
405425
PdfPage page =
406426
new PdfPage(
407427
pageSeg,
428+
handle,
408429
ownerThread,
409430
policy.maxRenderPixels(),
410431
() -> unregisterPage(holder[0]),
@@ -569,8 +590,14 @@ public Map<Integer, RenderResult> renderPages(int startIndex, int endIndex, int
569590
+ " pages");
570591
}
571592

572-
List<Integer> indices = IntStream.rangeClosed(startIndex, endIndex).boxed().toList();
573-
return renderPages(indices, dpi);
593+
ensureOpen();
594+
Map<Integer, RenderResult> results = new LinkedHashMap<>(endIndex - startIndex + 1);
595+
for (int i = startIndex; i <= endIndex; i++) {
596+
try (PdfPage page = page(i)) {
597+
results.put(i, page.render(dpi));
598+
}
599+
}
600+
return Collections.unmodifiableMap(results);
574601
}
575602

576603
/**
@@ -580,8 +607,44 @@ public Map<Integer, RenderResult> renderPages(int startIndex, int endIndex, int
580607
* @return map of page index to render result, in iteration order
581608
*/
582609
public Map<Integer, RenderResult> renderAllPages(int dpi) {
583-
List<Integer> indices = IntStream.range(0, pageCount()).boxed().toList();
584-
return renderPages(indices, dpi);
610+
return renderPages(0, pageCount() - 1, dpi);
611+
}
612+
613+
/**
614+
* Render a single page and return encoded image bytes. This is a convenience method that handles
615+
* page opening, rendering, encoding, and resource cleanup in a single call.
616+
*
617+
* @param pageIndex 0-based page index
618+
* @param dpi render resolution (e.g. 150 for thumbnails, 300 for high quality)
619+
* @param format image format: "jpeg" or "png"
620+
* @return encoded image bytes
621+
* @throws IllegalArgumentException if format is not "jpeg" or "png", or pageIndex is invalid
622+
*/
623+
public byte[] renderPageToBytes(int pageIndex, int dpi, String format) {
624+
return renderPageToBytes(pageIndex, dpi, format, 0.85f);
625+
}
626+
627+
/**
628+
* Render a single page and return encoded image bytes with configurable JPEG quality.
629+
*
630+
* @param pageIndex 0-based page index
631+
* @param dpi render resolution
632+
* @param format image format: "jpeg" or "png"
633+
* @param jpegQuality JPEG quality from 0.0 to 1.0 (ignored for PNG)
634+
* @return encoded image bytes
635+
* @throws IllegalArgumentException if format is not "jpeg" or "png", or pageIndex is invalid
636+
*/
637+
public byte[] renderPageToBytes(int pageIndex, int dpi, String format, float jpegQuality) {
638+
Objects.requireNonNull(format, "format");
639+
String fmt = format.toLowerCase(java.util.Locale.ROOT);
640+
if (!fmt.equals("jpeg") && !fmt.equals("png")) {
641+
throw new IllegalArgumentException("Format must be 'jpeg' or 'png', got: " + format);
642+
}
643+
644+
try (PdfPage page = page(pageIndex)) {
645+
RenderResult result = page.render(dpi);
646+
return fmt.equals("png") ? result.toPngBytes() : result.toJpegBytes(jpegQuality);
647+
}
585648
}
586649

587650
/**
@@ -818,6 +881,43 @@ public Optional<String> metadata(MetadataTag tag) {
818881
}
819882
}
820883

884+
/**
885+
* Get a metadata value by an arbitrary Info Dictionary key string. This allows reading
886+
* non-standard keys like "EBX_PUBLISHER" that are not covered by {@link MetadataTag}.
887+
*
888+
* @param key the raw Info Dictionary key name (e.g. "Title", "EBX_PUBLISHER")
889+
* @return the value, or empty if not present
890+
*/
891+
public Optional<String> metadata(String key) {
892+
ensureOpen();
893+
Objects.requireNonNull(key, "key");
894+
895+
MetadataTag standardTag = MetadataTag.fromKey(key);
896+
if (standardTag != null && pendingMetadata.containsKey(standardTag)) {
897+
String value = pendingMetadata.get(standardTag);
898+
return (value == null || value.isEmpty()) ? Optional.empty() : Optional.of(value);
899+
}
900+
901+
try (Arena arena = Arena.ofConfined()) {
902+
MemorySegment keySeg = arena.allocateFrom(key);
903+
904+
long needed =
905+
(long) DocBindings.FPDF_GetMetaText.invokeExact(handle, keySeg, MemorySegment.NULL, 0L);
906+
if (needed <= 2) return Optional.empty();
907+
908+
MemorySegment buf = arena.allocate(needed);
909+
long written = (long) DocBindings.FPDF_GetMetaText.invokeExact(handle, keySeg, buf, needed);
910+
if (written <= 2) {
911+
return Optional.empty();
912+
}
913+
914+
String value = FfmHelper.fromWideString(buf, needed);
915+
return value.isEmpty() ? Optional.empty() : Optional.of(value);
916+
} catch (Throwable t) {
917+
throw new PdfiumException("Failed to read metadata: " + key, t);
918+
}
919+
}
920+
821921
/** Get all standard metadata as a map. Only non-empty values are included. */
822922
public Map<String, String> metadata() {
823923
Map<String, String> map = new LinkedHashMap<>();
@@ -994,7 +1094,19 @@ private static byte[] extractXmpPacketFromFile(Path path) {
9941094
System.arraycopy(buf, available - carry, buf, 0, carry);
9951095
}
9961096

997-
if (lastBeginFilePos < 0) return new byte[0];
1097+
if (lastBeginFilePos < 0) {
1098+
// Fallback: read entire file and scan for <x:xmpmeta> ... </x:xmpmeta>
1099+
channel.position(0);
1100+
byte[] allBytes = new byte[(int) fileSize];
1101+
var allBuf = java.nio.ByteBuffer.wrap(allBytes);
1102+
int totalRead = 0;
1103+
while (totalRead < fileSize) {
1104+
int n = channel.read(allBuf, totalRead);
1105+
if (n < 0) break;
1106+
totalRead += n;
1107+
}
1108+
return extractXmpmetaFallback(allBytes);
1109+
}
9981110

9991111
// Phase 2: find <?xpacket end=...?> after the last begin marker
10001112
offset = lastBeginFilePos;
@@ -1051,16 +1163,41 @@ private static byte[] extractXmpPacket(byte[] pdf) {
10511163
lastBeginPos = pos;
10521164
searchFrom = pos + 1;
10531165
}
1054-
if (lastBeginPos < 0) return new byte[0];
10551166

1056-
int endPos = indexOf(pdf, endMarker, lastBeginPos);
1057-
if (endPos < 0) return new byte[0];
1167+
if (lastBeginPos >= 0) {
1168+
int endPos = indexOf(pdf, endMarker, lastBeginPos);
1169+
if (endPos >= 0) {
1170+
int endTagClose =
1171+
indexOf(pdf, "?>".getBytes(java.nio.charset.StandardCharsets.US_ASCII), endPos);
1172+
if (endTagClose >= 0) {
1173+
int packetEnd = endTagClose + 2;
1174+
byte[] xmp = new byte[packetEnd - lastBeginPos];
1175+
System.arraycopy(pdf, lastBeginPos, xmp, 0, xmp.length);
1176+
return xmp;
1177+
}
1178+
}
1179+
}
10581180

1059-
int endTagClose =
1060-
indexOf(pdf, "?>".getBytes(java.nio.charset.StandardCharsets.US_ASCII), endPos);
1061-
if (endTagClose < 0) return new byte[0];
1062-
int packetEnd = endTagClose + 2;
1181+
// Fallback: scan for <x:xmpmeta ...> ... </x:xmpmeta> (no xpacket wrapper)
1182+
return extractXmpmetaFallback(pdf);
1183+
}
10631184

1185+
private static byte[] extractXmpmetaFallback(byte[] pdf) {
1186+
byte[] beginTag = "<x:xmpmeta".getBytes(java.nio.charset.StandardCharsets.US_ASCII);
1187+
byte[] endTag = "</x:xmpmeta>".getBytes(java.nio.charset.StandardCharsets.US_ASCII);
1188+
// Find LAST <x:xmpmeta occurrence
1189+
int lastBeginPos = -1;
1190+
int searchFrom = 0;
1191+
while (searchFrom < pdf.length) {
1192+
int pos = indexOf(pdf, beginTag, searchFrom);
1193+
if (pos < 0) break;
1194+
lastBeginPos = pos;
1195+
searchFrom = pos + 1;
1196+
}
1197+
if (lastBeginPos < 0) return new byte[0];
1198+
int endPos = indexOf(pdf, endTag, lastBeginPos);
1199+
if (endPos < 0) return new byte[0];
1200+
int packetEnd = endPos + endTag.length;
10641201
byte[] xmp = new byte[packetEnd - lastBeginPos];
10651202
System.arraycopy(pdf, lastBeginPos, xmp, 0, xmp.length);
10661203
return xmp;

0 commit comments

Comments
 (0)