99import java .nio .file .Files ;
1010import java .nio .file .Path ;
1111import java .util .*;
12- import java .util .stream .IntStream ;
1312import org .grimmory .pdfium4j .exception .PdfCorruptException ;
1413import org .grimmory .pdfium4j .exception .PdfPasswordException ;
1514import org .grimmory .pdfium4j .exception .PdfiumException ;
@@ -227,6 +226,27 @@ public static Optional<String> koReaderPartialMd5(byte[] data) {
227226 return KoReaderChecksum .calculate (data );
228227 }
229228
229+ /**
230+ * Create a new empty PDF document.
231+ *
232+ * @throws PdfiumException if the document cannot be created
233+ */
234+ public static PdfDocument create () {
235+ PdfiumLibrary .ensureInitialized ();
236+ try {
237+ MemorySegment handle = (MemorySegment ) EditBindings .FPDF_CreateNewDocument .invokeExact ();
238+ if (handle .equals (MemorySegment .NULL )) {
239+ throw new PdfiumException ("Failed to create new PDF document" );
240+ }
241+ return new PdfDocument (
242+ handle , null , null , null , PdfProcessingPolicy .defaultPolicy (), Thread .currentThread ());
243+ } catch (PdfiumException e ) {
244+ throw e ;
245+ } catch (Throwable t ) {
246+ throw new PdfiumException ("Failed to create new PDF document" , t );
247+ }
248+ }
249+
230250 /**
231251 * Open a PDF from a file path.
232252 *
@@ -405,6 +425,7 @@ public PdfPage page(int index) {
405425 PdfPage page =
406426 new PdfPage (
407427 pageSeg ,
428+ handle ,
408429 ownerThread ,
409430 policy .maxRenderPixels (),
410431 () -> unregisterPage (holder [0 ]),
@@ -569,8 +590,14 @@ public Map<Integer, RenderResult> renderPages(int startIndex, int endIndex, int
569590 + " pages" );
570591 }
571592
572- List <Integer > indices = IntStream .rangeClosed (startIndex , endIndex ).boxed ().toList ();
573- return renderPages (indices , dpi );
593+ ensureOpen ();
594+ Map <Integer , RenderResult > results = new LinkedHashMap <>(endIndex - startIndex + 1 );
595+ for (int i = startIndex ; i <= endIndex ; i ++) {
596+ try (PdfPage page = page (i )) {
597+ results .put (i , page .render (dpi ));
598+ }
599+ }
600+ return Collections .unmodifiableMap (results );
574601 }
575602
576603 /**
@@ -580,8 +607,44 @@ public Map<Integer, RenderResult> renderPages(int startIndex, int endIndex, int
580607 * @return map of page index to render result, in iteration order
581608 */
582609 public Map <Integer , RenderResult > renderAllPages (int dpi ) {
583- List <Integer > indices = IntStream .range (0 , pageCount ()).boxed ().toList ();
584- return renderPages (indices , dpi );
610+ return renderPages (0 , pageCount () - 1 , dpi );
611+ }
612+
613+ /**
614+ * Render a single page and return encoded image bytes. This is a convenience method that handles
615+ * page opening, rendering, encoding, and resource cleanup in a single call.
616+ *
617+ * @param pageIndex 0-based page index
618+ * @param dpi render resolution (e.g. 150 for thumbnails, 300 for high quality)
619+ * @param format image format: "jpeg" or "png"
620+ * @return encoded image bytes
621+ * @throws IllegalArgumentException if format is not "jpeg" or "png", or pageIndex is invalid
622+ */
623+ public byte [] renderPageToBytes (int pageIndex , int dpi , String format ) {
624+ return renderPageToBytes (pageIndex , dpi , format , 0.85f );
625+ }
626+
627+ /**
628+ * Render a single page and return encoded image bytes with configurable JPEG quality.
629+ *
630+ * @param pageIndex 0-based page index
631+ * @param dpi render resolution
632+ * @param format image format: "jpeg" or "png"
633+ * @param jpegQuality JPEG quality from 0.0 to 1.0 (ignored for PNG)
634+ * @return encoded image bytes
635+ * @throws IllegalArgumentException if format is not "jpeg" or "png", or pageIndex is invalid
636+ */
637+ public byte [] renderPageToBytes (int pageIndex , int dpi , String format , float jpegQuality ) {
638+ Objects .requireNonNull (format , "format" );
639+ String fmt = format .toLowerCase (java .util .Locale .ROOT );
640+ if (!fmt .equals ("jpeg" ) && !fmt .equals ("png" )) {
641+ throw new IllegalArgumentException ("Format must be 'jpeg' or 'png', got: " + format );
642+ }
643+
644+ try (PdfPage page = page (pageIndex )) {
645+ RenderResult result = page .render (dpi );
646+ return fmt .equals ("png" ) ? result .toPngBytes () : result .toJpegBytes (jpegQuality );
647+ }
585648 }
586649
587650 /**
@@ -818,6 +881,43 @@ public Optional<String> metadata(MetadataTag tag) {
818881 }
819882 }
820883
884+ /**
885+ * Get a metadata value by an arbitrary Info Dictionary key string. This allows reading
886+ * non-standard keys like "EBX_PUBLISHER" that are not covered by {@link MetadataTag}.
887+ *
888+ * @param key the raw Info Dictionary key name (e.g. "Title", "EBX_PUBLISHER")
889+ * @return the value, or empty if not present
890+ */
891+ public Optional <String > metadata (String key ) {
892+ ensureOpen ();
893+ Objects .requireNonNull (key , "key" );
894+
895+ MetadataTag standardTag = MetadataTag .fromKey (key );
896+ if (standardTag != null && pendingMetadata .containsKey (standardTag )) {
897+ String value = pendingMetadata .get (standardTag );
898+ return (value == null || value .isEmpty ()) ? Optional .empty () : Optional .of (value );
899+ }
900+
901+ try (Arena arena = Arena .ofConfined ()) {
902+ MemorySegment keySeg = arena .allocateFrom (key );
903+
904+ long needed =
905+ (long ) DocBindings .FPDF_GetMetaText .invokeExact (handle , keySeg , MemorySegment .NULL , 0L );
906+ if (needed <= 2 ) return Optional .empty ();
907+
908+ MemorySegment buf = arena .allocate (needed );
909+ long written = (long ) DocBindings .FPDF_GetMetaText .invokeExact (handle , keySeg , buf , needed );
910+ if (written <= 2 ) {
911+ return Optional .empty ();
912+ }
913+
914+ String value = FfmHelper .fromWideString (buf , needed );
915+ return value .isEmpty () ? Optional .empty () : Optional .of (value );
916+ } catch (Throwable t ) {
917+ throw new PdfiumException ("Failed to read metadata: " + key , t );
918+ }
919+ }
920+
821921 /** Get all standard metadata as a map. Only non-empty values are included. */
822922 public Map <String , String > metadata () {
823923 Map <String , String > map = new LinkedHashMap <>();
@@ -994,7 +1094,19 @@ private static byte[] extractXmpPacketFromFile(Path path) {
9941094 System .arraycopy (buf , available - carry , buf , 0 , carry );
9951095 }
9961096
997- if (lastBeginFilePos < 0 ) return new byte [0 ];
1097+ if (lastBeginFilePos < 0 ) {
1098+ // Fallback: read entire file and scan for <x:xmpmeta> ... </x:xmpmeta>
1099+ channel .position (0 );
1100+ byte [] allBytes = new byte [(int ) fileSize ];
1101+ var allBuf = java .nio .ByteBuffer .wrap (allBytes );
1102+ int totalRead = 0 ;
1103+ while (totalRead < fileSize ) {
1104+ int n = channel .read (allBuf , totalRead );
1105+ if (n < 0 ) break ;
1106+ totalRead += n ;
1107+ }
1108+ return extractXmpmetaFallback (allBytes );
1109+ }
9981110
9991111 // Phase 2: find <?xpacket end=...?> after the last begin marker
10001112 offset = lastBeginFilePos ;
@@ -1051,16 +1163,41 @@ private static byte[] extractXmpPacket(byte[] pdf) {
10511163 lastBeginPos = pos ;
10521164 searchFrom = pos + 1 ;
10531165 }
1054- if (lastBeginPos < 0 ) return new byte [0 ];
10551166
1056- int endPos = indexOf (pdf , endMarker , lastBeginPos );
1057- if (endPos < 0 ) return new byte [0 ];
1167+ if (lastBeginPos >= 0 ) {
1168+ int endPos = indexOf (pdf , endMarker , lastBeginPos );
1169+ if (endPos >= 0 ) {
1170+ int endTagClose =
1171+ indexOf (pdf , "?>" .getBytes (java .nio .charset .StandardCharsets .US_ASCII ), endPos );
1172+ if (endTagClose >= 0 ) {
1173+ int packetEnd = endTagClose + 2 ;
1174+ byte [] xmp = new byte [packetEnd - lastBeginPos ];
1175+ System .arraycopy (pdf , lastBeginPos , xmp , 0 , xmp .length );
1176+ return xmp ;
1177+ }
1178+ }
1179+ }
10581180
1059- int endTagClose =
1060- indexOf (pdf , "?>" .getBytes (java .nio .charset .StandardCharsets .US_ASCII ), endPos );
1061- if (endTagClose < 0 ) return new byte [0 ];
1062- int packetEnd = endTagClose + 2 ;
1181+ // Fallback: scan for <x:xmpmeta ...> ... </x:xmpmeta> (no xpacket wrapper)
1182+ return extractXmpmetaFallback (pdf );
1183+ }
10631184
1185+ private static byte [] extractXmpmetaFallback (byte [] pdf ) {
1186+ byte [] beginTag = "<x:xmpmeta" .getBytes (java .nio .charset .StandardCharsets .US_ASCII );
1187+ byte [] endTag = "</x:xmpmeta>" .getBytes (java .nio .charset .StandardCharsets .US_ASCII );
1188+ // Find LAST <x:xmpmeta occurrence
1189+ int lastBeginPos = -1 ;
1190+ int searchFrom = 0 ;
1191+ while (searchFrom < pdf .length ) {
1192+ int pos = indexOf (pdf , beginTag , searchFrom );
1193+ if (pos < 0 ) break ;
1194+ lastBeginPos = pos ;
1195+ searchFrom = pos + 1 ;
1196+ }
1197+ if (lastBeginPos < 0 ) return new byte [0 ];
1198+ int endPos = indexOf (pdf , endTag , lastBeginPos );
1199+ if (endPos < 0 ) return new byte [0 ];
1200+ int packetEnd = endPos + endTag .length ;
10641201 byte [] xmp = new byte [packetEnd - lastBeginPos ];
10651202 System .arraycopy (pdf , lastBeginPos , xmp , 0 , xmp .length );
10661203 return xmp ;
0 commit comments