Skip to content

Commit 8b5fc49

Browse files
Merge pull request #18 from datalogics-cgreen/pdfcloud-5464-pdf
PDFCLOUD-5553 Add client methods to convert to `.pdf`
2 parents d74d92f + 17b02ac commit 8b5fc49

21 files changed

Lines changed: 3105 additions & 0 deletions

src/pdfrest/client.py

Lines changed: 436 additions & 0 deletions
Large diffs are not rendered by default.

src/pdfrest/models/_internal.py

Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,14 @@
2323
from pdfrest.types.public import PdfRedactionPreset
2424

2525
from ..types import (
26+
HtmlPageOrientation,
27+
HtmlPageSize,
28+
HtmlWebLayout,
2629
OcrLanguage,
2730
PdfAType,
31+
PdfConversionCompression,
32+
PdfConversionDownsample,
33+
PdfConversionLocale,
2834
PdfInfoQuery,
2935
PdfPageOrientation,
3036
PdfPageSize,
@@ -114,6 +120,10 @@ def _serialize_as_first_file_id(value: list[PdfRestFile]) -> str:
114120
return str(value[0].id)
115121

116122

123+
def _serialize_as_first_url(value: list[HttpUrl]) -> str:
124+
return str(value[0])
125+
126+
117127
def _serialize_as_comma_separated_string(value: list[Any] | None) -> str | None:
118128
if value is None:
119129
return None
@@ -243,6 +253,9 @@ def _validate_output_language(value: str) -> str:
243253
return trimmed
244254

245255

256+
# Accepted ``page_margin`` format: an unsigned decimal number (digits with an
# optional fractional part) immediately followed by the unit ``mm`` or ``in``,
# e.g. "8mm" or "0.5in". No sign, whitespace, or other unit is allowed.
_PAGE_MARGIN_REGEX = r"^(?:\d+(?:\.\d+)?)(?:mm|in)$"
257+
258+
246259
class UploadURLs(BaseModel):
247260
url: Annotated[
248261
list[HttpUrl] | HttpUrl,
@@ -551,6 +564,276 @@ class ConvertToMarkdownPayload(BaseModel):
551564
] = None
552565

553566

567+
_PDF_WORD_MIME_TYPES = {
568+
"application/msword",
569+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
570+
}
571+
_PDF_EXCEL_MIME_TYPES = {
572+
"application/vnd.ms-excel",
573+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
574+
}
575+
_PDF_POWERPOINT_MIME_TYPES = {
576+
"application/vnd.ms-powerpoint",
577+
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
578+
}
579+
_PDF_OFFICE_MIME_TYPES = (
580+
_PDF_WORD_MIME_TYPES | _PDF_EXCEL_MIME_TYPES | _PDF_POWERPOINT_MIME_TYPES
581+
)
582+
_PDF_POSTSCRIPT_MIME_TYPES = {
583+
"application/postscript",
584+
"application/eps",
585+
"application/x-eps",
586+
}
587+
_PDF_EMAIL_MIME_TYPES = {"message/rfc822"}
588+
_PDF_IMAGE_MIME_TYPES = {
589+
"image/jpeg",
590+
"image/tiff",
591+
"image/bmp",
592+
"image/png",
593+
}
594+
_PDF_HTML_MIME_TYPES = {"text/html"}
595+
596+
597+
class ConvertOfficeToPdfPayload(BaseModel):
    """Adapt caller options into a pdfRest-ready office-to-pdf payload.

    Takes exactly one Word, Excel, or PowerPoint file plus optional
    conversion settings. ``locale`` is rejected unless the input file has an
    Excel MIME type.
    """

    # Exactly one Office file. Accepted under either ``file`` or ``files`` and
    # serialized under ``id`` as the string form of that file's ``id``.
    files: Annotated[
        list[PdfRestFile],
        Field(
            min_length=1,
            max_length=1,
            validation_alias=AliasChoices("file", "files"),
            serialization_alias="id",
        ),
        BeforeValidator(_ensure_list),
        AfterValidator(
            _allowed_mime_types(
                *_PDF_OFFICE_MIME_TYPES,
                error_msg="Must be a Microsoft Office file.",
            )
        ),
        PlainSerializer(_serialize_as_first_file_id),
    ]
    # Optional non-empty output value, checked by ``_validate_output_prefix``.
    output: Annotated[
        str | None,
        Field(serialization_alias="output", min_length=1, default=None),
        AfterValidator(_validate_output_prefix),
    ] = None
    # Optional compression setting.
    compression: Annotated[
        PdfConversionCompression | None,
        Field(serialization_alias="compression", default=None),
    ] = None
    # Optional downsample setting.
    downsample: Annotated[
        PdfConversionDownsample | None,
        Field(serialization_alias="downsample", default=None),
    ] = None
    # Stored as "on"/"off"; raw input passes through ``_bool_to_on_off`` first
    # (presumably mapping booleans to those strings; confirm in the helper).
    tagged_pdf: Annotated[
        Literal["on", "off"] | None,
        Field(serialization_alias="tagged_pdf", default=None),
        BeforeValidator(_bool_to_on_off),
    ] = None
    # Optional locale; only valid for Excel inputs (enforced below).
    locale: Annotated[
        PdfConversionLocale | None,
        Field(serialization_alias="locale", default=None),
    ] = None

    @model_validator(mode="after")
    def _validate_option_compatibility(self) -> ConvertOfficeToPdfPayload:
        """Reject ``locale`` when the input file is not an Excel document.

        Raises:
            ValueError: If ``locale`` is set and the file's MIME type is not
                in ``_PDF_EXCEL_MIME_TYPES``.
        """
        # Nothing to cross-check when locale is unset or the input is Excel.
        if self.locale is None or self.files[0].type in _PDF_EXCEL_MIME_TYPES:
            return self
        message = "locale is only supported for Excel inputs."
        raise ValueError(message)
647+
648+
649+
class ConvertPostscriptToPdfPayload(BaseModel):
    """Adapt caller options into a pdfRest-ready postscript-to-pdf payload.

    Takes exactly one PostScript or EPS file plus optional output,
    compression, and downsample settings.
    """

    # Exactly one PostScript/EPS file. Accepted under either ``file`` or
    # ``files`` and serialized under ``id`` as the string form of its ``id``.
    files: Annotated[
        list[PdfRestFile],
        Field(
            min_length=1,
            max_length=1,
            validation_alias=AliasChoices("file", "files"),
            serialization_alias="id",
        ),
        BeforeValidator(_ensure_list),
        AfterValidator(
            _allowed_mime_types(
                *_PDF_POSTSCRIPT_MIME_TYPES,
                error_msg="Must be a PostScript or EPS file.",
            )
        ),
        PlainSerializer(_serialize_as_first_file_id),
    ]
    # Optional non-empty output value, checked by ``_validate_output_prefix``.
    output: Annotated[
        str | None,
        Field(serialization_alias="output", min_length=1, default=None),
        AfterValidator(_validate_output_prefix),
    ] = None
    # Optional compression setting.
    compression: Annotated[
        PdfConversionCompression | None,
        Field(serialization_alias="compression", default=None),
    ] = None
    # Optional downsample setting.
    downsample: Annotated[
        PdfConversionDownsample | None,
        Field(serialization_alias="downsample", default=None),
    ] = None
682+
683+
684+
class ConvertEmailToPdfPayload(BaseModel):
    """Adapt caller options into a pdfRest-ready email-to-pdf payload.

    Takes exactly one ``message/rfc822`` file and an optional output value.
    """

    # Exactly one RFC 822 email file. Accepted under either ``file`` or
    # ``files`` and serialized under ``id`` as the string form of its ``id``.
    files: Annotated[
        list[PdfRestFile],
        Field(
            min_length=1,
            max_length=1,
            validation_alias=AliasChoices("file", "files"),
            serialization_alias="id",
        ),
        BeforeValidator(_ensure_list),
        AfterValidator(
            _allowed_mime_types(
                *_PDF_EMAIL_MIME_TYPES,
                error_msg="Must be an RFC822 email file.",
            )
        ),
        PlainSerializer(_serialize_as_first_file_id),
    ]
    # Optional non-empty output value, checked by ``_validate_output_prefix``.
    output: Annotated[
        str | None,
        Field(serialization_alias="output", min_length=1, default=None),
        AfterValidator(_validate_output_prefix),
    ] = None
709+
710+
711+
class ConvertImageToPdfPayload(BaseModel):
    """Adapt caller options into a pdfRest-ready image-to-pdf payload.

    Takes exactly one image whose MIME type is in ``_PDF_IMAGE_MIME_TYPES``
    and an optional output value.
    """

    # Exactly one supported image file. Accepted under either ``file`` or
    # ``files`` and serialized under ``id`` as the string form of its ``id``.
    files: Annotated[
        list[PdfRestFile],
        Field(
            min_length=1,
            max_length=1,
            validation_alias=AliasChoices("file", "files"),
            serialization_alias="id",
        ),
        BeforeValidator(_ensure_list),
        AfterValidator(
            _allowed_mime_types(
                *_PDF_IMAGE_MIME_TYPES,
                error_msg="Must be a supported image file type.",
            )
        ),
        PlainSerializer(_serialize_as_first_file_id),
    ]
    # Optional non-empty output value, checked by ``_validate_output_prefix``.
    output: Annotated[
        str | None,
        Field(serialization_alias="output", min_length=1, default=None),
        AfterValidator(_validate_output_prefix),
    ] = None
736+
737+
738+
class ConvertHtmlToPdfPayload(BaseModel):
    """Adapt caller options into a pdfRest-ready html-to-pdf payload.

    Takes exactly one ``text/html`` file plus optional output, image, and
    page-layout settings.
    """

    # Exactly one HTML file. Accepted under either ``file`` or ``files`` and
    # serialized under ``id`` as the string form of its ``id``.
    files: Annotated[
        list[PdfRestFile],
        Field(
            min_length=1,
            max_length=1,
            validation_alias=AliasChoices("file", "files"),
            serialization_alias="id",
        ),
        BeforeValidator(_ensure_list),
        AfterValidator(
            _allowed_mime_types(
                *_PDF_HTML_MIME_TYPES,
                error_msg="Must be an HTML file.",
            )
        ),
        PlainSerializer(_serialize_as_first_file_id),
    ]
    # Optional non-empty output value, checked by ``_validate_output_prefix``.
    output: Annotated[
        str | None,
        Field(serialization_alias="output", min_length=1, default=None),
        AfterValidator(_validate_output_prefix),
    ] = None
    # Optional compression setting.
    compression: Annotated[
        PdfConversionCompression | None,
        Field(serialization_alias="compression", default=None),
    ] = None
    # Optional downsample setting.
    downsample: Annotated[
        PdfConversionDownsample | None,
        Field(serialization_alias="downsample", default=None),
    ] = None
    # Optional named page size.
    page_size: Annotated[
        HtmlPageSize | None,
        Field(serialization_alias="page_size", default=None),
    ] = None
    # Optional margin string; must match ``_PAGE_MARGIN_REGEX``.
    page_margin: Annotated[
        str | None,
        Field(
            serialization_alias="page_margin",
            pattern=_PAGE_MARGIN_REGEX,
            default=None,
        ),
    ] = None
    # Optional page orientation.
    page_orientation: Annotated[
        HtmlPageOrientation | None,
        Field(serialization_alias="page_orientation", default=None),
    ] = None
    # Optional web layout.
    web_layout: Annotated[
        HtmlWebLayout | None,
        Field(serialization_alias="web_layout", default=None),
    ] = None
791+
792+
793+
class ConvertUrlToPdfPayload(BaseModel):
    """Adapt caller options into a pdfRest-ready convert-to-pdf payload for one URL.

    Takes exactly one HTTP(S) URL plus optional output, image, and
    page-layout settings.
    """

    # Exactly one URL, held as a one-item list and serialized under ``url`` as
    # the string form of that item.
    url: Annotated[
        list[HttpUrl],
        Field(serialization_alias="url", min_length=1, max_length=1),
        BeforeValidator(_ensure_list),
        PlainSerializer(_serialize_as_first_url),
    ]
    # Optional non-empty output value, checked by ``_validate_output_prefix``.
    output: Annotated[
        str | None,
        Field(serialization_alias="output", min_length=1, default=None),
        AfterValidator(_validate_output_prefix),
    ] = None
    # Optional compression setting.
    compression: Annotated[
        PdfConversionCompression | None,
        Field(serialization_alias="compression", default=None),
    ] = None
    # Optional downsample setting.
    downsample: Annotated[
        PdfConversionDownsample | None,
        Field(serialization_alias="downsample", default=None),
    ] = None
    # Optional named page size.
    page_size: Annotated[
        HtmlPageSize | None,
        Field(serialization_alias="page_size", default=None),
    ] = None
    # Optional margin string; must match ``_PAGE_MARGIN_REGEX``.
    page_margin: Annotated[
        str | None,
        Field(
            serialization_alias="page_margin",
            pattern=_PAGE_MARGIN_REGEX,
            default=None,
        ),
    ] = None
    # Optional page orientation.
    page_orientation: Annotated[
        HtmlPageOrientation | None,
        Field(serialization_alias="page_orientation", default=None),
    ] = None
    # Optional web layout.
    web_layout: Annotated[
        HtmlWebLayout | None,
        Field(serialization_alias="web_layout", default=None),
    ] = None
835+
836+
554837
class TranslatePdfTextPayload(BaseModel):
555838
"""Adapt caller options into a pdfRest-ready translate request payload."""
556839

src/pdfrest/types/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,17 @@
1010
FlattenQuality,
1111
GifColorModel,
1212
GraphicSmoothing,
13+
HtmlPageOrientation,
14+
HtmlPageSize,
15+
HtmlWebLayout,
1316
JpegColorModel,
1417
OcrLanguage,
1518
PdfAddTextObject,
1619
PdfAType,
1720
PdfCmykColor,
21+
PdfConversionCompression,
22+
PdfConversionDownsample,
23+
PdfConversionLocale,
1824
PdfCustomPageSize,
1925
PdfInfoQuery,
2026
PdfMergeInput,
@@ -46,11 +52,17 @@
4652
"FlattenQuality",
4753
"GifColorModel",
4854
"GraphicSmoothing",
55+
"HtmlPageOrientation",
56+
"HtmlPageSize",
57+
"HtmlWebLayout",
4958
"JpegColorModel",
5059
"OcrLanguage",
5160
"PdfAType",
5261
"PdfAddTextObject",
5362
"PdfCmykColor",
63+
"PdfConversionCompression",
64+
"PdfConversionDownsample",
65+
"PdfConversionLocale",
5466
"PdfCustomPageSize",
5567
"PdfInfoQuery",
5668
"PdfMergeInput",

src/pdfrest/types/public.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,17 @@
2222
"FlattenQuality",
2323
"GifColorModel",
2424
"GraphicSmoothing",
25+
"HtmlPageOrientation",
26+
"HtmlPageSize",
27+
"HtmlWebLayout",
2528
"JpegColorModel",
2629
"OcrLanguage",
2730
"PdfAType",
2831
"PdfAddTextObject",
2932
"PdfCmykColor",
33+
"PdfConversionCompression",
34+
"PdfConversionDownsample",
35+
"PdfConversionLocale",
3036
"PdfCustomPageSize",
3137
"PdfInfoQuery",
3238
"PdfMergeInput",
@@ -144,6 +150,12 @@ class PdfMergeSource(TypedDict, total=False):
144150

145151
PdfMergeInput = PdfRestFile | PdfMergeSource | tuple[PdfRestFile, PdfPageSelection]
146152

153+
# Option value types for the convert-to-PDF payload models.

# Allowed values for the ``compression`` option.
PdfConversionCompression = Literal["lossy", "lossless"]
# Allowed values for the ``downsample`` option: the string "off" or one of the
# listed integers (presumably a resolution in DPI; confirm against the API docs).
PdfConversionDownsample = Literal["off", 75, 150, 300, 600, 1200]
# Allowed values for the ``locale`` option.
PdfConversionLocale = Literal["US", "Germany"]
# Allowed values for the ``page_size`` option of HTML/URL conversions.
HtmlPageSize = Literal["letter", "legal", "ledger", "A3", "A4", "A5"]
# Allowed values for the ``page_orientation`` option of HTML/URL conversions.
HtmlPageOrientation = Literal["portrait", "landscape"]
# Allowed values for the ``web_layout`` option of HTML/URL conversions.
HtmlWebLayout = Literal["desktop", "tablet", "mobile"]
147159
PdfAType = Literal["PDF/A-1b", "PDF/A-2b", "PDF/A-2u", "PDF/A-3b", "PDF/A-3u"]
148160
PdfXType = Literal["PDF/X-1a", "PDF/X-3", "PDF/X-4", "PDF/X-6"]
149161
ExtractTextGranularity = Literal["off", "by_page", "document"]
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from __future__ import annotations
2+
3+
from pdfrest.models import PdfRestFile
4+
5+
6+
def make_source_file(file_id: str, mime_type: str, name: str) -> PdfRestFile:
    """Build a ``PdfRestFile`` from fixed placeholder metadata.

    Args:
        file_id: Identifier for the file; also embedded in the resource URL.
        mime_type: MIME type stored in the file's ``type`` field.
        name: File name stored in the file's ``name`` field.

    Returns:
        The ``PdfRestFile`` produced by validating the assembled metadata.
        Size, modification time, and scheduled deletion time are constants.
    """
    metadata = {
        "id": file_id,
        "name": name,
        "url": f"https://api.pdfrest.com/resource/{file_id}",
        "type": mime_type,
        "size": 512,
        "modified": "2024-01-01T00:00:00Z",
        "scheduledDeletionTimeUtc": None,
    }
    return PdfRestFile.model_validate(metadata)

0 commit comments

Comments
 (0)