|
23 | 23 | from pdfrest.types.public import PdfRedactionPreset |
24 | 24 |
|
25 | 25 | from ..types import ( |
| 26 | + HtmlPageOrientation, |
| 27 | + HtmlPageSize, |
| 28 | + HtmlWebLayout, |
26 | 29 | OcrLanguage, |
27 | 30 | PdfAType, |
| 31 | + PdfConversionCompression, |
| 32 | + PdfConversionDownsample, |
| 33 | + PdfConversionLocale, |
28 | 34 | PdfInfoQuery, |
29 | 35 | PdfPageOrientation, |
30 | 36 | PdfPageSize, |
@@ -114,6 +120,10 @@ def _serialize_as_first_file_id(value: list[PdfRestFile]) -> str: |
114 | 120 | return str(value[0].id) |
115 | 121 |
|
116 | 122 |
|
| 123 | +def _serialize_as_first_url(value: list[HttpUrl]) -> str: |
| 124 | + return str(value[0]) |
| 125 | + |
| 126 | + |
117 | 127 | def _serialize_as_comma_separated_string(value: list[Any] | None) -> str | None: |
118 | 128 | if value is None: |
119 | 129 | return None |
@@ -243,6 +253,9 @@ def _validate_output_language(value: str) -> str: |
243 | 253 | return trimmed |
244 | 254 |
|
245 | 255 |
|
| 256 | +_PAGE_MARGIN_REGEX = r"^(?:\d+(?:\.\d+)?)(?:mm|in)$" |
| 257 | + |
| 258 | + |
246 | 259 | class UploadURLs(BaseModel): |
247 | 260 | url: Annotated[ |
248 | 261 | list[HttpUrl] | HttpUrl, |
@@ -551,6 +564,276 @@ class ConvertToMarkdownPayload(BaseModel): |
551 | 564 | ] = None |
552 | 565 |
|
553 | 566 |
|
| 567 | +_PDF_WORD_MIME_TYPES = { |
| 568 | + "application/msword", |
| 569 | + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", |
| 570 | +} |
| 571 | +_PDF_EXCEL_MIME_TYPES = { |
| 572 | + "application/vnd.ms-excel", |
| 573 | + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", |
| 574 | +} |
| 575 | +_PDF_POWERPOINT_MIME_TYPES = { |
| 576 | + "application/vnd.ms-powerpoint", |
| 577 | + "application/vnd.openxmlformats-officedocument.presentationml.presentation", |
| 578 | +} |
| 579 | +_PDF_OFFICE_MIME_TYPES = ( |
| 580 | + _PDF_WORD_MIME_TYPES | _PDF_EXCEL_MIME_TYPES | _PDF_POWERPOINT_MIME_TYPES |
| 581 | +) |
| 582 | +_PDF_POSTSCRIPT_MIME_TYPES = { |
| 583 | + "application/postscript", |
| 584 | + "application/eps", |
| 585 | + "application/x-eps", |
| 586 | +} |
| 587 | +_PDF_EMAIL_MIME_TYPES = {"message/rfc822"} |
| 588 | +_PDF_IMAGE_MIME_TYPES = { |
| 589 | + "image/jpeg", |
| 590 | + "image/tiff", |
| 591 | + "image/bmp", |
| 592 | + "image/png", |
| 593 | +} |
| 594 | +_PDF_HTML_MIME_TYPES = {"text/html"} |
| 595 | + |
| 596 | + |
class ConvertOfficeToPdfPayload(BaseModel):
    """Validate and serialize caller options for an office-to-pdf request.

    Exactly one input file is accepted. ``locale`` is rejected unless that
    file's MIME type is in ``_PDF_EXCEL_MIME_TYPES``.
    """

    # Exactly one file, accepted under ``file`` or ``files`` and serialized
    # under ``id`` by ``_serialize_as_first_file_id``. ``_ensure_list`` runs
    # first (presumably wrapping a single value in a list; helper not shown
    # here). The MIME check then runs with the Office allow-list.
    files: Annotated[
        list[PdfRestFile],
        Field(
            validation_alias=AliasChoices("file", "files"),
            serialization_alias="id",
            min_length=1,
            max_length=1,
        ),
        BeforeValidator(_ensure_list),
        AfterValidator(
            _allowed_mime_types(
                *_PDF_OFFICE_MIME_TYPES,
                error_msg="Must be a Microsoft Office file.",
            )
        ),
        PlainSerializer(_serialize_as_first_file_id),
    ]

    # Optional non-empty output value, post-processed by
    # ``_validate_output_prefix``.
    output: Annotated[
        str | None,
        Field(default=None, min_length=1, serialization_alias="output"),
        AfterValidator(_validate_output_prefix),
    ] = None

    # Optional compression setting.
    compression: Annotated[
        PdfConversionCompression | None,
        Field(default=None, serialization_alias="compression"),
    ] = None

    # Optional downsample setting.
    downsample: Annotated[
        PdfConversionDownsample | None,
        Field(default=None, serialization_alias="downsample"),
    ] = None

    # Stored as "on"/"off". ``_bool_to_on_off`` runs before validation
    # (presumably mapping booleans to those strings; helper not shown here).
    tagged_pdf: Annotated[
        Literal["on", "off"] | None,
        Field(default=None, serialization_alias="tagged_pdf"),
        BeforeValidator(_bool_to_on_off),
    ] = None

    # Optional locale; only valid for Excel inputs (enforced by the model
    # validator below).
    locale: Annotated[
        PdfConversionLocale | None,
        Field(default=None, serialization_alias="locale"),
    ] = None

    @model_validator(mode="after")
    def _validate_option_compatibility(self) -> ConvertOfficeToPdfPayload:
        """Reject ``locale`` when the input is not an Excel MIME type.

        Raises:
            ValueError: If ``locale`` is set and the first file's type is not
                in ``_PDF_EXCEL_MIME_TYPES``.
        """
        input_mime = self.files[0].type
        # Nothing to check when locale is unset or the input is Excel.
        if self.locale is None or input_mime in _PDF_EXCEL_MIME_TYPES:
            return self
        msg = "locale is only supported for Excel inputs."
        raise ValueError(msg)
| 647 | + |
| 648 | + |
class ConvertPostscriptToPdfPayload(BaseModel):
    """Validate and serialize caller options for a postscript-to-pdf request."""

    # Exactly one file, accepted under ``file`` or ``files`` and serialized
    # under ``id`` by ``_serialize_as_first_file_id``. The MIME check uses the
    # PostScript/EPS allow-list.
    files: Annotated[
        list[PdfRestFile],
        Field(
            validation_alias=AliasChoices("file", "files"),
            serialization_alias="id",
            min_length=1,
            max_length=1,
        ),
        BeforeValidator(_ensure_list),
        AfterValidator(
            _allowed_mime_types(
                *_PDF_POSTSCRIPT_MIME_TYPES,
                error_msg="Must be a PostScript or EPS file.",
            )
        ),
        PlainSerializer(_serialize_as_first_file_id),
    ]

    # Optional non-empty output value, post-processed by
    # ``_validate_output_prefix``.
    output: Annotated[
        str | None,
        Field(default=None, min_length=1, serialization_alias="output"),
        AfterValidator(_validate_output_prefix),
    ] = None

    # Optional compression setting.
    compression: Annotated[
        PdfConversionCompression | None,
        Field(default=None, serialization_alias="compression"),
    ] = None

    # Optional downsample setting.
    downsample: Annotated[
        PdfConversionDownsample | None,
        Field(default=None, serialization_alias="downsample"),
    ] = None
| 682 | + |
| 683 | + |
class ConvertEmailToPdfPayload(BaseModel):
    """Validate and serialize caller options for an email-to-pdf request."""

    # Exactly one file, accepted under ``file`` or ``files`` and serialized
    # under ``id`` by ``_serialize_as_first_file_id``. The MIME check uses
    # ``_PDF_EMAIL_MIME_TYPES``.
    files: Annotated[
        list[PdfRestFile],
        Field(
            validation_alias=AliasChoices("file", "files"),
            serialization_alias="id",
            min_length=1,
            max_length=1,
        ),
        BeforeValidator(_ensure_list),
        AfterValidator(
            _allowed_mime_types(
                *_PDF_EMAIL_MIME_TYPES,
                error_msg="Must be an RFC822 email file.",
            )
        ),
        PlainSerializer(_serialize_as_first_file_id),
    ]

    # Optional non-empty output value, post-processed by
    # ``_validate_output_prefix``.
    output: Annotated[
        str | None,
        Field(default=None, min_length=1, serialization_alias="output"),
        AfterValidator(_validate_output_prefix),
    ] = None
| 709 | + |
| 710 | + |
class ConvertImageToPdfPayload(BaseModel):
    """Validate and serialize caller options for an image-to-pdf request."""

    # Exactly one file, accepted under ``file`` or ``files`` and serialized
    # under ``id`` by ``_serialize_as_first_file_id``. The MIME check uses
    # ``_PDF_IMAGE_MIME_TYPES``.
    files: Annotated[
        list[PdfRestFile],
        Field(
            validation_alias=AliasChoices("file", "files"),
            serialization_alias="id",
            min_length=1,
            max_length=1,
        ),
        BeforeValidator(_ensure_list),
        AfterValidator(
            _allowed_mime_types(
                *_PDF_IMAGE_MIME_TYPES,
                error_msg="Must be a supported image file type.",
            )
        ),
        PlainSerializer(_serialize_as_first_file_id),
    ]

    # Optional non-empty output value, post-processed by
    # ``_validate_output_prefix``.
    output: Annotated[
        str | None,
        Field(default=None, min_length=1, serialization_alias="output"),
        AfterValidator(_validate_output_prefix),
    ] = None
| 736 | + |
| 737 | + |
class ConvertHtmlToPdfPayload(BaseModel):
    """Validate and serialize caller options for an html-to-pdf request."""

    # Exactly one file, accepted under ``file`` or ``files`` and serialized
    # under ``id`` by ``_serialize_as_first_file_id``. The MIME check uses
    # ``_PDF_HTML_MIME_TYPES``.
    files: Annotated[
        list[PdfRestFile],
        Field(
            validation_alias=AliasChoices("file", "files"),
            serialization_alias="id",
            min_length=1,
            max_length=1,
        ),
        BeforeValidator(_ensure_list),
        AfterValidator(
            _allowed_mime_types(
                *_PDF_HTML_MIME_TYPES,
                error_msg="Must be an HTML file.",
            )
        ),
        PlainSerializer(_serialize_as_first_file_id),
    ]

    # Optional non-empty output value, post-processed by
    # ``_validate_output_prefix``.
    output: Annotated[
        str | None,
        Field(default=None, min_length=1, serialization_alias="output"),
        AfterValidator(_validate_output_prefix),
    ] = None

    # Optional compression setting.
    compression: Annotated[
        PdfConversionCompression | None,
        Field(default=None, serialization_alias="compression"),
    ] = None

    # Optional downsample setting.
    downsample: Annotated[
        PdfConversionDownsample | None,
        Field(default=None, serialization_alias="downsample"),
    ] = None

    # Optional page size.
    page_size: Annotated[
        HtmlPageSize | None,
        Field(default=None, serialization_alias="page_size"),
    ] = None

    # Optional margin string; must match ``_PAGE_MARGIN_REGEX``.
    page_margin: Annotated[
        str | None,
        Field(
            default=None,
            pattern=_PAGE_MARGIN_REGEX,
            serialization_alias="page_margin",
        ),
    ] = None

    # Optional page orientation.
    page_orientation: Annotated[
        HtmlPageOrientation | None,
        Field(default=None, serialization_alias="page_orientation"),
    ] = None

    # Optional web layout.
    web_layout: Annotated[
        HtmlWebLayout | None,
        Field(default=None, serialization_alias="web_layout"),
    ] = None
| 791 | + |
| 792 | + |
class ConvertUrlToPdfPayload(BaseModel):
    """Validate and serialize caller options for converting one URL to PDF."""

    # Exactly one URL, serialized under ``url`` as the string form of the
    # first entry by ``_serialize_as_first_url``. ``_ensure_list`` runs first
    # (presumably wrapping a single value in a list; helper not shown here).
    url: Annotated[
        list[HttpUrl],
        Field(min_length=1, max_length=1, serialization_alias="url"),
        BeforeValidator(_ensure_list),
        PlainSerializer(_serialize_as_first_url),
    ]

    # Optional non-empty output value, post-processed by
    # ``_validate_output_prefix``.
    output: Annotated[
        str | None,
        Field(default=None, min_length=1, serialization_alias="output"),
        AfterValidator(_validate_output_prefix),
    ] = None

    # Optional compression setting.
    compression: Annotated[
        PdfConversionCompression | None,
        Field(default=None, serialization_alias="compression"),
    ] = None

    # Optional downsample setting.
    downsample: Annotated[
        PdfConversionDownsample | None,
        Field(default=None, serialization_alias="downsample"),
    ] = None

    # Optional page size.
    page_size: Annotated[
        HtmlPageSize | None,
        Field(default=None, serialization_alias="page_size"),
    ] = None

    # Optional margin string; must match ``_PAGE_MARGIN_REGEX``.
    page_margin: Annotated[
        str | None,
        Field(
            default=None,
            pattern=_PAGE_MARGIN_REGEX,
            serialization_alias="page_margin",
        ),
    ] = None

    # Optional page orientation.
    page_orientation: Annotated[
        HtmlPageOrientation | None,
        Field(default=None, serialization_alias="page_orientation"),
    ] = None

    # Optional web layout.
    web_layout: Annotated[
        HtmlWebLayout | None,
        Field(default=None, serialization_alias="web_layout"),
    ] = None
| 835 | + |
| 836 | + |
554 | 837 | class TranslatePdfTextPayload(BaseModel): |
555 | 838 | """Adapt caller options into a pdfRest-ready translate request payload.""" |
556 | 839 |
|
|
0 commit comments