diff --git a/pdfly/booklet.py b/pdfly/booklet.py
new file mode 100644
index 0000000..9a68ad8
--- /dev/null
+++ b/pdfly/booklet.py
@@ -0,0 +1,183 @@
+"""
+Reorder and two-up PDF pages for booklet printing.
+
+If the number of pages is not a multiple of four, pages are
+added until it is a multiple of four. This includes a centerfold
+in the middle of the booklet and a single page on the inside
+back cover. The content of those pages is taken from the
+centerfold-file and blank-page-file files, if specified, otherwise
+they are blank pages.
+
+Example:
+    pdfly booklet input.pdf output.pdf
+
+"""
+
+# Copyright (c) 2014, Steve Witham .
+# All rights reserved. This software is available under a BSD license;
+# see https://github.com/py-pdf/pypdf/LICENSE
+
+import sys
+import traceback
+from pathlib import Path
+from typing import Generator, Optional, Tuple
+
+from pypdf import (
+    PageObject,
+    PdfReader,
+    PdfWriter,
+)
+from pypdf.generic import RectangleObject
+
+
+def main(
+    filename: Path,
+    output: Path,
+    inside_cover_file: Optional[Path],
+    centerfold_file: Optional[Path],
+) -> None:
+    """
+    Write a two-up booklet version of a PDF file.
+
+    Args:
+        filename (Path): The PDF file to read.
+        output (Path): The PDF file to write.
+        inside_cover_file (Optional[Path]): PDF whose first page is used
+            as the padding page when the input has an odd page count.
+        centerfold_file (Optional[Path]): PDF whose first page is merged
+            onto the centerfold sheet when two padding pages were added.
+
+    """
+    try:
+        # Set up the streams
+        reader = PdfReader(filename)
+        pages = list(reader.pages)
+        writer = PdfWriter()
+
+        # Padding pages take the size of the first page, so there must be one.
+        if not pages:
+            print(f"{filename} contains no pages", file=sys.stderr)
+            sys.exit(1)
+        width = pages[0].mediabox.width
+        height = pages[0].mediabox.height
+
+        # Add blank pages to make the number of pages a multiple of 4
+        # If the user specified an inside-back-cover file, use it.
+        # Each padding page is a separate object: pages are merged in place
+        # below, so a shared object would end up merged into itself.
+        if len(pages) % 2 == 1:
+            if inside_cover_file:
+                ic_reader_page = fetch_first_page(inside_cover_file)
+                pages.insert(-1, ic_reader_page)
+            else:
+                pages.insert(
+                    -1,
+                    PageObject.create_blank_page(width=width, height=height),
+                )
+        if len(pages) % 4 == 2:
+            for _ in range(2):
+                pages.insert(
+                    len(pages) // 2,
+                    PageObject.create_blank_page(width=width, height=height),
+                )
+            requires_centerfold = True
+        else:
+            requires_centerfold = False
+
+        # Reorder the pages and place two pages side by side (2-up) on each sheet
+        for lhs, rhs in page_iter(len(pages)):
+            pages[lhs].merge_translated_page(
+                page2=pages[rhs],
+                tx=pages[lhs].mediabox.width,
+                ty=0,
+                expand=True,
+                over=True,
+            )
+            writer.add_page(pages[lhs])
+
+        # If a centerfold was required, it is already
+        # present as a pair of blank pages. If the user
+        # specified a centerfold file, use it instead.
+        if requires_centerfold and centerfold_file:
+            centerfold_page = fetch_first_page(centerfold_file)
+            last_page = writer.pages[-1]
+            if centerfold_page.rotation != 0:
+                centerfold_page.transfer_rotation_to_content()
+            if requires_rotate(centerfold_page.mediabox, last_page.mediabox):
+                centerfold_page = centerfold_page.rotate(270)
+            if centerfold_page.rotation != 0:
+                centerfold_page.transfer_rotation_to_content()
+            last_page.merge_page(centerfold_page)
+
+        # Everything looks good! Write the output file.
+        with open(output, "wb") as output_fh:
+            writer.write(output_fh)
+
+    except Exception:
+        print(traceback.format_exc(), file=sys.stderr)
+        print(f"Error while reading {filename}", file=sys.stderr)
+        sys.exit(1)
+
+
+def requires_rotate(a: RectangleObject, b: RectangleObject) -> bool:
+    """
+    Return True if a and b are rotated relative to each other.
+
+    Args:
+        a (RectangleObject): The first rectangle.
+        b (RectangleObject): The second rectangle.
+
+    """
+    a_portrait = a.height > a.width
+    b_portrait = b.height > b.width
+    return a_portrait != b_portrait
+
+
+def fetch_first_page(filename: Path) -> PageObject:
+    """
+    Fetch the first page of a PDF file.
+
+    Args:
+        filename (Path): The path to the PDF file.
+
+    Returns:
+        PageObject: The first page of the PDF file.
+
+    """
+    return PdfReader(filename).pages[0]
+
+
+# This function written with inspiration, assistance, and code
+# from claude.ai & Github Copilot
+def page_iter(num_pages: int) -> Generator[Tuple[int, int], None, None]:
+    """
+    Generate pairs of page numbers for printing a booklet.
+    This function assumes that the total number of pages is divisible by 4.
+    It yields tuples of page numbers that should be printed on the same sheet
+    of paper to create a booklet.
+
+    Args:
+        num_pages (int): The total number of pages in the document. Must be divisible by 4.
+
+    Yields:
+        Generator[Tuple[int, int], None, None]: Tuples containing pairs of page numbers.
+        Each tuple represents the page numbers to be printed on one side of a sheet.
+
+    Raises:
+        ValueError: If the number of pages is not divisible by 4.
+
+    """
+    if num_pages % 4 != 0:
+        raise ValueError("Number of pages must be divisible by 4")
+
+    for sheet in range(num_pages // 4):
+        # Outside the fold
+        last_page = num_pages - sheet * 2 - 1
+        first_page = sheet * 2
+
+        # Inside the fold
+        second_page = sheet * 2 + 1
+        second_to_last_page = num_pages - sheet * 2 - 2
+
+        yield last_page, first_page
+        yield second_page, second_to_last_page
diff --git a/pdfly/cli.py b/pdfly/cli.py
index 44b5ecd..f0929a7 100644
--- a/pdfly/cli.py
+++ b/pdfly/cli.py
@@ -5,11 +5,12 @@
 """
 
 from pathlib import Path
-from typing import List
+from typing import List, Optional
 
 import typer
 from typing_extensions import Annotated
 
+import pdfly.booklet
 import pdfly.cat
 import pdfly.compress
 import pdfly.extract_images
@@ -98,6 +99,50 @@ def cat(
     pdfly.cat.main(filename, fn_pgrgs, output, verbose)
 
 
+@entry_point.command(name="booklet", help=pdfly.booklet.__doc__)  # type: ignore[misc]
+def booklet(
+    filename: Annotated[
+        Path,
+        typer.Argument(
+            dir_okay=False,
+            exists=True,
+            resolve_path=True,
+        ),
+    ],
+    output: Annotated[
+        Path,
+        typer.Argument(
+            dir_okay=False,
+            exists=False,
+            resolve_path=False,
+        ),
+    ],
+    blank_page: Annotated[
+        Optional[Path],
+        typer.Option(
+            "-b",
+            "--blank-page-file",
+            help="page added if input is odd number of pages",
+            dir_okay=False,
+            exists=True,
+            resolve_path=True,
+        ),
+    ] = None,
+    centerfold: Annotated[
+        Optional[Path],
+        typer.Option(
+            "-c",
+            "--centerfold-file",
+            help="double-page added if input is missing >= 2 pages",
+            dir_okay=False,
+            exists=True,
+            resolve_path=True,
+        ),
+    ] = None,
+) -> None:
+    pdfly.booklet.main(filename, output, blank_page, centerfold)
+
+
 @entry_point.command(name="rm", help=pdfly.rm.__doc__)
 def rm(
     filename: Annotated[
diff --git a/resources/b.pdf b/resources/b.pdf
new file mode 100644
index 0000000..ed19e00
Binary files /dev/null and b/resources/b.pdf differ
diff --git a/resources/c.pdf b/resources/c.pdf
new file mode 100644
index 0000000..38c55b1
Binary
files /dev/null and b/resources/c.pdf differ
diff --git a/resources/input8.pdf b/resources/input8.pdf
new file mode 100644
index 0000000..f11b56d
Binary files /dev/null and b/resources/input8.pdf differ
diff --git a/tests/test_booklet.py b/tests/test_booklet.py
new file mode 100644
index 0000000..4079e9b
--- /dev/null
+++ b/tests/test_booklet.py
@@ -0,0 +1,121 @@
+import pytest
+from pypdf import PdfReader
+
+from .conftest import RESOURCES_ROOT, chdir, run_cli
+
+
+def test_booklet_fewer_args(capsys, tmp_path):
+    with chdir(tmp_path):
+        exit_code = run_cli(["booklet", str(RESOURCES_ROOT / "box.pdf")])
+    assert exit_code == 2
+    captured = capsys.readouterr()
+    assert "Missing argument" in captured.err
+
+
+def test_booklet_extra_args(capsys, tmp_path):
+    with chdir(tmp_path):
+        exit_code = run_cli(
+            ["booklet", str(RESOURCES_ROOT / "box.pdf"), "a.pdf", "b.pdf"]
+        )
+    assert exit_code == 2
+    captured = capsys.readouterr()
+    assert "unexpected extra argument" in captured.err
+
+
+def test_booklet_page_size(capsys, tmp_path):
+    in_fname = str(RESOURCES_ROOT / "input8.pdf")
+
+    with chdir(tmp_path):
+        exit_code = run_cli(
+            [
+                "booklet",
+                in_fname,
+                "output8.pdf",
+            ]
+        )
+        in_reader = PdfReader(in_fname)
+        out_reader = PdfReader("output8.pdf")
+
+    assert exit_code == 0
+
+    assert len(in_reader.pages) == 8
+    assert len(out_reader.pages) == 4
+
+    in_height = in_reader.pages[0].mediabox.height
+    in_width = in_reader.pages[0].mediabox.width
+    out_height = out_reader.pages[0].mediabox.height
+    out_width = out_reader.pages[0].mediabox.width
+
+    assert out_width == in_width * 2
+    assert in_height == out_height
+
+
+@pytest.mark.parametrize(
+    ("page_count", "expected", "expected_bc"),
+    [
+        ("8", "81\n27\n63\n45\n", "81\n27\n63\n45\n"),
+        ("7", "71\n2\n63\n45\n", "71\n2b\n63\n45\n"),
+        ("6", "61\n25\n43\n\n", "61\n25\n43\nc\n"),
+        ("5", "51\n2\n43\n\n", "51\n2b\n43\nc\n"),
+        ("4", "41\n23\n", "41\n23\n"),
+        ("3", "31\n2\n", "31\n2b\n"),
+        ("2", "21\n\n", "21\nc\n"),
+        ("1", "1\n\n", "1b\nc\n"),
+    ],
+)
+def test_booklet_order(capsys, tmp_path, page_count, expected, expected_bc):
+    with chdir(tmp_path):
+        exit_code = run_cli(
+            [
+                "cat",
+                "-o",
+                f"input{page_count}.pdf",
+                str(RESOURCES_ROOT / "input8.pdf"),
+                f":{page_count}",
+            ]
+        )
+        assert exit_code == 0
+
+        exit_code = run_cli(
+            [
+                "booklet",
+                f"input{page_count}.pdf",
+                f"output{page_count}.pdf",
+            ]
+        )
+        captured = capsys.readouterr()
+        assert exit_code == 0, captured.err
+
+        exit_code = run_cli(
+            [
+                "extract-text",
+                f"output{page_count}.pdf",
+            ]
+        )
+        captured = capsys.readouterr()
+        assert exit_code == 0, captured.err
+        assert captured.out == expected
+
+        exit_code = run_cli(
+            [
+                "booklet",
+                "--centerfold-file",
+                str(RESOURCES_ROOT / "c.pdf"),
+                "--blank-page-file",
+                str(RESOURCES_ROOT / "b.pdf"),
+                f"input{page_count}.pdf",
+                f"outputbc{page_count}.pdf",
+            ]
+        )
+        captured = capsys.readouterr()
+        assert exit_code == 0, captured.err
+
+        exit_code = run_cli(
+            [
+                "extract-text",
+                f"outputbc{page_count}.pdf",
+            ]
+        )
+        captured = capsys.readouterr()
+        assert exit_code == 0, captured.err
+        assert captured.out == expected_bc