Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion pypdf/_doc_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1184,7 +1184,15 @@ def _flatten(
if attr in pages:
inherit[attr] = pages[attr]
pages_reference = getattr(pages, "indirect_reference", object())
for page in cast(ArrayObject, pages[PagesAttributes.KIDS]):
# A malformed /Pages node may be missing /Kids (for example a page
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not think that this comment contributes enough to be useful here.

# tree advertising "/Count 0" without any children). Treat it as
# having no kids instead of raising a bare KeyError here (#3811).
kids = (
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be written in a simpler way:

Suggested change
kids = (
kids = pages.get(PagesAttributes.KIDS, ArrayObject())

pages[PagesAttributes.KIDS]
if PagesAttributes.KIDS in pages
else ArrayObject()
)
for page in cast(ArrayObject, kids):
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While we are at it (and although not required here), I would recommend replacing this cast and increasing the resilience here.

What I mean is that we should use an empty ArrayObject if the kids are a NullObject and raise a proper exception if we see anything different from an ArrayObject for the iteration.

if getattr(page, "indirect_reference", object()) == pages_reference:
raise PdfReadError("Detected cyclic page references.")

Expand Down
16 changes: 16 additions & 0 deletions tests/test_doc_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
EncodedStreamObject,
NameObject,
NullObject,
NumberObject,
TextStringObject,
ViewerPreferences,
)
Expand Down Expand Up @@ -475,6 +476,21 @@ def test_flatten__cyclic_references():
reader._flatten()


def test_flatten__pages_without_kids():
    """A /Pages node lacking /Kids must flatten to an empty page list.

    Regression test for #3811: a malformed page tree may advertise
    "/Count 0" without any /Kids entry, which used to surface as a bare
    ``KeyError: '/Kids'``.
    """
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")

    # Turn the root page tree node into the malformed shape: no /Kids entry
    # and a /Count of zero.
    root_pages = reader.root_object["/Pages"]
    del root_pages["/Kids"]
    root_pages[NameObject("/Count")] = NumberObject(0)
    # NOTE(review): presumably this clears any cached flattened page list so
    # that the accesses below walk the modified tree again -- confirm.
    reader.flattened_pages = None

    assert len(reader.pages) == 0
    assert list(reader.pages) == []


@pytest.mark.enable_socket
@pytest.mark.timeout(10)
def test_get_outline__cyclic_references(caplog):
Expand Down
Loading