Skip to content

Commit 52545c5

Browse files
ROB: Pad truncated data in bits2byte instead of reading out of bounds (#3820)
--------- Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com>
1 parent 56e0784 commit 52545c5

2 files changed

Lines changed: 41 additions & 0 deletions

File tree

pypdf/generic/_image_xobject.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,12 @@ def bits2byte(data: bytes, size: tuple[int, int], bits: int) -> bytes:
131 131

132 132
byte_buffer = bytearray(buffer_size)
133 133
mask = (1 << bits) - 1
134+
135+
required = size[1] * ((size[0] * bits + 7) // 8)
136+
if (length := len(data)) < required:
137+
logger_warning("Image data is not rectangular. Adding padding.", source=__name__)
138+
data += b"\x00" * (required - length)
139+
134 140
data_index = 0
135 141
bit = 8 - bits
136 142
for y in range(size[1]):

tests/generic/test_image_xobject.py

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -276,3 +276,38 @@ def test_bits2byte__limit() -> None:
276 276
match=r"^Requested buffer size 76500000 exceeds limit of 75000000\.$"
277 277
):
278 278
bits2byte(data=b"TEST", size=(9000, 8500), bits=8)
279+
280+
281+
def test_bits2byte__truncated_data(caplog: pytest.LogCaptureFixture) -> None:
282+
# 4x4 image at 2 bits per sample needs 4 bytes; provide only 1.
283+
result = bits2byte(data=b"\x00", size=(4, 4), bits=2)
284+
assert result == bytes(16)
285+
assert "Image data is not rectangular. Adding padding." in caplog.text
286+
287+
288+
def test_handle_flate__truncated_2bit_image(caplog: pytest.LogCaptureFixture) -> None:
289+
# A 3x3 indexed image at 2 bits per sample needs 3 bytes; provide only 1.
290+
# Padding the missing bytes lets the image still be loaded instead of
291+
# raising IndexError out of bits2byte.
292+
lookup = DecodedStreamObject()
293+
lookup.set_data(bytes([0, 0, 0, 10, 10, 10, 20, 20, 20, 30, 30, 30]))
294+
result = _handle_flate(
295+
size=(3, 3),
296+
data=b"\xe4",
297+
mode="2bits",
298+
color_space=ArrayObject(
299+
[NameObject("/Indexed"), NameObject("/DeviceRGB"), NumberObject(3), lookup]
300+
),
301+
colors=1,
302+
obj_as_text="dummy",
303+
)
304+
image = result[0]
305+
image.load()
306+
assert image.mode == "RGB"
307+
assert image.size == (3, 3)
308+
assert get_image_data(image) == (
309+
(30, 30, 30), (20, 20, 20), (10, 10, 10),
310+
(0, 0, 0), (0, 0, 0), (0, 0, 0),
311+
(0, 0, 0), (0, 0, 0), (0, 0, 0),
312+
)
313+
assert "Image data is not rectangular. Adding padding." in caplog.text

0 commit comments

Comments (0)