Skip to content

Commit 1573164

Browse files
committed
feat: Add support for BrotliDecode filter (PDF 2.0)
Implements the BrotliDecode filter as specified in ISO 32000-2:2020, Section 7.4.11. Adds necessary constants, integrates the filter into the decoding logic, includes brotli as an optional dependency, adds unit tests, and updates documentation. Closes #3223
1 parent 96ba79c commit 1573164

File tree

11 files changed

+121
-36
lines changed

11 files changed

+121
-36
lines changed

CHANGELOG.md

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# CHANGELOG
22

3+
## Unreleased
4+
5+
### New Features (ENH)
6+
- Add support for BrotliDecode filter (PDF 2.0) (#3223)
7+
38
## Version 5.4.0, 2025-03-16
49

510
### New Features (ENH)
@@ -1680,7 +1685,7 @@ e.g. Russian / Chinese / Japanese / Korean / Arabic.
16801685
### Documentation (DOC)
16811686
- Notes on annotations (#982)
16821687
- Who uses PyPDF2
1683-
- intendet ➔ in robustness page (#958)
1688+
- inteted ➔ in robustness page (#958)
16841689

16851690
### Maintenance (MAINT)
16861691
- pre-commit / requirements.txt updates (#977)

docs/modules/constants.rst

+15
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
Constants
22
---------
33

4+
.. autoclass:: pypdf.constants.StrEnum
5+
:members:
6+
:undoc-members:
7+
:show-inheritance:
8+
49
.. autoclass:: pypdf.constants.AnnotationFlag
510
:members:
611
:undoc-members:
@@ -26,3 +31,13 @@ Constants
2631
:undoc-members:
2732
:exclude-members: FT, Parent, Kids, T, TU, TM, V, DV, AA, Opt, attributes, attributes_dict
2833
:show-inheritance:
34+
35+
.. autoclass:: pypdf.constants.FilterTypes
36+
:members:
37+
:undoc-members:
38+
:show-inheritance:
39+
40+
.. autoclass:: pypdf.constants.FilterTypeAbbreviations
41+
:members:
42+
:undoc-members:
43+
:show-inheritance:

pypdf/constants.py

+2
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ class FilterTypes(StrEnum):
245245
CCITT_FAX_DECODE = "/CCITTFaxDecode" # abbreviation: CCF
246246
DCT_DECODE = "/DCTDecode" # abbreviation: DCT
247247
JPX_DECODE = "/JPXDecode"
248+
BROTLI_DECODE = "/BrotliDecode" # abbreviation: Br, PDF 2.0
248249

249250

250251
class FilterTypeAbbreviations:
@@ -257,6 +258,7 @@ class FilterTypeAbbreviations:
257258
RL = "/RL"
258259
CCF = "/CCF"
259260
DCT = "/DCT"
261+
BR = "/Br" # BrotliDecode
260262

261263

262264
class LzwFilterParameters:

pypdf/filters.py

+51
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@
6565
NullObject,
6666
)
6767

68+
try:
69+
import brotli
70+
except ImportError:
71+
brotli = None
72+
6873

6974
def decompress(data: bytes) -> bytes:
7075
"""
@@ -481,6 +486,50 @@ def decode(
481486
return data
482487

483488

489+
class BrotliDecode:
490+
"""Decodes Brotli-compressed data."""
491+
@staticmethod
492+
def decode(
493+
data: bytes,
494+
decode_parms: Optional[DictionaryObject] = None,
495+
**kwargs: Any,
496+
) -> bytes:
497+
"""
498+
Decode Brotli-compressed data.
499+
500+
Args:
501+
data: Brotli-compressed data.
502+
decode_parms: Optional parameters (unused).
503+
504+
Returns:
505+
Decompressed data.
506+
507+
Raises:
508+
PdfStreamError: If brotli library is not installed.
509+
"""
510+
if brotli is None:
511+
raise PdfStreamError("Brotli library not installed. Required for BrotliDecode filter.")
512+
return brotli.decompress(data)
513+
514+
@staticmethod
515+
def encode(data: bytes, **kwargs: Any) -> bytes:
516+
"""
517+
Encode data using Brotli compression.
518+
519+
Args:
520+
data: Data to compress.
521+
522+
Returns:
523+
Compressed data.
524+
525+
Raises:
526+
PdfStreamError: If brotli library is not installed.
527+
"""
528+
if brotli is None:
529+
raise PdfStreamError("Brotli library not installed. Required for BrotliDecode filter.")
530+
return brotli.compress(data)
531+
532+
484533
@dataclass
485534
class CCITTParameters:
486535
"""§7.4.6, optional parameters for the CCITTFaxDecode filter."""
@@ -666,6 +715,8 @@ def decode_stream_data(stream: Any) -> bytes:
666715
data = DCTDecode.decode(data)
667716
elif filter_name == FT.JPX_DECODE:
668717
data = JPXDecode.decode(data)
718+
elif filter_name == FT.BROTLI_DECODE: # Add BrotliDecode
719+
data = BrotliDecode.decode(data)
669720
elif filter_name == "/Crypt":
670721
if "/Name" in params or "/Type" in params:
671722
raise NotImplementedError(

pyproject.toml

+3-1
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,11 @@ Source = "https://github.com/py-pdf/pypdf"
4242
crypto = ["cryptography"]
4343
cryptodome = ["PyCryptodome"]
4444
image = ["Pillow>=8.0.0"]
45+
brotli = ["Brotli"] # Add brotli dependency
4546
full = [
4647
"cryptography",
47-
"Pillow>=8.0.0"
48+
"Pillow>=8.0.0",
49+
"Brotli", # Add brotli to full dependencies
4850
]
4951
dev = [
5052
"black",

requirements/ci-3.11.txt

+9-7
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
#
55
# pip-compile --output-file=requirements/ci-3.11.txt requirements/ci.in
66
#
7+
brotli==1.1.0
8+
# via -r requirements/ci.in
79
cffi==1.17.1
810
# via cryptography
911
coverage[toml]==7.6.4
@@ -36,6 +38,13 @@ pillow==11.0.0
3638
# fpdf2
3739
pluggy==1.5.0
3840
# via pytest
41+
ruff==0.11.0
42+
# via -r requirements/ci.in
43+
tomli==2.0.2
44+
# via
45+
# coverage
46+
# mypy
47+
# pytest
3948
py-cpuinfo==9.0.0
4049
# via pytest-benchmark
4150
pycparser==2.22
@@ -60,13 +69,6 @@ pytest-xdist==3.6.1
6069
# via -r requirements/ci.in
6170
pyyaml==6.0.2
6271
# via -r requirements/ci.in
63-
ruff==0.11.0
64-
# via -r requirements/ci.in
65-
tomli==2.0.2
66-
# via
67-
# coverage
68-
# mypy
69-
# pytest
7072
typeguard==4.3.0
7173
# via -r requirements/ci.in
7274
types-pillow==10.2.0.20240822

requirements/ci.in

+1
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ pytest-cov
1313
typeguard
1414
types-Pillow
1515
pyyaml
16+
brotli

requirements/ci.txt

+4-13
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
#
2-
# This file is autogenerated by pip-compile with Python 3.8
2+
# This file is autogenerated by pip-compile with Python 3.11
33
# by the following command:
44
#
5-
# pip-compile requirements/ci.in
5+
# pip-compile --output-file=requirements/ci.txt requirements/ci.in
66
#
7+
brotli==1.1.0
8+
# via -r requirements/ci.in
79
cffi==1.17.1
810
# via cryptography
911
coverage[toml]==7.6.1
@@ -14,16 +16,12 @@ cryptography==44.0.1
1416
# via -r requirements/ci.in
1517
defusedxml==0.7.1
1618
# via fpdf2
17-
exceptiongroup==1.2.2
18-
# via pytest
1919
execnet==2.1.1
2020
# via pytest-xdist
2121
fonttools==4.54.1
2222
# via fpdf2
2323
fpdf2==2.8.1
2424
# via -r requirements/ci.in
25-
importlib-metadata==8.5.0
26-
# via typeguard
2725
iniconfig==2.0.0
2826
# via pytest
2927
mypy==1.13.0
@@ -62,11 +60,6 @@ pytest-xdist==3.6.1
6260
# via -r requirements/ci.in
6361
pyyaml==6.0.2
6462
# via -r requirements/ci.in
65-
tomli==2.0.2
66-
# via
67-
# coverage
68-
# mypy
69-
# pytest
7063
typeguard==4.3.0
7164
# via -r requirements/ci.in
7265
types-pillow==10.2.0.20240822
@@ -75,5 +68,3 @@ typing-extensions==4.12.2
7568
# via
7669
# mypy
7770
# typeguard
78-
zipp==3.20.2
79-
# via importlib-metadata

requirements/dev.in

+1
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ pre-commit
44
pytest-cov
55
flit
66
wheel
7+
brotli

requirements/dev.txt

+4-14
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
#
2-
# This file is autogenerated by pip-compile with Python 3.8
2+
# This file is autogenerated by pip-compile with Python 3.11
33
# by the following command:
44
#
5-
# pip-compile requirements/dev.in
5+
# pip-compile --output-file=requirements/dev.txt requirements/dev.in
66
#
7+
brotli==1.1.0
8+
# via -r requirements/dev.in
79
build==1.2.2.post1
810
# via pip-tools
911
certifi==2024.8.30
@@ -20,8 +22,6 @@ distlib==0.3.9
2022
# via virtualenv
2123
docutils==0.20.1
2224
# via flit
23-
exceptiongroup==1.2.2
24-
# via pytest
2525
filelock==3.16.1
2626
# via virtualenv
2727
flit==3.9.0
@@ -32,8 +32,6 @@ identify==2.6.1
3232
# via pre-commit
3333
idna==3.10
3434
# via requests
35-
importlib-metadata==8.5.0
36-
# via build
3735
iniconfig==2.0.0
3836
# via pytest
3937
nodeenv==1.9.1
@@ -64,12 +62,6 @@ pyyaml==6.0.2
6462
# via pre-commit
6563
requests==2.32.3
6664
# via flit
67-
tomli==2.0.2
68-
# via
69-
# build
70-
# coverage
71-
# pip-tools
72-
# pytest
7365
tomli-w==1.0.0
7466
# via flit
7567
urllib3==2.2.3
@@ -80,8 +72,6 @@ wheel==0.44.0
8072
# via
8173
# -r requirements/dev.in
8274
# pip-tools
83-
zipp==3.20.2
84-
# via importlib-metadata
8575

8676
# The following packages are considered to be unsafe in a requirements file:
8777
# pip

tests/test_filters.py

+25
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from itertools import product as cartesian_product
88
from pathlib import Path
99

10+
import brotli # noqa: F401
1011
import pytest
1112
from PIL import Image, ImageOps
1213

@@ -15,6 +16,7 @@
1516
from pypdf.filters import (
1617
ASCII85Decode,
1718
ASCIIHexDecode,
19+
BrotliDecode, # Add BrotliDecode
1820
CCITParameters,
1921
CCITTFaxDecode,
2022
CCITTParameters,
@@ -52,6 +54,29 @@ def test_flate_decode_encode(predictor, s):
5254
assert codec.decode(encoded, DictionaryObject({"/Predictor": predictor})) == s
5355

5456

57+
@pytest.mark.parametrize("s", filter_inputs)
58+
def test_brotli_decode_encode(s):
59+
"""BrotliDecode encode() and decode() methods work as expected."""
60+
codec = BrotliDecode()
61+
s_bytes = s.encode()
62+
encoded = codec.encode(s_bytes)
63+
assert encoded != s_bytes # Ensure encoding actually happened
64+
decoded = codec.decode(encoded)
65+
assert decoded == s_bytes
66+
67+
68+
def test_brotli_decode_without_brotli_installed(monkeypatch):
69+
"""Verify BrotliDecode raises PdfReadError if brotli is not installed."""
70+
# Simulate brotli not being installed within the filters module
71+
monkeypatch.setattr("pypdf.filters.brotli", None)
72+
73+
codec = BrotliDecode()
74+
with pytest.raises(PdfReadError) as exc_info:
75+
codec.decode(b"test data")
76+
77+
assert "Brotli library not installed. Required for BrotliDecode filter." in str(exc_info.value)
78+
79+
5580
def test_flatedecode_unsupported_predictor():
5681
"""
5782
FlateDecode raises PdfReadError for unsupported predictors.

0 commit comments

Comments
 (0)