|
5 | 5 | This script generates a simple PDF file that uses Brotli compression
|
6 | 6 | for the content stream, allowing for testing of the BrotliDecode filter
|
7 | 7 | in pypdf.
|
| 8 | +
|
| 9 | +Note: /BrotliDecode is not a standard PDF filter. This file is specifically |
| 10 | +for testing PDF library support for this filter (e.g., in pypdf). |
| 11 | +Standard PDF viewers will likely not render this file correctly. |
8 | 12 | """
|
9 | 13 |
|
| 14 | +import logging |
| 15 | +from pathlib import Path |
| 16 | + |
10 | 17 | import brotli
|
11 |
| -import os |
12 | 18 |
|
13 |
| -# Simple PDF structure with Brotli-compressed content stream |
14 |
| -# The content stream will contain a simple "Hello, Brotli!" text |
# Emit "<logger name>: <LEVEL>: <message>" records at INFO level and above.
logging.basicConfig(
    format="%(name)s: %(levelname)s: %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Uncompressed page content stream: shows the text "Hello, Brotli!" using the
# font resource /F1.
content_stream = b"BT /F1 24 Tf 100 700 Td (Hello, Brotli!) Tj ET"

# Brotli-compressed form of the content stream (quality level 5); this is the
# payload stored in the PDF stream object that declares /Filter /BrotliDecode.
compressed_content = brotli.compress(content_stream, quality=5)
|
17 | 24 |
|
18 |
| -# PDF structure |
19 |
| -pdf = [ |
20 |
| - b"%PDF-1.7\n", |
21 |
| - b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n", |
22 |
| - b"2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n", |
23 |
| - b"3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R /Resources << /Font << /F1 5 0 R >> >> >>\nendobj\n", |
24 |
| - b"4 0 obj\n<< /Length " + str(len(compressed_content)).encode() + b" /Filter /BrotliDecode >>\nstream\n" + compressed_content + b"\nendstream\nendobj\n", |
25 |
| - b"5 0 obj\n<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>\nendobj\n", |
26 |
| - b"xref\n0 6\n0000000000 65535 f \n0000000010 00000 n \n0000000060 00000 n \n0000000115 00000 n \n0000000234 00000 n \n" + |
27 |
| - (b"0000000" + str(334 + len(compressed_content)).encode() + b" 00000 n \n"), |
28 |
| - b"trailer\n<< /Size 6 /Root 1 0 R >>\nstartxref\n" + str(400 + len(compressed_content)).encode() + b"\n%%EOF" |
29 |
| -] |
30 |
| - |
31 |
| -# Write PDF to file |
32 |
| -# Define paths relative to the script's location (resources/) |
33 |
| -script_dir = os.path.dirname(__file__) |
34 |
| -output_dir = os.path.join(script_dir, "brotli-test") |
35 |
| -output_path = os.path.join(output_dir, "brotli-compressed.pdf") |
36 |
| - |
37 |
| -# Ensure the output directory exists |
38 |
| -os.makedirs(output_dir, exist_ok=True) |
39 |
| -with open(output_path, "wb") as f: |
40 |
| - for part in pdf: |
41 |
| - f.write(part) |
42 |
| - |
43 |
| -print(f"Created test PDF with Brotli compression at: {output_path}") |
# --- Assemble the PDF body -------------------------------------------------
# The file is built as an ordered list of byte chunks. While appending each
# indirect object we record the byte offset at which it starts, because the
# cross-reference table must list those offsets exactly.

# Version line followed by a comment line made of bytes >= 0x80 (binary marker).
pdf_header = b"%PDF-1.7\n%\xc2\xa5\xc2\xb1\xc3\xab\xc3\xbf\n"

# Object 4: the Brotli-compressed content stream. /Length is the size of the
# compressed payload only; the newline before "endstream" is not counted.
stream_object = b"".join(
    (
        (
            f"4 0 obj\n<< /Length {len(compressed_content)} "
            "/Filter /BrotliDecode >>\nstream\n"
        ).encode("ascii"),
        compressed_content,
        b"\nendstream\nendobj\n",
    )
)

# Indirect objects 1..5, in the order they are written to the file.
pdf_objects = [
    b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
    b"2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n",
    (
        b"3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] "
        b"/Contents 4 0 R /Resources << /Font << /F1 5 0 R >> >> >>\nendobj\n"
    ),
    stream_object,
    b"5 0 obj\n<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>\nendobj\n",
]

pdf_parts = [pdf_header]
current_offset = len(pdf_header)
xref_offsets = [0]  # Entry 0 is the free entry; its offset field is unused.
for pdf_object in pdf_objects:
    # Record where this object starts *before* appending it.
    xref_offsets.append(current_offset)
    pdf_parts.append(pdf_object)
    current_offset += len(pdf_object)

# The xref table begins immediately after the last object.
xref_table_start_offset = current_offset
object_count = len(xref_offsets)  # Free entry + 5 objects = 6.

# --- Cross-reference table and trailer --------------------------------------
# Each in-use entry is a 10-digit zero-padded offset, a 5-digit generation
# number, the keyword "n", and a two-byte end-of-line (" \n").
xref_lines = [f"xref\n0 {object_count}\n".encode("ascii"), b"0000000000 65535 f \n"]
xref_lines.extend(
    f"{object_offset:010d} 00000 n \n".encode("ascii")
    for object_offset in xref_offsets[1:]
)
pdf_parts.extend(xref_lines)

# "%%EOF" is written literally: "%" needs no escaping inside an f-string.
trailer = (
    f"trailer\n<< /Size {object_count} /Root 1 0 R >>\n"
    f"startxref\n{xref_table_start_offset}\n%%EOF"
).encode("ascii")
pdf_parts.append(trailer)

# --- Write the file next to this script --------------------------------------
script_path = Path(__file__).resolve()
output_dir = script_path.parent / "brotli-test-pdfs"
output_path = output_dir / "minimal-brotli-compressed.pdf"

output_dir.mkdir(parents=True, exist_ok=True)

try:
    output_path.write_bytes(b"".join(pdf_parts))
except OSError:
    # Best-effort script: report the failure with its traceback, do not re-raise.
    logger.exception("Error writing PDF file")
else:
    logger.info("Created test PDF with Brotli compression at: %s", output_path)
0 commit comments