|
4 | 4 | import shutil |
5 | 5 | import subprocess |
6 | 6 | import tempfile |
| 7 | +import threading |
7 | 8 | from pathlib import Path |
8 | 9 |
|
9 | 10 | from docling_core.types.doc.document import ImageRef |
| 11 | +from PIL import Image, ImageChops |
10 | 12 |
|
11 | 13 | from docling.backend.latex.engines.base import RenderEngine |
12 | | -from docling.utils.locks import pypdfium2_lock |
13 | 14 |
|
14 | 15 | _log = logging.getLogger(__name__) |
| 16 | +_PYPDFIUM2_LOCK = threading.Lock() |
| 17 | + |
| 18 | + |
def _crop_whitespace(
    image: Image.Image,
    bg_color: float | tuple[int, ...] | int | None = None,
    padding: int = 0,
) -> Image.Image:
    """Crop the uniform background border surrounding an image's content.

    Args:
        image: Image to crop.
        bg_color: Colour treated as background. When ``None``, the colour of
            the top-left pixel is used.
        padding: Number of background pixels to keep on every side of the
            content; the resulting box is clamped to the image bounds.

    Returns:
        The cropped image, or ``image`` itself when no pixel differs from the
        background colour.
    """
    if bg_color is None:
        bg_color = image.getpixel((0, 0))

    bg = Image.new(image.mode, image.size, bg_color)
    diff = ImageChops.difference(image, bg)

    # Measure every band on its own and merge the boxes. Calling getbbox()
    # on the multi-band difference is unreliable for modes with an alpha
    # channel: recent Pillow releases default to ``alpha_only=True``, and the
    # alpha band of the difference is all zero whenever image and background
    # are equally opaque, so nothing would ever be cropped.
    band_boxes = [
        box for box in (band.getbbox() for band in diff.split()) if box is not None
    ]
    if not band_boxes:
        return image

    left = min(box[0] for box in band_boxes)
    upper = min(box[1] for box in band_boxes)
    right = max(box[2] for box in band_boxes)
    lower = max(box[3] for box in band_boxes)

    # Grow the content box by the requested padding without leaving the image.
    left = max(0, left - padding)
    upper = max(0, upper - padding)
    right = min(image.width, right + padding)
    lower = min(image.height, lower + padding)
    return image.crop((left, upper, right, lower))
15 | 39 |
|
16 | 40 |
|
17 | 41 | class TectonicEngine(RenderEngine): |
@@ -281,17 +305,15 @@ def render( |
281 | 305 | try: |
282 | 306 | import pypdfium2 as pdfium |
283 | 307 |
|
284 | | - from docling.backend.docx.drawingml.utils import crop_whitespace |
285 | | - |
286 | | - with pypdfium2_lock: |
| 308 | + with _PYPDFIUM2_LOCK: |
287 | 309 | with pdfium.PdfDocument(pdf_file) as pdf: |
288 | 310 | page = pdf[0] |
289 | 311 | pil_image = page.render(scale=300 / 72).to_pil() |
290 | 312 | page.close() |
291 | 313 |
|
292 | 314 | # Auto-crop the generous border added by standalone, |
293 | 315 | # keeping a small padding (10px) for clean margins. |
294 | | - pil_image = crop_whitespace(pil_image, padding=10) |
| 316 | + pil_image = _crop_whitespace(pil_image, padding=10) |
295 | 317 |
|
296 | 318 | return ImageRef.from_pil(pil_image, dpi=300) |
297 | 319 | except Exception as e: |
|
0 commit comments