Docling >=0.50 vs == 0.49 breaks for images in tables

When upgrading from 0.49 to 0.50, I observe a change in docling's behaviour.

This change seems to be much more impactful than just switching to the new Heron model. See below.


Input is (preview here - full document below)

<img width="633" height="883" alt="Image" src="https://github.com/user-attachments/assets/9e6418d1-5ed1-4082-ab1a-e6065e9f0586" />


## For 0.49 the result is

### With Heron

Image detected but no longer correctly referenced.

No longer parsable with heuristics afterwards.

<img width="846" height="739" alt="Image" src="https://github.com/user-attachments/assets/f25be4e6-1adf-40ab-a2d8-85a3e2db525b" />

### Without Heron

Image correctly detected and classified and referenced.

However, the table structure is slightly distorted (but still parsable afterwards with sufficient heuristics).

<img width="568" height="928" alt="Image" src="https://github.com/user-attachments/assets/79857544-73ae-46c6-8a2d-faba1f79e301" />

## For 0.50 the result is

### With Heron

The image is not there anymore, and even the image classification is missing; no fix via post-processing heuristics is possible.

<img width="846" height="739" alt="Image" src="https://github.com/user-attachments/assets/7f2e169f-f488-43a0-ae89-3376b9f980a2" />

### Without Heron

The image is not there anymore, and even the image classification is missing; no fix via post-processing heuristics is possible.

<img width="846" height="739" alt="Image" src="https://github.com/user-attachments/assets/20ad37c9-54d2-4d16-8f98-1c37af54b291" />

> NOTICE: This is independent of Heron (the major change that happened)

So what is going on?


See the code to reproduce (plus the attached data file).

You have to install/change the library versions accordingly as needed.

Model download and imports

```python
from docling_core.types.doc.document import DoclingDocument
from docling_core.types.io import DocumentStream
from pypdf import PdfReader

import psutil
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
from docling.datamodel.base_models import InputFormat
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import (
    RapidOcrOptions,
    TableFormerMode,
    ThreadedPdfPipelineOptions
)
from docling.document_converter import DocumentConverter, PdfFormatOption

import os
from pathlib import Path

import requests
import torch
from docling.utils.model_downloader import download_models
from huggingface_hub import hf_hub_download

from modelscope import snapshot_download
from tqdm import tqdm
# from wand.image import Image as WImage
from IPython.display import display, HTML

from docling.datamodel.pipeline_options import LayoutOptions
from docling.datamodel.layout_model_specs import DOCLING_LAYOUT_HERON, DOCLING_LAYOUT_V2
from docling.models.layout_model import LayoutModel
from docling_core.types.doc import ImageRefMode

# Root of all model caches; the MODEL_CACHE_DIR environment variable overrides
# the default location under the user's home directory.
DEFAULT_CACHE_DIR = Path.home() / ".cache"
CACHE_DIR = Path(os.environ.get("MODEL_CACHE_DIR", DEFAULT_CACHE_DIR))
PADDLEOCR_MODELS_PATH = CACHE_DIR.joinpath("paddleocr_v4_native")

# Per-hub download locations below the cache root.
hf_model_path = CACHE_DIR.joinpath("huggingface/hub")
modelscope_model_path = CACHE_DIR.joinpath("modelscope/hub")

# Make sure both hub directories exist before anything is downloaded into them.
for _hub_dir in (hf_model_path, modelscope_model_path):
    _hub_dir.mkdir(parents=True, exist_ok=True)

# Folder (as a plain string) that the RapidOCR ONNX model paths are joined onto.
rapidocr_models_root = str(modelscope_model_path.joinpath("RapidAI/RapidOCR/onnx"))

def download_modelscope(model_id, model_revision, cache_dir, allow_patterns):
    """Download a filtered snapshot of a ModelScope model, reporting on stdout.

    Prints the request parameters, calls ``snapshot_download`` with them, and
    then prints the path that call returned.

    Args:
        model_id: Model identifier forwarded as ``model_id``.
        model_revision: Revision forwarded as ``revision``.
        cache_dir: Target directory forwarded as ``cache_dir``.
        allow_patterns: File patterns forwarded as ``allow_patterns``.

    Returns:
        None. The downloaded location is only printed.
    """
    banner = (
        "--- Starting Filtered Model Download ---",
        f"Model ID: {model_id}",
        f"Revision: {model_revision}",
        f"Target Directory: {cache_dir}",
        f"Allow Patterns: {allow_patterns}",
        "--------------------------------------",
    )
    for banner_line in banner:
        print(banner_line)

    request = {
        "model_id": model_id,
        "revision": model_revision,
        "cache_dir": cache_dir,
        "allow_patterns": allow_patterns,
    }
    local_model_path = snapshot_download(**request)

    print("\n✅ Model downloaded successfully!")
    print(f"Model files are located at: {local_model_path}")


# Repository-relative files to fetch from the RapidOCR model repository;
# everything else in the repository is skipped by the allow-pattern filter.
rapidocr_model_files = [
    "configuration.json",
    "onnx/PP-OCRv5/det/ch_PP-OCRv5_server_det.onnx",
    "onnx/PP-OCRv5/rec/latin_PP-OCRv5_rec_mobile_infer.onnx",
    "onnx/PP-OCRv4/cls/ch_ppocr_mobile_v2.0_cls_infer.onnx",
    "resources/fonts/FZYTK.TTF",
]
download_modelscope(
    "RapidAI/RapidOCR",
    "v3.4.0",
    modelscope_model_path,
    rapidocr_model_files,
)

# Docling's own model bundle, without the EasyOCR weights.
download_models(with_easyocr=False, progress=True)

# Layout model weights, stored in the folder named by the V2 layout spec.
LayoutModel.download_models(
    progress=True,
    force=False,
    local_dir=hf_model_path / DOCLING_LAYOUT_V2.model_repo_folder,
)
```
Document parser

```python
def setup_ocr(model_spec=None):
    """Build a ``DocumentConverter`` for PDFs using RapidOCR and table structure.

    The pipeline enables OCR, picture classification, picture image generation
    and accurate table-structure recognition with cell matching.

    Args:
        model_spec: Optional layout model specification (for example
            ``DOCLING_LAYOUT_HERON`` or ``DOCLING_LAYOUT_V2``). When falsy, the
            pipeline's default layout options are left untouched.

    Returns:
        A ``DocumentConverter`` whose PDF format option carries the pipeline
        options configured here, regardless of whether ``model_spec`` is given.
    """
    # psutil.cpu_count(logical=False) returns None when the number of physical
    # cores cannot be determined; max(2, None) would raise TypeError, so treat
    # "unknown" as 0 and let the floor of 2 apply.
    physical_cores = psutil.cpu_count(logical=False) or 0
    usable_cores = max(2, physical_cores)

    IMAGE_RESOLUTION_SCALE = 4

    pipeline_options = ThreadedPdfPipelineOptions()
    pipeline_options.do_ocr = True
    pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
    pipeline_options.generate_page_images = False
    pipeline_options.do_picture_classification = True
    pipeline_options.generate_picture_images = True

    det_model_path = os.path.join(
        rapidocr_models_root, "PP-OCRv5/det/ch_PP-OCRv5_server_det.onnx"
    )
    rec_model_path = os.path.join(
        rapidocr_models_root, "PP-OCRv5/rec/latin_PP-OCRv5_rec_mobile_infer.onnx"
    )
    cls_model_path = os.path.join(
        rapidocr_models_root, "PP-OCRv4/cls/ch_ppocr_mobile_v2.0_cls_infer.onnx"
    )
    # The font is requested as "resources/fonts/FZYTK.TTF" relative to the
    # model repository root (see rapidocr_model_files), i.e. beside the "onnx"
    # folder that rapidocr_models_root points at rather than inside it.
    # NOTE(review): confirm against the on-disk layout of the downloaded snapshot.
    rec_font_path = os.path.join(
        os.path.dirname(rapidocr_models_root), "resources/fonts/FZYTK.TTF"
    )

    pipeline_options.ocr_options = RapidOcrOptions(
        det_model_path=det_model_path,
        rec_model_path=rec_model_path,
        cls_model_path=cls_model_path,
        rec_font_path=rec_font_path,
    )

    pipeline_options.do_table_structure = True
    pipeline_options.table_structure_options.do_cell_matching = True
    pipeline_options.table_structure_options.mode = TableFormerMode.ACCURATE
    pipeline_options.ocr_options.lang = ["en"]

    pipeline_options.accelerator_options = AcceleratorOptions(
        num_threads=usable_cores, device=AcceleratorDevice.AUTO
    )

    if model_spec:
        # The layout model is a pipeline option. Passing it to PdfFormatOption
        # instead (as before) does not reach the layout stage, so the requested
        # model spec would have no effect on the conversion.
        pipeline_options.layout_options = LayoutOptions(model_spec=model_spec)

    # Always hand the configured options to the converter; previously the
    # no-model_spec path built a bare PdfFormatOption() and silently dropped
    # every setting made above.
    return DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options),
        }
    )
```

With Heron

```python
input_path = 'WO2024040109-eval-small'

# Convert the sample PDF with the Heron layout spec, write the HTML rendering
# next to the input, and show the same rendering inline in the notebook.
doc_converter = setup_ocr(model_spec=DOCLING_LAYOUT_HERON)
conv_res = doc_converter.convert(f'{input_path}.pdf')
heron_document = conv_res.document
heron_document.save_as_html(f"{input_path}.html")
display(HTML(heron_document.export_to_html()))
```

Without Heron

```python
input_path = 'WO2024040109-eval-small'

# Convert the same PDF with the V2 layout spec; the HTML output gets an
# "_old" suffix so it does not overwrite the Heron result.
doc_converter = setup_ocr(model_spec=DOCLING_LAYOUT_V2)
conv_res = doc_converter.convert(f'{input_path}.pdf')
v2_document = conv_res.document
v2_document.save_as_html(f"{input_path}_old.html")
display(HTML(v2_document.export_to_html()))
```

The document which was used for this testing is

[WO2024040109-eval-small.pdf](https://github.com/user-attachments/files/22197268/WO2024040109-eval-small.pdf)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Docling >=0.50 vs == 0.49 breaks for images in tables #2223

For 0.49 the result is

With Heron

Without Heron

For 0.50 the result is

With Heron

Without Heron

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Docling >=0.50 vs == 0.49 breaks for images in tables #2223

Description

For 0.49 the result is

With Heron

Without Heron

For 0.50 the result is

With Heron

Without Heron

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions