Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tests/test_backend_msword.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def _test_e2e_docx_conversions_impl(docx_paths: list[tuple[Path, DoclingDocument
), f"export to indented-text failed on {docx_path}"

assert verify_document(
doc, str(docx_path) + ".json", generate=GENERATE, fuzzy=True
doc, str(docx_path) + ".json", generate=GENERATE, fuzzy=False
), f"DoclingDocument verification failed on {docx_path}"

if docx_path.name in {"word_tables.docx", "docx_rich_cells.docx"}:
Expand Down
87 changes: 85 additions & 2 deletions tests/test_verify_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,14 +102,97 @@ def test_verify_docitems_rejects_picture_count_mismatch():
)


def test_verify_docitems_uses_predicted_picture_image() -> None:
    """Check that a grossly different predicted picture width is rejected.

    The ground-truth picture is 2x2 and the predicted one is 3x2, a width
    difference far above the strict image-size tolerance, so
    ``verify_docitems`` must fail with the width-mismatch assertion.
    """
    doc_true = _make_doc_with_picture(image_size=(2, 2))
    doc_pred = _make_doc_with_picture(image_size=(3, 2))

    # Match on the message so an unrelated AssertionError cannot satisfy the test.
    with pytest.raises(AssertionError, match="Image width mismatch"):
        verify_docitems(
            doc_pred=doc_pred,
            doc_true=doc_true,
            fuzzy=False,
            pdf_filename="fixture.json",
        )


@pytest.mark.parametrize(
    "true_size,pred_size,fuzzy,should_pass,expected_error",
    [
        # --- Strict mode (fuzzy=False): limit is 1.5% of each dimension ---
        # width off by 3px of 254 (~1.18%): accepted
        ((254, 267), (251, 267), False, True, None),
        # width off by 4px of 254 (~1.57%): rejected
        ((254, 267), (250, 267), False, False, "Image width mismatch"),
        # height off by 4px of 267 (~1.498%): accepted, just under the limit
        ((254, 267), (254, 263), False, True, None),
        # height off by 5px of 267 (~1.87%): rejected
        ((254, 267), (254, 262), False, False, "Image height mismatch"),
        # --- Fuzzy mode (fuzzy=True): limit is 5% of each dimension ---
        # width off by 12px (~4.72%) and height off by 13px (~4.87%): accepted
        ((254, 267), (242, 254), True, True, None),
        # width off by 13px of 254 (~5.12%): rejected
        ((254, 267), (241, 267), True, False, "Image width mismatch"),
        # height off by 14px of 267 (~5.24%): rejected
        ((254, 267), (254, 253), True, False, "Image height mismatch"),
        # --- Small images: the limit scales with the image ---
        # 1px of 10 is 10%, far above 1.5%: rejected (width is reported first)
        ((10, 10), (9, 9), False, False, "Image width mismatch"),
        # 1px of 100 is 1%, below 1.5%: accepted
        ((100, 100), (99, 99), False, True, None),
    ],
)
def test_verify_docitems_image_size_fuzziness(
    true_size: tuple[int, int],
    pred_size: tuple[int, int],
    fuzzy: bool,
    should_pass: bool,
    expected_error: str | None,
) -> None:
    """Exercise the relative image-size tolerance of ``verify_docitems``.

    Each case compares a ground-truth picture of ``true_size`` with a
    predicted picture of ``pred_size`` in strict or fuzzy mode. Cases marked
    ``should_pass`` must verify cleanly; the others must raise an
    ``AssertionError`` whose message matches ``expected_error``.
    """
    truth = _make_doc_with_picture(image_size=true_size)
    prediction = _make_doc_with_picture(image_size=pred_size)
    call_kwargs = {
        "doc_pred": prediction,
        "doc_true": truth,
        "fuzzy": fuzzy,
        "pdf_filename": "fixture.json",
    }

    if not should_pass:
        with pytest.raises(AssertionError, match=expected_error):
            verify_docitems(**call_kwargs)
        return

    verify_docitems(**call_kwargs)
48 changes: 42 additions & 6 deletions tests/verify_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
# Bounding-box tolerance ratio for fuzzy comparisons: OCR/image output varies
# more, but gross shifts should still fail.
FUZZY_BBOX_TOL_RATIO = 0.005

# Image-size tolerance ratios, each a fraction of the ground-truth dimension.
# Strict mode allows ~1.5% to absorb cross-platform image size variance.
STRICT_IMAGE_SIZE_TOL_RATIO = 0.015
# Fuzzy mode allows ~5% because OCR/image output varies more.
FUZZY_IMAGE_SIZE_TOL_RATIO = 0.05


class _TestPagesMeta(BaseModel):
Expand Down Expand Up @@ -171,12 +173,46 @@ def verify_table_v2(true_item: TableItem, pred_item: TableItem, fuzzy: bool):


def verify_picture_image_v2(
    true_image: PILImage.Image, pred_item: Optional[PILImage.Image], fuzzy: bool = False
) -> bool:
    """Check that a predicted picture matches the ground truth in mode and size.

    The image mode must match exactly. Width and height are checked
    independently, width first: each may differ from the ground truth by at
    most a fraction of the ground-truth dimension.

    Args:
        true_image: Ground truth image.
        pred_item: Predicted image; ``None`` is rejected.
        fuzzy: If True, use ``FUZZY_IMAGE_SIZE_TOL_RATIO`` as the allowed
            fraction; otherwise use ``STRICT_IMAGE_SIZE_TOL_RATIO``.

    Returns:
        ``True`` when every check passes, so the call can sit inside an
        ``assert``.

    Raises:
        AssertionError: If the predicted image is missing, the modes differ,
            or a dimension differs by more than the allowed fraction.

    Note:
        Image bytes are not compared, as they can vary significantly across
        platforms even for visually identical images.
    """
    assert pred_item is not None, "predicted image is None"

    # Check image mode (should be exact)
    assert true_image.mode == pred_item.mode, (
        f"Image mode mismatch: {true_image.mode} vs {pred_item.mode}"
    )

    # Check image size with percentage-based tolerance
    tol_ratio = FUZZY_IMAGE_SIZE_TOL_RATIO if fuzzy else STRICT_IMAGE_SIZE_TOL_RATIO

    for axis, true_dim, pred_dim in zip(
        ("width", "height"), true_image.size, pred_item.size
    ):
        diff = abs(true_dim - pred_dim)
        if true_dim > 0:
            diff_ratio = diff / true_dim
        else:
            # A zero-sized ground-truth dimension has no meaningful percentage:
            # require an exact match instead of letting any size through.
            diff_ratio = 0.0 if diff == 0 else float("inf")

        assert diff_ratio <= tol_ratio, (
            f"Image {axis} mismatch: {true_dim} vs {pred_dim} "
            f"(diff: {diff} pixels, {diff_ratio:.1%} vs tolerance {tol_ratio:.1%})"
        )

    return True


Expand Down Expand Up @@ -285,7 +321,7 @@ def verify_docitems(
true_image = true_item.get_image(doc=doc_true)
pred_image = pred_item.get_image(doc=doc_pred)
if true_image is not None:
assert verify_picture_image_v2(true_image, pred_image), (
assert verify_picture_image_v2(true_image, pred_image, fuzzy=fuzzy), (
f"[{pdf_filename}] Picture image mismatch"
)
# TODO: check picture annotations
Expand Down
Loading