docling-project · ceberam · Jun 19, 2026 · Jun 19, 2026 · Jun 19, 2026
diff --git a/tests/test_backend_msword.py b/tests/test_backend_msword.py
@@ -107,7 +107,7 @@ def _test_e2e_docx_conversions_impl(docx_paths: list[tuple[Path, DoclingDocument
         ), f"export to indented-text failed on {docx_path}"
 
         assert verify_document(
-            doc, str(docx_path) + ".json", generate=GENERATE, fuzzy=True
+            doc, str(docx_path) + ".json", generate=GENERATE, fuzzy=False
         ), f"DoclingDocument verification failed on {docx_path}"
 
         if docx_path.name in {"word_tables.docx", "docx_rich_cells.docx"}:

diff --git a/tests/test_verify_utils.py b/tests/test_verify_utils.py
@@ -102,14 +102,97 @@ def test_verify_docitems_rejects_picture_count_mismatch():
         )
 
 
-def test_verify_docitems_uses_predicted_picture_image():
+def test_verify_docitems_uses_predicted_picture_image() -> None:
     doc_true = _make_doc_with_picture(image_size=(2, 2))
     doc_pred = _make_doc_with_picture(image_size=(3, 2))
 
-    with pytest.raises(AssertionError):
+    with pytest.raises(AssertionError, match="Image width mismatch"):  # (2, 2) vs (3, 2): only the first (presumably width) component differs
         verify_docitems(
             doc_pred=doc_pred,
             doc_true=doc_true,
             fuzzy=False,
             pdf_filename="fixture.json",
         )
+
+
+@pytest.mark.parametrize(
+    "true_size,pred_size,fuzzy,should_pass,expected_error",
+    [
+        # Strict mode (fuzzy=False): tolerance is 1.5% of the ground-truth image dimension
+        # For 254x267 image: 3px = 1.18% width, 4px = 1.498% height (just under 1.5%)
+        ((254, 267), (251, 267), False, True, None),  # 3px = 1.18% width: passes
+        (
+            (254, 267),
+            (250, 267),
+            False,
+            False,
+            "Image width mismatch",
+        ),  # 4px = 1.57% width: fails
+        (
+            (254, 267),
+            (254, 263),
+            False,
+            True,
+            None,
+        ),  # 4px = 1.498% height: passes (just under the 1.5% limit)
+        (
+            (254, 267),
+            (254, 262),
+            False,
+            False,
+            "Image height mismatch",
+        ),  # 5px = 1.87% height: fails
+        # Fuzzy mode (fuzzy=True): tolerance is 5% of the ground-truth image dimension
+        # For 254x267 image: 12px = 4.72% width, 13px = 4.87% height
+        ((254, 267), (242, 254), True, True, None),  # 12px width, 13px height = ~4.7-4.9%: passes
+        (
+            (254, 267),
+            (241, 267),
+            True,
+            False,
+            "Image width mismatch",
+        ),  # 13px = 5.12% width: fails
+        (
+            (254, 267),
+            (254, 253),
+            True,
+            False,
+            "Image height mismatch",
+        ),  # 14px = 5.24% height: fails
+        # Small images: a single pixel is a large relative change, so the tolerance scales with image size
+        (
+            (10, 10),
+            (9, 9),
+            False,
+            False,
+            "Image width mismatch",
+        ),  # 1px = 10% on both axes: fails (>> 1.5%); the width assertion fires first
+        ((100, 100), (99, 99), False, True, None),  # 1px = 1%: passes (< 1.5%)
+    ],
+)
+def test_verify_docitems_image_size_fuzziness(
+    true_size: tuple[int, int],
+    pred_size: tuple[int, int],
+    fuzzy: bool,
+    should_pass: bool,
+    expected_error: str | None,
+) -> None:
+    """Test image size verification with percentage-based tolerance in strict and fuzzy modes."""
+    doc_true = _make_doc_with_picture(image_size=true_size)
+    doc_pred = _make_doc_with_picture(image_size=pred_size)
+
+    if should_pass:
+        verify_docitems(
+            doc_pred=doc_pred,
+            doc_true=doc_true,
+            fuzzy=fuzzy,
+            pdf_filename="fixture.json",
+        )
+    else:
+        with pytest.raises(AssertionError, match=expected_error):
+            verify_docitems(
+                doc_pred=doc_pred,
+                doc_true=doc_true,
+                fuzzy=fuzzy,
+                pdf_filename="fixture.json",
+            )
diff --git a/tests/verify_utils.py b/tests/verify_utils.py
@@ -26,6 +26,8 @@
 FUZZY_BBOX_TOL_RATIO = (
     0.005  # OCR/image output varies more, but gross shifts should fail
 )
+STRICT_IMAGE_SIZE_TOL_RATIO = 0.015  # fuzzy=False: allow up to 1.5% cross-platform image size variance per axis
+FUZZY_IMAGE_SIZE_TOL_RATIO = 0.05  # fuzzy=True: OCR/image output varies more, allow up to 5% per axis
 
 
 class _TestPagesMeta(BaseModel):
@@ -171,12 +173,46 @@ def verify_table_v2(true_item: TableItem, pred_item: TableItem, fuzzy: bool):
 
 
 def verify_picture_image_v2(
-    true_image: PILImage.Image, pred_item: Optional[PILImage.Image]
-):
+    true_image: PILImage.Image, pred_item: Optional[PILImage.Image], fuzzy: bool = False
+) -> bool:
+    """Compare image properties with optional fuzziness for cross-platform variance.
+
+    Args:
+        true_image: Ground truth image
+        pred_item: Predicted image
+        fuzzy: If True, allow larger size differences (e.g., OCR/image processing variance)
+
+    Returns:
+        True when the mode matches and both dimensions are within tolerance; a failed check raises AssertionError
+
+    Note:
+        We don't compare image bytes as they can vary significantly across platforms even for visually identical images
+    """
     assert pred_item is not None, "predicted image is None"
-    assert true_image.size == pred_item.size
-    assert true_image.mode == pred_item.mode
-    # assert true_image.tobytes() == pred_item.tobytes()
+
+    # Check image mode (should be exact)
+    assert true_image.mode == pred_item.mode, (
+        f"Image mode mismatch: {true_image.mode} vs {pred_item.mode}"
+    )
+
+    # Check image size with percentage-based tolerance
+    tol_ratio = FUZZY_IMAGE_SIZE_TOL_RATIO if fuzzy else STRICT_IMAGE_SIZE_TOL_RATIO
+    true_width, true_height = true_image.size
+    pred_width, pred_height = pred_item.size
+    width_diff = abs(true_width - pred_width)
+    height_diff = abs(true_height - pred_height)
+    # Ratios are relative to the ground truth; max(..., 1) makes a zero-sized ground truth fail on any non-zero diff
+    width_diff_ratio = width_diff / max(true_width, 1)
+    height_diff_ratio = height_diff / max(true_height, 1)
+
+    assert width_diff_ratio <= tol_ratio, (
+        f"Image width mismatch: {true_width} vs {pred_width} "
+        f"(diff: {width_diff} pixels, {width_diff_ratio:.1%} vs tolerance {tol_ratio:.1%})"
+    )
+    assert height_diff_ratio <= tol_ratio, (
+        f"Image height mismatch: {true_height} vs {pred_height} "
+        f"(diff: {height_diff} pixels, {height_diff_ratio:.1%} vs tolerance {tol_ratio:.1%})"
+    )
     return True
 
 
@@ -285,7 +321,7 @@ def verify_docitems(
             true_image = true_item.get_image(doc=doc_true)
             pred_image = pred_item.get_image(doc=doc_pred)
             if true_image is not None:
-                assert verify_picture_image_v2(true_image, pred_image), (
+                assert verify_picture_image_v2(true_image, pred_image, fuzzy=fuzzy), (
                     f"[{pdf_filename}] Picture image mismatch"
                 )
         # TODO: check picture annotations