Fix: paddle ocr coordinate lower > upper (#13630)

Magicbook1108 · yingfeng · web-flow · commit 09ff1bc2b003 · 2026-03-16T20:15:26.000+08:00
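### What problem does this PR solve? Fix: PaddleOCR bounding-box coordinates can arrive in reversed order (the "lower > upper" coordinate problem reported in #13618). This change normalizes each bbox so that left <= right and top <= bottom, clamps crop regions to the page image bounds, and skips regions that end up empty. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) Co-authored-by: Yingfeng <yingfeng.zhang@gmail.com>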
diff --git a/deepdoc/parser/paddleocr_parser.py b/deepdoc/parser/paddleocr_parser.py
@@ -59,6 +59,18 @@ def _remove_images_from_markdown(markdown: str) -> str:
     return _MARKDOWN_IMAGE_PATTERN.sub("", markdown)
 
 
+def _normalize_bbox(bbox: list[Any] | tuple[Any, ...]) -> tuple[float, float, float, float]:
+    """Return the first four items of ``bbox`` as floats ``(left, top, right, bottom)``, swapping each reversed axis pair."""
+    # A missing (None) or too-short bbox cannot describe a box; fall back to an empty box at the origin.
+    if bbox is None or len(bbox) < 4:
+        return 0.0, 0.0, 0.0, 0.0
+    left, top, right, bottom = (float(bbox[0]), float(bbox[1]), float(bbox[2]), float(bbox[3]))
+    # Swap only on a strict ">" so pairs that are already ordered (or equal) are returned unchanged.
+    left, right = (right, left) if left > right else (left, right)
+    top, bottom = (bottom, top) if top > bottom else (top, bottom)
+    return left, top, right, bottom
+
+
 @dataclass
 class PaddleOCRVLConfig:
     """Configuration for PaddleOCR-VL algorithm."""
@@ -393,8 +405,9 @@ def _transfer_to_sections(self, result: dict[str, Any], algorithm: AlgorithmType
 
                     label = block.get("block_label", "")
                     block_bbox = block.get("block_bbox", [0, 0, 0, 0])
+                    left, top, right, bottom = _normalize_bbox(block_bbox)
 
-                    tag = f"@@{page_idx + 1}\t{block_bbox[0] // self._ZOOMIN}\t{block_bbox[2] // self._ZOOMIN}\t{block_bbox[1] // self._ZOOMIN}\t{block_bbox[3] // self._ZOOMIN}##"
+                    tag = f"@@{page_idx + 1}\t{left // self._ZOOMIN}\t{right // self._ZOOMIN}\t{top // self._ZOOMIN}\t{bottom // self._ZOOMIN}##"
 
                     if parse_method == "manual":
                         sections.append((block_content, label, tag))
@@ -509,6 +522,16 @@ def crop(self, text: str, need_position: bool = False):
 
             img0 = self.page_images[pns[0]]
             x0, y0, x1, y1 = int(left), int(top), int(right), int(min(bottom, img0.size[1]))
+            if x0 > x1:
+                x0, x1 = x1, x0
+            if y0 > y1:
+                y0, y1 = y1, y0
+            x0 = max(0, min(x0, img0.size[0]))
+            x1 = max(0, min(x1, img0.size[0]))
+            y0 = max(0, min(y0, img0.size[1]))
+            y1 = max(0, min(y1, img0.size[1]))
+            if x1 <= x0 or y1 <= y0:
+                continue
             crop0 = img0.crop((x0, y0, x1, y1))
             imgs.append(crop0)
             if 0 < ii < len(poss) - 1:
@@ -521,6 +544,17 @@ def crop(self, text: str, need_position: bool = False):
                     continue
                 page = self.page_images[pn]
                 x0, y0, x1, y1 = int(left), 0, int(right), int(min(bottom, page.size[1]))
+                if x0 > x1:
+                    x0, x1 = x1, x0
+                if y0 > y1:
+                    y0, y1 = y1, y0
+                x0 = max(0, min(x0, page.size[0]))
+                x1 = max(0, min(x1, page.size[0]))
+                y0 = max(0, min(y0, page.size[1]))
+                y1 = max(0, min(y1, page.size[1]))
+                if x1 <= x0 or y1 <= y0:
+                    bottom -= page.size[1]
+                    continue
                 cimgp = page.crop((x0, y0, x1, y1))
                 imgs.append(cimgp)
                 if 0 < ii < len(poss) - 1: