Skip to content

Commit 09ff1bc

Browse files
Fix: paddle ocr coordinate lower > upper (#13630)
### What problem does this PR solve?

Fix: PaddleOCR bounding-box coordinates where the lower bound is greater than the upper bound (#13618).

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: Yingfeng <yingfeng.zhang@gmail.com>
1 parent 0545801 commit 09ff1bc

File tree

1 file changed

+35
-1
lines changed

1 file changed

+35
-1
lines changed

deepdoc/parser/paddleocr_parser.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,18 @@ def _remove_images_from_markdown(markdown: str) -> str:
5959
return _MARKDOWN_IMAGE_PATTERN.sub("", markdown)
6060

6161

62+
def _normalize_bbox(bbox: list[Any] | tuple[Any, ...]) -> tuple[float, float, float, float]:
63+
if len(bbox) < 4:
64+
return 0.0, 0.0, 0.0, 0.0
65+
66+
left, top, right, bottom = (float(bbox[0]), float(bbox[1]), float(bbox[2]), float(bbox[3]))
67+
if left > right:
68+
left, right = right, left
69+
if top > bottom:
70+
top, bottom = bottom, top
71+
return left, top, right, bottom
72+
73+
6274
@dataclass
6375
class PaddleOCRVLConfig:
6476
"""Configuration for PaddleOCR-VL algorithm."""
@@ -393,8 +405,9 @@ def _transfer_to_sections(self, result: dict[str, Any], algorithm: AlgorithmType
393405

394406
label = block.get("block_label", "")
395407
block_bbox = block.get("block_bbox", [0, 0, 0, 0])
408+
left, top, right, bottom = _normalize_bbox(block_bbox)
396409

397-
tag = f"@@{page_idx + 1}\t{block_bbox[0] // self._ZOOMIN}\t{block_bbox[2] // self._ZOOMIN}\t{block_bbox[1] // self._ZOOMIN}\t{block_bbox[3] // self._ZOOMIN}##"
410+
tag = f"@@{page_idx + 1}\t{left // self._ZOOMIN}\t{right // self._ZOOMIN}\t{top // self._ZOOMIN}\t{bottom // self._ZOOMIN}##"
398411

399412
if parse_method == "manual":
400413
sections.append((block_content, label, tag))
@@ -509,6 +522,16 @@ def crop(self, text: str, need_position: bool = False):
509522

510523
img0 = self.page_images[pns[0]]
511524
x0, y0, x1, y1 = int(left), int(top), int(right), int(min(bottom, img0.size[1]))
525+
if x0 > x1:
526+
x0, x1 = x1, x0
527+
if y0 > y1:
528+
y0, y1 = y1, y0
529+
x0 = max(0, min(x0, img0.size[0]))
530+
x1 = max(0, min(x1, img0.size[0]))
531+
y0 = max(0, min(y0, img0.size[1]))
532+
y1 = max(0, min(y1, img0.size[1]))
533+
if x1 <= x0 or y1 <= y0:
534+
continue
512535
crop0 = img0.crop((x0, y0, x1, y1))
513536
imgs.append(crop0)
514537
if 0 < ii < len(poss) - 1:
@@ -521,6 +544,17 @@ def crop(self, text: str, need_position: bool = False):
521544
continue
522545
page = self.page_images[pn]
523546
x0, y0, x1, y1 = int(left), 0, int(right), int(min(bottom, page.size[1]))
547+
if x0 > x1:
548+
x0, x1 = x1, x0
549+
if y0 > y1:
550+
y0, y1 = y1, y0
551+
x0 = max(0, min(x0, page.size[0]))
552+
x1 = max(0, min(x1, page.size[0]))
553+
y0 = max(0, min(y0, page.size[1]))
554+
y1 = max(0, min(y1, page.size[1]))
555+
if x1 <= x0 or y1 <= y0:
556+
bottom -= page.size[1]
557+
continue
524558
cimgp = page.crop((x0, y0, x1, y1))
525559
imgs.append(cimgp)
526560
if 0 < ii < len(poss) - 1:

0 commit comments

Comments
 (0)