@@ -59,6 +59,18 @@ def _remove_images_from_markdown(markdown: str) -> str:
5959 return _MARKDOWN_IMAGE_PATTERN .sub ("" , markdown )
6060
6161
62+ def _normalize_bbox (bbox : list [Any ] | tuple [Any , ...]) -> tuple [float , float , float , float ]:
63+ if len (bbox ) < 4 :
64+ return 0.0 , 0.0 , 0.0 , 0.0
65+
66+ left , top , right , bottom = (float (bbox [0 ]), float (bbox [1 ]), float (bbox [2 ]), float (bbox [3 ]))
67+ if left > right :
68+ left , right = right , left
69+ if top > bottom :
70+ top , bottom = bottom , top
71+ return left , top , right , bottom
72+
73+
6274@dataclass
6375class PaddleOCRVLConfig :
6476 """Configuration for PaddleOCR-VL algorithm."""
@@ -393,8 +405,9 @@ def _transfer_to_sections(self, result: dict[str, Any], algorithm: AlgorithmType
393405
394406 label = block .get ("block_label" , "" )
395407 block_bbox = block .get ("block_bbox" , [0 , 0 , 0 , 0 ])
408+ left , top , right , bottom = _normalize_bbox (block_bbox )
396409
397- tag = f"@@{ page_idx + 1 } \t { block_bbox [ 0 ] // self ._ZOOMIN } \t { block_bbox [ 2 ] // self ._ZOOMIN } \t { block_bbox [ 1 ] // self ._ZOOMIN } \t { block_bbox [ 3 ] // self ._ZOOMIN } ##"
410+ tag = f"@@{ page_idx + 1 } \t { left // self ._ZOOMIN } \t { right // self ._ZOOMIN } \t { top // self ._ZOOMIN } \t { bottom // self ._ZOOMIN } ##"
398411
399412 if parse_method == "manual" :
400413 sections .append ((block_content , label , tag ))
@@ -509,6 +522,16 @@ def crop(self, text: str, need_position: bool = False):
509522
510523 img0 = self .page_images [pns [0 ]]
511524 x0 , y0 , x1 , y1 = int (left ), int (top ), int (right ), int (min (bottom , img0 .size [1 ]))
525+ if x0 > x1 :
526+ x0 , x1 = x1 , x0
527+ if y0 > y1 :
528+ y0 , y1 = y1 , y0
529+ x0 = max (0 , min (x0 , img0 .size [0 ]))
530+ x1 = max (0 , min (x1 , img0 .size [0 ]))
531+ y0 = max (0 , min (y0 , img0 .size [1 ]))
532+ y1 = max (0 , min (y1 , img0 .size [1 ]))
533+ if x1 <= x0 or y1 <= y0 :
534+ continue
512535 crop0 = img0 .crop ((x0 , y0 , x1 , y1 ))
513536 imgs .append (crop0 )
514537 if 0 < ii < len (poss ) - 1 :
@@ -521,6 +544,17 @@ def crop(self, text: str, need_position: bool = False):
521544 continue
522545 page = self .page_images [pn ]
523546 x0 , y0 , x1 , y1 = int (left ), 0 , int (right ), int (min (bottom , page .size [1 ]))
547+ if x0 > x1 :
548+ x0 , x1 = x1 , x0
549+ if y0 > y1 :
550+ y0 , y1 = y1 , y0
551+ x0 = max (0 , min (x0 , page .size [0 ]))
552+ x1 = max (0 , min (x1 , page .size [0 ]))
553+ y0 = max (0 , min (y0 , page .size [1 ]))
554+ y1 = max (0 , min (y1 , page .size [1 ]))
555+ if x1 <= x0 or y1 <= y0 :
556+ bottom -= page .size [1 ]
557+ continue
524558 cimgp = page .crop ((x0 , y0 , x1 , y1 ))
525559 imgs .append (cimgp )
526560 if 0 < ii < len (poss ) - 1 :
0 commit comments