1010from typing_extensions import override
1111
1212from docling_core .transforms .visualizer .base import BaseVisualizer
13- from docling_core .types .doc .document import ContentLayer , DocItem , DoclingDocument
13+ from docling_core .types .doc .document import (
14+ ContentLayer ,
15+ DocItem ,
16+ DoclingDocument ,
17+ PictureItem ,
18+ )
1419
1520
1621class _NumberDrawingData (BaseModel ):
@@ -33,6 +38,20 @@ class Params(BaseModel):
3338 base_visualizer : Optional [BaseVisualizer ] = None
3439 params : Params = Params ()
3540
def _get_picture_context(
    self, elem: DocItem, doc: DoclingDocument
) -> Optional[str]:
    """Return the ``self_ref`` of the nearest enclosing ``PictureItem``.

    Follows ``parent`` references upward from ``elem``, resolving each one
    against ``doc``.

    Args:
        elem: The item whose ancestors are inspected.
        doc: The document used to resolve parent references.

    Returns:
        The ``self_ref`` of the first ancestor that is a ``PictureItem``,
        or ``None`` when the walk ends without meeting one.
    """
    node = elem
    while True:
        parent_ref = node.parent
        if parent_ref is None:
            # Reached an item without a parent: no enclosing picture found.
            return None

        ancestor = parent_ref.resolve(doc)
        if isinstance(ancestor, PictureItem):
            return ancestor.self_ref
        if not isinstance(ancestor, DocItem):
            # NOTE(review): the walk stops at the first ancestor that is not
            # a DocItem, so a picture located above such a node is not
            # reported — confirm this is the intended behavior.
            return None

        node = ancestor
54+
3655 def _draw_arrow (
3756 self ,
3857 draw : ImageDraw .ImageDraw ,
@@ -55,10 +74,10 @@ def _draw_arrow(
5574 # Calculate the arrowhead points
5675 dx = end_point [0 ] - start_point [0 ]
5776 dy = end_point [1 ] - start_point [1 ]
58- angle = (dx ** 2 + dy ** 2 ) ** 0.5 + 0.01 # Length of the arrow shaft
77+ distance = (dx ** 2 + dy ** 2 ) ** 0.5 + 0.01 # Length of the arrow shaft
5978
6079 # Normalized direction vector for the arrow shaft
61- ux , uy = dx / angle , dy / angle
80+ ux , uy = dx / distance , dy / distance
6281
6382 # Base of the arrowhead
6483 base_x = end_point [0 ] - ux * arrowhead_length
@@ -89,16 +108,34 @@ def _draw_doc_reading_order(
89108 except OSError :
90109 # Fallback to default font if arial is not available
91110 font = ImageFont .load_default ()
92- x0 , y0 = None , None
93- number_data_to_draw : dict [Optional [int ], list [_NumberDrawingData ]] = {}
94- my_images : dict [Optional [int ], Image ] = images or {}
95- prev_page = None
96- i = 0
111+
112+ # Separate reading order paths for outside vs inside pictures
113+ # Key: (page_no, picture_ref_or_None) -> (x0, y0, element_index)
114+ # picture_ref is None for elements outside any picture, otherwise the picture's self_ref
115+ reading_order_state : dict [
116+ tuple [int , Optional [str ]], tuple [float , float , int ]
117+ ] = {}
118+ number_data_to_draw : dict [int , list [_NumberDrawingData ]] = {}
119+ # Only int keys are used (from prov.page_no), even if input images has Optional[int] keys
120+ my_images : dict [int , Image ] = {
121+ k : v for k , v in (images or {}).items () if k is not None
122+ }
123+ prev_page : Optional [int ] = None
124+ element_index = 0
125+
97126 for elem , _ in doc .iterate_items (
98127 included_content_layers = self .params .content_layers ,
128+ traverse_pictures = True ,
99129 ):
100130 if not isinstance (elem , DocItem ):
101131 continue
132+
133+ picture_ref = self ._get_picture_context (elem , doc )
134+ # Include all elements in reading order:
135+ # - Top-level PictureItems are part of the outer reading order (picture_ref is None)
136+ # - Nested PictureItems are part of their parent picture's reading order (picture_ref is not None)
137+ # - Other elements follow the same pattern
138+
102139 if len (elem .prov ) == 0 :
103140 continue # Skip elements without provenances
104141
@@ -110,9 +147,9 @@ def _draw_doc_reading_order(
110147 number_data_to_draw [page_no ] = []
111148
112149 if image is None or prev_page is None or page_no != prev_page :
113- # new page begins
150+ # new page begins - reset all reading order paths
114151 prev_page = page_no
115- x0 = y0 = None
152+ reading_order_state . clear ()
116153
117154 if image is None :
118155 page_image = doc .pages [page_no ].image
@@ -140,35 +177,34 @@ def _draw_doc_reading_order(
140177 if ro_bbox .b > ro_bbox .t :
141178 ro_bbox .b , ro_bbox .t = ro_bbox .t , ro_bbox .b
142179
143- if x0 is None and y0 is None :
144- # is_root= True
145- x0 = (ro_bbox .l + ro_bbox .r ) / 2.0
146- y0 = (ro_bbox .b + ro_bbox .t ) / 2.0
180+ path_key = (page_no , picture_ref )
181+ state = reading_order_state .get (path_key )
147182
183+ x1 = (ro_bbox .l + ro_bbox .r ) / 2.0
184+ y1 = (ro_bbox .b + ro_bbox .t ) / 2.0
185+
186+ if state is None :
187+ # Start of a new reading order path (outside or inside picture)
188+ reading_order_state [path_key ] = (x1 , y1 , element_index )
148189 number_data_to_draw [page_no ].append (
149190 _NumberDrawingData (
150- xy = (x0 , y0 ),
151- text = f"{ i } " ,
191+ xy = (x1 , y1 ),
192+ text = f"{ element_index } " ,
152193 )
153194 )
154- i += 1
155-
195+ element_index += 1
156196 else :
157- # is_root = False
158- assert x0 is not None
159- assert y0 is not None
160-
161- x1 = (ro_bbox .l + ro_bbox .r ) / 2.0
162- y1 = (ro_bbox .b + ro_bbox .t ) / 2.0
163-
197+ # Continue existing reading order path
198+ x0 , y0 , _ = state
199+ # Use different color for picture-internal paths
200+ arrow_color = "blue" if picture_ref is not None else "red"
164201 draw = self ._draw_arrow (
165202 draw = draw ,
166203 arrow_coords = (x0 , y0 , x1 , y1 ),
167204 line_width = 2 ,
168- color = "red" ,
205+ color = arrow_color ,
169206 )
170-
171- x0 , y0 = x1 , y1
207+ reading_order_state [path_key ] = (x1 , y1 , state [2 ])
172208
173209 if self .params .show_branch_numbering :
174210 # post-drawing the numbers to ensure they are rendered on top-layer
0 commit comments