@@ -49,6 +49,7 @@ def __init__(
4949 self .llm_interceptors : List [LlmInterceptor ] = []
5050 self .is_classify_image : bool = False
5151 self ._skip_loading : bool = False
52+ self .chunk_height : int = 1500
5253
5354 def add_interceptor (
5455 self , interceptor : Union [LoaderInterceptor , LlmInterceptor ]
@@ -184,16 +185,27 @@ def _map_to_universal_format(
184185 Maps loaded content to a universal format that _extract can process.
185186 The universal format is:
186187 {
187- "content": str, # The text content
188- "images": List[bytes], # Optional list of image bytes if vision=True
189- "metadata": Dict[str, Any] # Optional metadata
188+ "content": str, # The text content (joined from pages)
189+ "images": List[bytes],
190+ # Optional list of image bytes if vision=True (can hold multiple)
191+ "metadata": {}
190192 }
191193 """
192194 if content is None :
193195 return {"content" : "" , "images" : [], "metadata" : {}}
194196
195197 # If content is already in universal format, return as is
196198 if isinstance (content , dict ) and "content" in content :
199+ # Ensure 'images' is a list
200+ if "image" in content and "images" not in content :
201+ # Merge single 'image' into 'images'
202+ content ["images" ] = [content ["image" ]] if content ["image" ] else []
203+ del content ["image" ]
204+ elif "images" in content and not isinstance (content ["images" ], list ):
205+ # If 'images' is mistakenly a single byte blob, fix it
206+ content ["images" ] = [content ["images" ]] if content ["images" ] else []
207+ elif "images" not in content :
208+ content ["images" ] = []
197209 return content
198210
199211 # Handle list of pages from document loader
@@ -207,8 +219,13 @@ def _map_to_universal_format(
207219 if 'content' in page :
208220 text_content .append (page ['content' ])
209221 # Extract images if vision mode is enabled
210- if vision and 'image' in page :
211- images .append (page ['image' ])
222+ if vision :
223+ # If there's a list of images
224+ if 'images' in page and isinstance (page ['images' ], list ):
225+ images .extend (page ['images' ])
226+ # Or just a single 'image'
227+ elif 'image' in page and page ['image' ]:
228+ images .append (page ['image' ])
212229
213230 return {
214231 "content" : "\n \n " .join (text_content ) if text_content else "" ,
@@ -230,11 +247,18 @@ def _map_to_universal_format(
230247 if isinstance (text_content , list ):
231248 text_content = "\n " .join (text_content )
232249
250+ images = []
251+ if vision :
252+ if "images" in content and isinstance (content ["images" ], list ):
253+ images .extend (content ["images" ])
254+ elif "image" in content and content ["image" ]:
255+ images .append (content ["image" ])
256+
233257 return {
234258 "content" : text_content ,
235- "images" : content . get ( " images" , []) if vision else [] ,
259+ "images" : images ,
236260 "metadata" : {k : v for k , v in content .items ()
237- if k not in ["text" , "images" , "content" ]}
261+ if k not in ["text" , "images" , "image" , " content" ]}
238262 }
239263
240264 raise ValueError (f"Unsupported content format: { type (content )} " )
@@ -1067,7 +1091,7 @@ def _add_images_to_message_content(
10671091 elif isinstance (content , dict ):
10681092 # Handle legacy format
10691093 image_data = content .get ('image' ) or content .get ('images' )
1070- self ._append_images (image_data , message_content )
1094+ self ._append_images (image_data [ 0 ] , message_content )
10711095
10721096 def _append_images (
10731097 self ,
@@ -1078,27 +1102,54 @@ def _append_images(
10781102 Append images to the message content.
10791103
10801104 Args:
1081- image_data: The image data to process.
1105+ image_data: The image data to process. Can be:
1106+ - A dictionary with 'image' or 'images' keys
1107+ - A list of images
1108+ - A single image
10821109 message_content: The message content to append images to.
10831110 """
10841111 if not image_data :
10851112 return
10861113
1114+ images_list = []
10871115 if isinstance (image_data , dict ):
1088- images_list = image_data .values ()
1116+ # Handle dictionary format
1117+ if "images" in image_data :
1118+ # If "images" key exists, it should be a list of images
1119+ if isinstance (image_data ["images" ], list ):
1120+ images_list .extend (image_data ["images" ])
1121+ else :
1122+ # Single image in "images" key
1123+ images_list .append (image_data ["images" ])
1124+ elif "image" in image_data and image_data ["image" ] is not None :
1125+ # Single image in "image" key
1126+ images_list .append (image_data ["image" ])
10891127 elif isinstance (image_data , list ):
1090- images_list = image_data
1128+ # Process list of images or image dictionaries
1129+ for item in image_data :
1130+ if isinstance (item , dict ):
1131+ # Handle nested image dictionaries
1132+ if "images" in item and isinstance (item ["images" ], list ):
1133+ images_list .extend (item ["images" ])
1134+ elif "image" in item and item ["image" ] is not None :
1135+ images_list .append (item ["image" ])
1136+ else :
1137+ # Raw image data
1138+ images_list .append (item )
10911139 else :
1092- images_list = [image_data ]
1140+ # Single raw image
1141+ images_list .append (image_data )
10931142
1143+ # Process all collected images
10941144 for img in images_list :
1095- base64_image = encode_image (img )
1096- message_content .append ({
1097- "type" : "image_url" ,
1098- "image_url" : {
1099- "url" : f"data:image/jpeg;base64,{ base64_image } "
1100- }
1101- })
1145+ if img is not None : # Skip None values
1146+ base64_image = encode_image (img )
1147+ message_content .append ({
1148+ "type" : "image_url" ,
1149+ "image_url" : {
1150+ "url" : f"data:image/jpeg;base64,{ base64_image } "
1151+ }
1152+ })
11021153
11031154 def _build_messages (
11041155 self ,
0 commit comments