@@ -115,7 +115,7 @@ class PageLayout:
115115 def __init__ (
116116 self ,
117117 number : int ,
118- image : Image ,
118+ image : Image . Image ,
119119 layout : Optional [List [TextRegion ]],
120120 model : Optional [UnstructuredModel ] = None ,
121121 ocr_strategy : str = "auto" ,
@@ -202,6 +202,7 @@ def process_data_with_model(
202202 is_image : bool = False ,
203203 ocr_strategy : str = "auto" ,
204204 fixed_layouts : Optional [List [Optional [List [TextRegion ]]]] = None ,
205+ extract_tables : bool = False ,
205206) -> DocumentLayout :
206207 """Processes pdf file in the form of a file handler (supporting a read method) into a
207208 DocumentLayout by using a model identified by model_name."""
@@ -213,6 +214,7 @@ def process_data_with_model(
213214 is_image = is_image ,
214215 ocr_strategy = ocr_strategy ,
215216 fixed_layouts = fixed_layouts ,
217+ extract_tables = extract_tables ,
216218 )
217219
218220 return layout
@@ -224,15 +226,22 @@ def process_file_with_model(
224226 is_image : bool = False ,
225227 ocr_strategy : str = "auto" ,
226228 fixed_layouts : Optional [List [Optional [List [TextRegion ]]]] = None ,
229+ extract_tables : bool = False ,
227230) -> DocumentLayout :
228231 """Processes pdf file with name filename into a DocumentLayout by using a model identified by
229232 model_name."""
230233 model = get_model (model_name )
231234 layout = (
232- DocumentLayout .from_image_file (filename , model = model , ocr_strategy = ocr_strategy )
235+ DocumentLayout .from_image_file (
236+ filename , model = model , ocr_strategy = ocr_strategy , extract_tables = extract_tables
237+ )
233238 if is_image
234239 else DocumentLayout .from_file (
235- filename , model = model , ocr_strategy = ocr_strategy , fixed_layouts = fixed_layouts
240+ filename ,
241+ model = model ,
242+ ocr_strategy = ocr_strategy ,
243+ fixed_layouts = fixed_layouts ,
244+ extract_tables = extract_tables ,
236245 )
237246 )
238247 return layout
0 commit comments