@@ -71,10 +71,7 @@ def from_pages(cls, pages: List[PageLayout]) -> DocumentLayout:
7171 def from_file (
7272 cls ,
7373 filename : str ,
74- detection_model : Optional [UnstructuredObjectDetectionModel ] = None ,
75- element_extraction_model : Optional [UnstructuredElementExtractionModel ] = None ,
7674 fixed_layouts : Optional [List [Optional [List [TextRegion ]]]] = None ,
77- extract_tables : bool = False ,
7875 pdf_image_dpi : int = 200 ,
7976 ** kwargs ,
8077 ) -> DocumentLayout :
@@ -108,11 +105,8 @@ def from_file(
108105 image ,
109106 number = i + 1 ,
110107 document_filename = filename ,
111- detection_model = detection_model ,
112- element_extraction_model = element_extraction_model ,
113108 layout = layout ,
114109 fixed_layout = fixed_layout ,
115- extract_tables = extract_tables ,
116110 ** kwargs ,
117111 )
118112 pages .append (page )
@@ -453,10 +447,6 @@ def from_image(
453447def process_data_with_model (
454448 data : BinaryIO ,
455449 model_name : Optional [str ],
456- is_image : bool = False ,
457- fixed_layouts : Optional [List [Optional [List [TextRegion ]]]] = None ,
458- extract_tables : bool = False ,
459- pdf_image_dpi : int = 200 ,
460450 ** kwargs ,
461451) -> DocumentLayout :
462452 """Processes pdf file in the form of a file handler (supporting a read method) into a
@@ -467,10 +457,6 @@ def process_data_with_model(
467457 layout = process_file_with_model (
468458 tmp_file .name ,
469459 model_name ,
470- is_image = is_image ,
471- fixed_layouts = fixed_layouts ,
472- extract_tables = extract_tables ,
473- pdf_image_dpi = pdf_image_dpi ,
474460 ** kwargs ,
475461 )
476462
@@ -484,6 +470,8 @@ def process_file_with_model(
484470 fixed_layouts : Optional [List [Optional [List [TextRegion ]]]] = None ,
485471 extract_tables : bool = False ,
486472 pdf_image_dpi : int = 200 ,
473+ extract_images_in_pdf : bool = False ,
474+ image_output_dir_path : Optional [str ] = None ,
487475 ** kwargs ,
488476) -> DocumentLayout :
489477 """Processes pdf file with name filename into a DocumentLayout by using a model identified by
@@ -514,6 +502,8 @@ def process_file_with_model(
514502 fixed_layouts = fixed_layouts ,
515503 extract_tables = extract_tables ,
516504 pdf_image_dpi = pdf_image_dpi ,
505+ extract_images_in_pdf = extract_images_in_pdf ,
506+ image_output_dir_path = image_output_dir_path ,
517507 ** kwargs ,
518508 )
519509 )
0 commit comments