Skip to content

Commit 1cf12e9

Browse files
authored
chore: streamline kwarg handling (#264)
Improves kwarg handling so that kwargs are handled explicitly when needed, and suppressed otherwise.
1 parent 326f180 commit 1cf12e9

File tree

4 files changed

+10
-17
lines changed

4 files changed

+10
-17
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
## 0.7.10-dev2
1+
## 0.7.10
22

3+
* Handle kwargs explicitly when needed, suppress otherwise
34
* fix: Reduce Chipper memory consumption on x86_64 cpus
45
* fix: Skips ordering elements coming from Chipper
56
* fix: After refactoring to introduce Chipper, annotate() wasn't able to show text with extra info from elements; this is fixed now.

test_unstructured_inference/inference/test_layout.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import os.path
33
import tempfile
44
from functools import partial
5-
from unittest.mock import mock_open, patch
5+
from unittest.mock import ANY, mock_open, patch
66

77
import numpy as np
88
import pytest
@@ -675,6 +675,8 @@ def test_process_file_with_model_routing(monkeypatch, model_type, is_detection_m
675675
fixed_layouts=None,
676676
extract_tables=False,
677677
pdf_image_dpi=200,
678+
extract_images_in_pdf=ANY,
679+
image_output_dir_path=ANY,
678680
)
679681

680682

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.7.10-dev2" # pragma: no cover
1+
__version__ = "0.7.10" # pragma: no cover

unstructured_inference/inference/layout.py

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,7 @@ def from_pages(cls, pages: List[PageLayout]) -> DocumentLayout:
7171
def from_file(
7272
cls,
7373
filename: str,
74-
detection_model: Optional[UnstructuredObjectDetectionModel] = None,
75-
element_extraction_model: Optional[UnstructuredElementExtractionModel] = None,
7674
fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None,
77-
extract_tables: bool = False,
7875
pdf_image_dpi: int = 200,
7976
**kwargs,
8077
) -> DocumentLayout:
@@ -108,11 +105,8 @@ def from_file(
108105
image,
109106
number=i + 1,
110107
document_filename=filename,
111-
detection_model=detection_model,
112-
element_extraction_model=element_extraction_model,
113108
layout=layout,
114109
fixed_layout=fixed_layout,
115-
extract_tables=extract_tables,
116110
**kwargs,
117111
)
118112
pages.append(page)
@@ -453,10 +447,6 @@ def from_image(
453447
def process_data_with_model(
454448
data: BinaryIO,
455449
model_name: Optional[str],
456-
is_image: bool = False,
457-
fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None,
458-
extract_tables: bool = False,
459-
pdf_image_dpi: int = 200,
460450
**kwargs,
461451
) -> DocumentLayout:
462452
"""Processes pdf file in the form of a file handler (supporting a read method) into a
@@ -467,10 +457,6 @@ def process_data_with_model(
467457
layout = process_file_with_model(
468458
tmp_file.name,
469459
model_name,
470-
is_image=is_image,
471-
fixed_layouts=fixed_layouts,
472-
extract_tables=extract_tables,
473-
pdf_image_dpi=pdf_image_dpi,
474460
**kwargs,
475461
)
476462

@@ -484,6 +470,8 @@ def process_file_with_model(
484470
fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None,
485471
extract_tables: bool = False,
486472
pdf_image_dpi: int = 200,
473+
extract_images_in_pdf: bool = False,
474+
image_output_dir_path: Optional[str] = None,
487475
**kwargs,
488476
) -> DocumentLayout:
489477
"""Processes pdf file with name filename into a DocumentLayout by using a model identified by
@@ -514,6 +502,8 @@ def process_file_with_model(
514502
fixed_layouts=fixed_layouts,
515503
extract_tables=extract_tables,
516504
pdf_image_dpi=pdf_image_dpi,
505+
extract_images_in_pdf=extract_images_in_pdf,
506+
image_output_dir_path=image_output_dir_path,
517507
**kwargs,
518508
)
519509
)

0 commit comments

Comments
 (0)