Skip to content

Commit b749b89

Browse files
authored
fix: disabled checking max pages for images (#3473)
Adds a fix related to #3431: the `pdf_hi_res_max_pages` page-count check is now skipped when the input is an image rather than a PDF.
1 parent 147514f commit b749b89

File tree

4 files changed

+12
-6
lines changed

4 files changed

+12
-6
lines changed

Diff for: CHANGELOG.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
## 0.15.1-dev8
1+
## 0.15.1-dev9
22

33
### Enhancements
44

Diff for: test_unstructured/partition/pdf_image/test_pdf.py

+5
Original file line numberDiff line numberDiff line change
@@ -1372,14 +1372,18 @@ def test_analysis_artifacts_saved():
13721372
("pdf/layout-parser-paper-with-empty-pages.pdf", 3, True),
13731373
("pdf/reliance.pdf", 3, False),
13741374
("pdf/reliance.pdf", 2, True),
1375+
("img/DA-1p.jpg", None, False),
1376+
("img/DA-1p.jpg", 2, False),
13751377
],
13761378
)
13771379
def test_pdf_hi_res_max_pages_argument(filename, pdf_hi_res_max_pages, expected_error):
1380+
is_image = not Path(filename).suffix.endswith("pdf")
13781381
if not expected_error:
13791382
pdf.partition_pdf_or_image(
13801383
filename=example_doc_path(filename),
13811384
strategy=PartitionStrategy.HI_RES,
13821385
pdf_hi_res_max_pages=pdf_hi_res_max_pages,
1386+
is_image=is_image,
13831387
)
13841388

13851389
else:
@@ -1388,4 +1392,5 @@ def test_pdf_hi_res_max_pages_argument(filename, pdf_hi_res_max_pages, expected_
13881392
filename=example_doc_path(filename),
13891393
strategy=PartitionStrategy.HI_RES,
13901394
pdf_hi_res_max_pages=pdf_hi_res_max_pages,
1395+
is_image=is_image,
13911396
)

Diff for: unstructured/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.15.1-dev8" # pragma: no cover
1+
__version__ = "0.15.1-dev9" # pragma: no cover

Diff for: unstructured/partition/pdf.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,7 @@ def _get_pdf_page_number(
518518
elif filename:
519519
number_of_pages = PdfReader(filename).get_num_pages()
520520
else:
521-
ValueError("Either 'file' or 'filename' must be provided.")
521+
raise ValueError("Either 'file' or 'filename' must be provided.")
522522
return number_of_pages
523523

524524

@@ -575,9 +575,10 @@ def _partition_pdf_or_image_local(
575575
process_file_with_pdfminer,
576576
)
577577

578-
check_pdf_hi_res_max_pages_exceeded(
579-
filename=filename, file=file, pdf_hi_res_max_pages=pdf_hi_res_max_pages
580-
)
578+
if not is_image:
579+
check_pdf_hi_res_max_pages_exceeded(
580+
filename=filename, file=file, pdf_hi_res_max_pages=pdf_hi_res_max_pages
581+
)
581582

582583
hi_res_model_name = hi_res_model_name or model_name or default_hi_res_model()
583584
if pdf_image_dpi is None:

0 commit comments

Comments
 (0)