Skip to content

Commit 76831f1

Browse files
refactor: partition_pdf() pass kwargs through fast strategy pipeline (#3040)
This PR passes `kwargs` through the `fast` strategy pipeline, which was missed in the previous PR (#3030). It also includes some code refactoring, so I recommend reviewing this PR commit by commit.

### Summary

- pass `kwargs` through the `fast` strategy pipeline, which allows users to specify additional params like `sort_mode`
- refactor: code reorganization
- cut a release for `0.14.0`

### Testing

CI should pass
1 parent 9cd0e70 commit 76831f1

File tree

6 files changed

+320
-313
lines changed

6 files changed

+320
-313
lines changed

Diff for: CHANGELOG.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
## 0.14.0-dev15
1+
## 0.14.0
22

33
### BREAKING CHANGES
44

Diff for: test_unstructured/partition/pdf_image/test_image.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ def test_partition_image_metadata_date(
326326
):
327327
mocked_last_modification_date = "2029-07-05T09:24:28"
328328
mocker.patch(
329-
"unstructured.partition.pdf.get_last_modified_date",
329+
"unstructured.partition.pdf_image.pdf_image_utils.get_last_modified_date",
330330
return_value=mocked_last_modification_date,
331331
)
332332
elements = image.partition_image(filename=filename)
@@ -340,7 +340,7 @@ def test_partition_image_with_hi_res_strategy_metadata_date(
340340
):
341341
mocked_last_modification_date = "2029-07-05T09:24:28"
342342
mocker.patch(
343-
"unstructured.partition.pdf.get_last_modified_date",
343+
"unstructured.partition.pdf_image.pdf_image_utils.get_last_modified_date",
344344
return_value=mocked_last_modification_date,
345345
)
346346
elements = image.partition_image(filename=filename, strategy=PartitionStrategy.HI_RES)
@@ -356,7 +356,7 @@ def test_partition_image_metadata_date_custom_metadata_date(
356356
expected_last_modification_date = "2009-07-05T09:24:28"
357357

358358
mocker.patch(
359-
"unstructured.partition.pdf.get_last_modified_date",
359+
"unstructured.partition.pdf_image.pdf_image_utils.get_last_modified_date",
360360
return_value=mocked_last_modification_date,
361361
)
362362
elements = image.partition_image(
@@ -375,7 +375,7 @@ def test_partition_image_with_hi_res_strategy_metadata_date_custom_metadata_date
375375
expected_last_modification_date = "2009-07-05T09:24:28"
376376

377377
mocker.patch(
378-
"unstructured.partition.pdf.get_last_modified_date",
378+
"unstructured.partition.pdf_image.pdf_image_utils.get_last_modified_date",
379379
return_value=mocked_last_modification_date,
380380
)
381381
elements = image.partition_image(
@@ -393,7 +393,7 @@ def test_partition_image_from_file_metadata_date(
393393
):
394394
mocked_last_modification_date = "2029-07-05T09:24:28"
395395
mocker.patch(
396-
"unstructured.partition.pdf.get_last_modified_date_from_file",
396+
"unstructured.partition.pdf_image.pdf_image_utils.get_last_modified_date_from_file",
397397
return_value=mocked_last_modification_date,
398398
)
399399
with open(filename, "rb") as f:
@@ -408,7 +408,7 @@ def test_partition_image_from_file_explicit_get_metadata_date(
408408
):
409409
mocked_last_modification_date = "2029-07-05T09:24:28"
410410
mocker.patch(
411-
"unstructured.partition.pdf.get_last_modified_date_from_file",
411+
"unstructured.partition.pdf_image.pdf_image_utils.get_last_modified_date_from_file",
412412
return_value=mocked_last_modification_date,
413413
)
414414
with open(filename, "rb") as f:
@@ -423,7 +423,7 @@ def test_partition_image_from_file_with_hi_res_strategy_metadata_date(
423423
):
424424
mocked_last_modification_date = "2029-07-05T09:24:28"
425425
mocker.patch(
426-
"unstructured.partition.pdf.get_last_modified_date_from_file",
426+
"unstructured.partition.pdf_image.pdf_image_utils.get_last_modified_date_from_file",
427427
return_value=mocked_last_modification_date,
428428
)
429429

@@ -439,7 +439,7 @@ def test_partition_image_from_file_with_hi_res_strategy_explicit_get_metadata_da
439439
):
440440
mocked_last_modification_date = "2029-07-05T09:24:28"
441441
mocker.patch(
442-
"unstructured.partition.pdf.get_last_modified_date_from_file",
442+
"unstructured.partition.pdf_image.pdf_image_utils.get_last_modified_date_from_file",
443443
return_value=mocked_last_modification_date,
444444
)
445445

@@ -459,7 +459,7 @@ def test_partition_image_from_file_metadata_date_custom_metadata_date(
459459
expected_last_modification_date = "2009-07-05T09:24:28"
460460

461461
mocker.patch(
462-
"unstructured.partition.pdf.get_last_modified_date_from_file",
462+
"unstructured.partition.pdf_image.pdf_image_utils.get_last_modified_date_from_file",
463463
return_value=mocked_last_modification_date,
464464
)
465465
with open(filename, "rb") as f:
@@ -479,7 +479,7 @@ def test_partition_image_from_file_with_hi_res_strategy_metadata_date_custom_met
479479
expected_last_modification_date = "2009-07-05T09:24:28"
480480

481481
mocker.patch(
482-
"unstructured.partition.pdf.get_last_modified_date_from_file",
482+
"unstructured.partition.pdf_image.pdf_image_utils.get_last_modified_date_from_file",
483483
return_value=mocked_last_modification_date,
484484
)
485485
with open(filename, "rb") as f:

Diff for: test_unstructured/partition/pdf_image/test_pdf.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -715,11 +715,11 @@ def test_partition_pdf_metadata_date(
715715
expected_last_modification_date = None
716716

717717
mocker.patch(
718-
"unstructured.partition.pdf.get_last_modified_date_from_file",
718+
"unstructured.partition.pdf_image.pdf_image_utils.get_last_modified_date_from_file",
719719
return_value=mocked_last_modification_date,
720720
)
721721
mocker.patch(
722-
"unstructured.partition.pdf.get_last_modified_date",
722+
"unstructured.partition.pdf_image.pdf_image_utils.get_last_modified_date",
723723
return_value=mocked_last_modification_date,
724724
)
725725

Diff for: unstructured/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.14.0-dev15" # pragma: no cover
1+
__version__ = "0.14.0" # pragma: no cover

0 commit comments

Comments
 (0)