Skip to content

Commit 4148834

Browse files
fix: correct order of kwargs in pandoc (#421)
* fix: correct order of kwargs in pandoc
* only skip epub tests in Docker
* changelog

---------

Co-authored-by: Crag Wolfe <[email protected]>
Co-authored-by: cragwolfe <[email protected]>
1 parent 59785e4 commit 4148834

File tree

6 files changed

+18
-9
lines changed

6 files changed

+18
-9
lines changed

Diff for: CHANGELOG.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
## 0.5.8-dev6
1+
## 0.5.8
22

33
### Enhancements
44

@@ -16,6 +16,7 @@
1616
### Fixes
1717

1818
* `convert_file_to_text` now passes through the `source_format` and `target_format` kwargs.
19+
Previously they were hard coded.
1920
* Partitioning functions that accept a `text` kwarg no longer raise an error if an empty
2021
string is passed (an empty list of elements is returned instead).
2122
* `partition_json` no longer fails if the input is an empty list.

Diff for: test_unstructured/partition/test_auto.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
ListItem(text="Violets are blue"),
2929
]
3030

31+
is_in_docker = os.path.exists("/.dockerenv")
32+
3133

3234
def test_auto_partition_email_from_filename():
3335
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake-email.eml")
@@ -312,7 +314,7 @@ def test_auto_partition_pptx_from_filename():
312314
assert elements[0].metadata.filename == filename
313315

314316

315-
@pytest.mark.xfail(reason="Requirements mismatch, should only fail in docker test")
317+
@pytest.mark.skipif(is_in_docker, reason="Skipping this test in Docker container")
316318
def test_auto_partition_ppt_from_filename():
317319
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake-power-point.ppt")
318320
elements = partition(filename=filename)
@@ -326,15 +328,15 @@ def test_auto_with_page_breaks():
326328
assert PageBreak() in elements
327329

328330

329-
@pytest.mark.xfail(reason="Requirements mismatch, should only fail in docker test")
331+
@pytest.mark.skipif(is_in_docker, reason="Skipping this test in Docker container")
330332
def test_auto_partition_epub_from_filename():
331333
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "winter-sports.epub")
332334
elements = partition(filename=filename)
333335
assert len(elements) > 0
334336
assert elements[0].text.startswith("The Project Gutenberg eBook of Winter Sports")
335337

336338

337-
@pytest.mark.xfail(reason="Requirements mismatch, should only fail in docker test")
339+
@pytest.mark.skipif(is_in_docker, reason="Skipping this test in Docker container")
338340
def test_auto_partition_epub_from_file():
339341
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "winter-sports.epub")
340342
with open(filename, "rb") as f:

Diff for: test_unstructured/partition/test_epub.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,18 @@
77

88
DIRECTORY = pathlib.Path(__file__).parent.resolve()
99

10+
is_in_docker = os.path.exists("/.dockerenv")
1011

11-
@pytest.mark.xfail(reason="Requirements mismatch, should only fail in docker test")
12+
13+
@pytest.mark.skipif(is_in_docker, reason="Skipping this test in Docker container")
1214
def test_partition_epub_from_filename():
1315
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "winter-sports.epub")
1416
elements = partition_epub(filename=filename)
1517
assert len(elements) > 0
1618
assert elements[0].text.startswith("The Project Gutenberg eBook of Winter Sports")
1719

1820

19-
@pytest.mark.xfail(reason="Requirements mismatch, should only fail in docker test")
21+
@pytest.mark.skipif(is_in_docker, reason="Skipping this test in Docker container")
2022
def test_partition_epub_from_file():
2123
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "winter-sports.epub")
2224
with open(filename, "rb") as f:

Diff for: test_unstructured/partition/test_json.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010

1111
DIRECTORY = pathlib.Path(__file__).parent.resolve()
1212

13+
is_in_docker = os.path.exists("/.dockerenv")
14+
1315
test_files = [
1416
"fake-text.txt",
1517
"layout-parser-paper-fast.pdf",
@@ -18,12 +20,14 @@
1820
"fake-email.eml",
1921
pytest.param(
2022
"fake-power-point.ppt",
21-
marks=pytest.mark.xfail(reason="Requirements mismatch, should only fail in docker test"),
23+
marks=pytest.mark.skipif(is_in_docker, reason="Skipping this test in Docker container"),
2224
),
2325
"fake.docx",
2426
"fake-power-point.pptx",
2527
]
2628

29+
is_in_docker = os.path.exists("/.dockerenv")
30+
2731

2832
@pytest.mark.parametrize("filename", test_files)
2933
def test_partition_json_from_filename(filename: str):

Diff for: unstructured/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.5.8-dev6" # pragma: no cover
1+
__version__ = "0.5.8" # pragma: no cover

Diff for: unstructured/file_utils/file_conversion.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
def convert_file_to_text(filename: str, source_format: str, target_format: str) -> str:
1010
"""Uses pandoc to convert the source document to a raw text string."""
1111
try:
12-
text = pypandoc.convert_file(filename, source_format, format=target_format)
12+
text = pypandoc.convert_file(filename, target_format, format=source_format)
1313
except FileNotFoundError as err:
1414
msg = (
1515
"Error converting the file to text. Ensure you have the pandoc "

0 commit comments

Comments
 (0)