Skip to content

Commit d298f57

Browse files
fix: file-not-found error when filename is provided but file is not on disk (#446)
1 parent e6d6509 commit d298f57

File tree

5 files changed

+17
-6
lines changed

5 files changed

+17
-6
lines changed

Diff for: CHANGELOG.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
## 0.5.10-dev2
1+
## 0.5.10
22

33
### Enhancements
44

@@ -11,6 +11,8 @@
1111

1212
### Fixes
1313

14+
* Fix file-not-found error raised when a filename is provided but the file is not on disk
15+
1416
## 0.5.9
1517

1618
### Enhancements

Diff for: test_unstructured/file_utils/test_filetype.py

+9
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,17 @@ def test_detect_filetype_from_filename(file, expected):
5555
],
5656
)
5757
def test_detect_filetype_from_filename_with_extension(monkeypatch, file, expected):
58+
"""Test that we detect the filetype from the filename extension when libmagic is not available
59+
or the file does not exist."""
60+
# Test when libmagic is not available
5861
monkeypatch.setattr(filetype, "LIBMAGIC_AVAILABLE", False)
5962
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, file)
6063
assert detect_filetype(filename) == expected
64+
# Test when the file does not exist
65+
monkeypatch.setattr(filetype, "LIBMAGIC_AVAILABLE", True)
66+
extension = pathlib.Path(file).suffix
67+
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "not-on-disk" + extension)
68+
assert detect_filetype(filename) == expected
6169

6270

6371
@pytest.mark.parametrize(
@@ -243,6 +251,7 @@ def test_detect_filetype_detects_png(monkeypatch):
243251

244252
def test_detect_filetype_detects_unknown_text_types_as_txt(monkeypatch):
245253
monkeypatch.setattr(magic, "from_file", lambda *args, **kwargs: "text/new-type")
254+
monkeypatch.setattr(os.path, "isfile", lambda *args, **kwargs: True)
246255
assert detect_filetype(filename="made_up.png") == FileType.TXT
247256

248257

Diff for: unstructured/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.5.10-dev2" # pragma: no cover
1+
__version__ = "0.5.10" # pragma: no cover

Diff for: unstructured/file_utils/filetype.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -177,12 +177,12 @@ def detect_filetype(
177177
return filetype
178178

179179
if filename or file_filename:
180-
_, extension = os.path.splitext(filename or file_filename or "")
180+
_filename = filename or file_filename or ""
181+
_, extension = os.path.splitext(_filename)
181182
extension = extension.lower()
182-
if LIBMAGIC_AVAILABLE:
183+
if os.path.isfile(_filename) and LIBMAGIC_AVAILABLE:
183184
mime_type = magic.from_file(filename or file_filename, mime=True) # type: ignore
184185
else:
185-
# might not need this
186186
return EXT_TO_FILETYPE.get(extension.lower(), FileType.UNK)
187187
elif file is not None:
188188
extension = None

Diff for: unstructured/partition/auto.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def partition(
3232
3333
Parameters
3434
----------
35-
filename
35+
filename
3636
A string defining the target filename path.
3737
content_type
3838
A string defining the file content in MIME type

0 commit comments

Comments
 (0)