From 1fa0d364ce9e33ac1a96efc70deefceb8682fb73 Mon Sep 17 00:00:00 2001
From: Federico Rampazzo
Date: Tue, 19 Nov 2024 13:14:45 +0200
Subject: [PATCH] Prefer using provided filename over detection from file.name

---
 unstructured/file_utils/filetype.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/unstructured/file_utils/filetype.py b/unstructured/file_utils/filetype.py
index d109cd7384..06152764bb 100644
--- a/unstructured/file_utils/filetype.py
+++ b/unstructured/file_utils/filetype.py
@@ -297,14 +297,15 @@ def encoding(self) -> str:
     @lazyproperty
     def extension(self) -> str:
         """Best filename-extension we can muster, "" when there is no available source."""
-        # -- get from file_path, or file when it has a name (path) --
-        with self.open() as file:
-            if hasattr(file, "name") and file.name:
-                return os.path.splitext(file.name)[1].lower()
 
-        # -- otherwise use metadata file-path when provided --
+        # -- use metadata file-path when provided --
         if file_path := self._metadata_file_path:
             return os.path.splitext(file_path)[1].lower()
+
+        # -- otherwise get from file_path, or file when it has a name (path) --
+        with self.open() as file:
+            if hasattr(file, "name") and file.name:
+                return os.path.splitext(file.name)[1].lower()
 
         # -- otherwise empty str means no extension, same as a path like "a/b/name-no-ext" --
         return ""