Skip to content

Commit 7804e0d

Browse files
authored
fix(layout): Windows support (#376)
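**Summary** Remove the use of `tempfile.NamedTemporaryFile`, whose file was passed by name to `process_file_with_model` and re-opened while the original handle was still open. This fails on Windows, where a file created by `NamedTemporaryFile` cannot be opened a second time by name while it is still open. Write the data to a regular file inside a `tempfile.TemporaryDirectory` instead. Fixes #303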
1 parent 4662c4f commit 7804e0d

File tree

3 files changed

+16
-11
lines changed

3 files changed

+16
-11
lines changed

Diff for: CHANGELOG.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
## 0.7.37-dev1
1+
## 0.7.37-dev2
22

33
* refactor: remove layout analysis related code
44
* enhancement: Hide warning about table transformer weights not being loaded
5+
* fix(layout): Use TemporaryDirectory instead of NamedTemporaryFile for Windows support
56

67
## 0.7.36
78

Diff for: unstructured_inference/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.7.37-dev1" # pragma: no cover
1+
__version__ = "0.7.37-dev2" # pragma: no cover

Diff for: unstructured_inference/inference/layout.py

+13-9
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import os
44
import tempfile
55
from pathlib import PurePath
6-
from typing import BinaryIO, Collection, List, Optional, Union, cast
6+
from typing import Any, BinaryIO, Collection, List, Optional, Union, cast
77

88
import numpy as np
99
import pdf2image
@@ -323,15 +323,19 @@ def from_image(
323323
def process_data_with_model(
324324
data: BinaryIO,
325325
model_name: Optional[str],
326-
**kwargs,
326+
**kwargs: Any,
327327
) -> DocumentLayout:
328-
"""Processes pdf file in the form of a file handler (supporting a read method) into a
329-
DocumentLayout by using a model identified by model_name."""
330-
with tempfile.NamedTemporaryFile() as tmp_file:
331-
tmp_file.write(data.read())
332-
tmp_file.flush() # Make sure the file is written out
328+
"""Process PDF as file-like object `data` into a `DocumentLayout`.
329+
330+
Uses the model identified by `model_name`.
331+
"""
332+
with tempfile.TemporaryDirectory() as tmp_dir_path:
333+
file_path = os.path.join(tmp_dir_path, "document.pdf")
334+
with open(file_path, "wb") as f:
335+
f.write(data.read())
336+
f.flush()
333337
layout = process_file_with_model(
334-
tmp_file.name,
338+
file_path,
335339
model_name,
336340
**kwargs,
337341
)
@@ -345,7 +349,7 @@ def process_file_with_model(
345349
is_image: bool = False,
346350
fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None,
347351
pdf_image_dpi: int = 200,
348-
**kwargs,
352+
**kwargs: Any,
349353
) -> DocumentLayout:
350354
"""Processes pdf file with name filename into a DocumentLayout by using a model identified by
351355
model_name."""

0 commit comments

Comments
 (0)