Skip to content

Commit 3b6ff0e

Browse files
committed
ENH: create FileWrapper method for retrieving text stream (#1492)
1 parent a1c2aec commit 3b6ff0e

File tree

5 files changed

+94
-121
lines changed

5 files changed

+94
-121
lines changed

src/xtgeo/io/_file.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import contextlib
56
import io
67
import os
78
import pathlib
@@ -12,7 +13,9 @@
1213
from enum import Enum
1314
from os.path import join
1415
from tempfile import mkstemp
15-
from typing import TYPE_CHECKING, Literal, Union
16+
from typing import TYPE_CHECKING, Generator, Literal, TextIO, Union
17+
18+
from typing_extensions import Self
1619

1720
import xtgeo._cxtgeo
1821
from xtgeo.common.exceptions import InvalidFileFormatError
@@ -646,3 +649,28 @@ def _format_from_contents(self) -> FileFormat:
646649
return FileFormat.TSURF
647650

648651
return FileFormat.UNKNOWN
652+
653+
@contextlib.contextmanager
654+
def get_text_stream(self: Self) -> Generator[TextIO, None, None]:
655+
"""
656+
Context manager to handle both file paths and file-like objects for reading.
657+
Yields:
658+
A text stream (TextIO) for reading.
659+
Raises:
660+
FileNotFoundError: If the file does not exist.
661+
"""
662+
663+
if not self.check_file():
664+
raise FileNotFoundError(f"\nFile {self.name}:\nThe file does not exist.")
665+
666+
if isinstance(self.file, pathlib.Path):
667+
with open(self.file, "r") as stream:
668+
yield stream
669+
elif isinstance(self.file, io.BytesIO):
670+
with io.TextIOWrapper(self.file) as text_wrapper:
671+
text_wrapper.seek(0)
672+
yield text_wrapper
673+
else:
674+
# StringIO is already a text stream
675+
self.file.seek(0)
676+
yield self.file

src/xtgeo/io/tsurf/_tsurf_reader.py

Lines changed: 8 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
import warnings
2-
from contextlib import contextmanager
32
from dataclasses import dataclass
4-
from io import BytesIO, TextIOWrapper
5-
from pathlib import Path
63
from typing import Any, Generator, TextIO
74

85
import numpy as np
@@ -681,34 +678,8 @@ def _parse_tsurf(stream: TextIO, filepath_errmsg: str) -> TSurfData:
681678
return tsurf_data
682679

683680

684-
@contextmanager
685-
def _get_text_stream(
686-
wrapped_file: FileWrapper,
687-
encoding: str = "utf-8",
688-
) -> Generator[TextIO, None, None]:
689-
"""Context manager that yields a text stream."""
690-
691-
if not wrapped_file or not wrapped_file.check_file():
692-
raise FileNotFoundError(
693-
f"\nIn file {wrapped_file.name}:\nThe file does not exist."
694-
)
695-
696-
wrapped_file.fileformat(FileFormat.TSURF.value[0], strict=True)
697-
698-
if isinstance(wrapped_file.file, Path):
699-
with open(wrapped_file.file, encoding=encoding) as stream:
700-
yield stream
701-
elif isinstance(wrapped_file.file, BytesIO):
702-
with TextIOWrapper(wrapped_file.file, encoding=encoding) as text_wrapper:
703-
yield text_wrapper
704-
else:
705-
# StringIO is already a text stream
706-
yield wrapped_file.file
707-
708-
709681
def read_tsurf(
710682
file: FileLike,
711-
encoding: str = "utf-8",
712683
) -> TSurfData:
713684
"""
714685
Read a file on the TSURF format and parse its triangulated surface data.
@@ -742,5 +713,11 @@ def read_tsurf(
742713

743714
wrapped_file = FileWrapper(file)
744715

745-
with _get_text_stream(wrapped_file, encoding) as stream:
746-
return _parse_tsurf(stream, str(wrapped_file.name))
716+
if not wrapped_file.check_file():
717+
raise FileNotFoundError(
718+
f"\nIn file {wrapped_file.name}:\nThe file does not exist."
719+
)
720+
wrapped_file.fileformat(FileFormat.TSURF.value[0], strict=True)
721+
722+
with wrapped_file.get_text_stream() as stream:
723+
return _parse_tsurf(stream, filepath_errmsg=str(wrapped_file.name))

src/xtgeo/xyz/_xyz_io.py

Lines changed: 3 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,8 @@
44

55
import contextlib
66
from copy import deepcopy
7-
from io import BytesIO, StringIO, TextIOWrapper
8-
from pathlib import Path
9-
from typing import TYPE_CHECKING, Any, Generator, Iterable, Literal, TextIO, TypeGuard
7+
from io import StringIO
8+
from typing import TYPE_CHECKING, Any, Iterable, Literal, TypeGuard
109

1110
import numpy as np
1211
import pandas as pd
@@ -252,34 +251,6 @@ def import_zmap(
252251
return {"xname": xname, "yname": yname, "zname": zname, "values": df}
253252

254253

255-
@contextlib.contextmanager
256-
def _get_text_stream(
257-
wrapped_file: FileWrapper, encoding: str = "utf-8"
258-
) -> Generator[TextIO, None, None]:
259-
"""Context manager to handle both file paths and file-like objects for reading."""
260-
261-
# TODO: consider moving method to xtgeo.io._file: FileWrapper
262-
# Ensure that the method appropriately handles the different
263-
# configurations/file types of the FileWrapper.
264-
265-
if not wrapped_file or not wrapped_file.check_file():
266-
raise FileNotFoundError(
267-
f"\nFile {wrapped_file.name}:\nThe file does not exist."
268-
)
269-
270-
if isinstance(wrapped_file.file, Path):
271-
with open(wrapped_file.file, "r", encoding=encoding) as stream:
272-
yield stream
273-
elif isinstance(wrapped_file.file, BytesIO):
274-
with TextIOWrapper(wrapped_file.file, encoding=encoding) as text_wrapper:
275-
text_wrapper.seek(0)
276-
yield text_wrapper
277-
else:
278-
# StringIO is already a text stream
279-
wrapped_file.file.seek(0)
280-
yield wrapped_file.file
281-
282-
283254
def import_rms_attr(pfile: FileWrapper, zname: str = "Z_TVDSS") -> dict[str, Any]:
284255
"""The RMS ascii file Points format with attributes.
285256
@@ -323,7 +294,7 @@ def import_rms_attr(pfile: FileWrapper, zname: str = "Z_TVDSS") -> dict[str, Any
323294
# parse header
324295
skiprows = 0
325296

326-
with _get_text_stream(pfile) as rmsfile:
297+
with pfile.get_text_stream() as rmsfile:
327298
for iline in range(20):
328299
fields = rmsfile.readline().split()
329300
if len(fields) != 2:

tests/test_io/test_file.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,59 @@ def test_fileformat_provided_prefer_given(testdata_path, filename, expected_form
253253
xtgeo_file.fileformat(fileformat="segy", strict=True) == FileFormat.SEGY
254254

255255

256+
def test_get_text_stream(testdata_path: str) -> None:
    """Verify that a file on disk is exposed as a readable text stream."""
    tsurf_path = pathlib.Path(testdata_path, "surfaces/drogon/3/F5.ts")
    wrapper = FileWrapper(tsurf_path)
    with wrapper.get_text_stream() as text:
        assert isinstance(text, io.TextIOWrapper)
        first_line = text.readline()
        assert first_line.startswith("GOCAD TSurf 1")
263+
264+
265+
@pytest.mark.parametrize("filename", ["NOSUCH.EGRID", "NOSUCH/NOSUCH.EGRID"])
def test_get_text_stream_file_does_not_exist(
    reek_grid_path: pathlib.Path, filename: str
) -> None:
    """Verify that requesting a text stream for a missing file raises."""
    missing = FileWrapper(reek_grid_path / filename)
    # Building the context manager runs none of its body; the existence
    # check only happens once it is entered.
    stream_cm = missing.get_text_stream()
    with pytest.raises(FileNotFoundError, match="file does not exist"):
        # Enter explicitly (rather than `with ...: pass`) to ensure proper
        # test coverage.
        stream_cm.__enter__()
274+
275+
276+
def test_get_text_stream_from_binary_file(testdata_path: str) -> None:
    """Verify that reading a binary file through the text stream fails."""
    egrid_path = pathlib.Path(testdata_path, "3dgrids/reek/REEK.EGRID")
    wrapper = FileWrapper(egrid_path)

    # Documents the expected behavior: decoding binary content as text
    # raises UnicodeDecodeError.
    with pytest.raises(UnicodeDecodeError):
        with wrapper.get_text_stream() as text:
            text.readlines()
285+
286+
287+
def test_get_text_stream_empty_file(tmp_path: pathlib.Path) -> None:
    """Test getting a text stream from an empty text file."""
    # Create the file in pytest's per-test temporary directory rather than
    # the shared test-data tree, so the run leaves no residue and does not
    # require the test data to be writable.
    empty_file_path = tmp_path / "empty_file.txt"
    empty_file_path.touch()
    xtgeo_file = FileWrapper(empty_file_path)
    with xtgeo_file.get_text_stream() as stream:
        assert stream.readlines() == []
294+
295+
296+
def test_get_text_stream_from_memstream() -> None:
    """Verify that in-memory text and byte streams yield the expected lines."""
    sample = "Line 1\nLine 2\nLine 3\n"
    # Each case pairs an in-memory source with its expected line count.
    cases = (
        (io.StringIO(), 0),
        (io.BytesIO(), 0),
        (io.StringIO(sample), 3),
        (io.BytesIO(sample.encode()), 3),
    )
    for memstream, expected_count in cases:
        with FileWrapper(memstream).get_text_stream() as stream:
            assert len(stream.readlines()) == expected_count
307+
308+
256309
@pytest.mark.parametrize("strict", [False, True])
257310
@pytest.mark.parametrize("filename", SURFACE_FILE_FORMATS.keys())
258311
def test_fileformat_hdf_stream(testdata_path, filename, strict):

tests/test_io/test_tsurf/test_tsurf_reader.py

Lines changed: 1 addition & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from io import BytesIO, StringIO
1+
from io import StringIO
22
from pathlib import Path
33

44
import pytest
@@ -115,55 +115,6 @@ def test_file_string_input(tmp_path: str, complete_tsurf_file) -> None:
115115
assert result_path is not None
116116

117117

118-
def test_file_path_input(tmp_path: Path, complete_tsurf_file) -> None:
119-
"""Test reading from Path input."""
120-
# Test with Path
121-
filepath = tmp_path / "test.ts"
122-
with open(filepath, "w") as f:
123-
f.write(complete_tsurf_file)
124-
125-
result_path = read_tsurf(filepath)
126-
assert result_path is not None
127-
128-
129-
def test_file_stringio_input(complete_tsurf_file) -> None:
130-
"""Test reading from StringIO input."""
131-
result_stringio = read_tsurf(tsurf_stream(complete_tsurf_file))
132-
assert result_stringio is not None
133-
134-
135-
def test_file_bytesio_input(complete_tsurf_file) -> None:
136-
"""Test reading from BytesIO input."""
137-
result_bytesio = read_tsurf(BytesIO(complete_tsurf_file.encode("utf-8")))
138-
assert result_bytesio is not None
139-
140-
141-
def test_file_non_regular_file_input(tmp_path: Path) -> None:
142-
"""Test reading from a non-regular file (e.g., folder)."""
143-
144-
non_regular_file = tmp_path / "some_folder"
145-
non_regular_file.mkdir()
146-
147-
with pytest.raises(FileNotFoundError, match="does not exist"):
148-
read_tsurf(non_regular_file)
149-
150-
151-
def test_file_other_than_filelike_input() -> None:
152-
"""Test reading from an unsupported input type."""
153-
with pytest.raises(
154-
RuntimeError, match="Cannot instantiate <class 'xtgeo.io._file.FileWrapper'>"
155-
):
156-
read_tsurf(12345) # Invalid input type
157-
158-
159-
def test_file_empty():
160-
"""Test that empty file raises error."""
161-
with pytest.raises(
162-
ValueError, match="does not match format detected from file contents"
163-
):
164-
read_tsurf(StringIO(""))
165-
166-
167118
def test_file_unusual_suffix(minimal_tsurf_file, tmp_path: Path) -> None:
168119
"""
169120
Test with unusual file suffix.
@@ -176,13 +127,6 @@ def test_file_unusual_suffix(minimal_tsurf_file, tmp_path: Path) -> None:
176127
assert result_unusual_suffix is not None
177128

178129

179-
def test_file_non_existent(tmp_path: Path) -> None:
180-
"""Test with non-existent file."""
181-
filepath = tmp_path / "non_existent.ts"
182-
with pytest.raises(FileNotFoundError, match="does not exist"):
183-
read_tsurf(filepath)
184-
185-
186130
def test_comments_and_empty_lines(tmp_path: Path) -> None:
187131
"""Test handling of comments and empty lines in files."""
188132
content_lines = [

0 commit comments

Comments
 (0)