Skip to content

Commit 433aee4

Browse files
committed
Handle non-seekable streams & reset stream position after exception
1 parent 2ce4657 commit 433aee4

File tree

1 file changed

+23
-13
lines changed

1 file changed

+23
-13
lines changed

src/bedrock_ge/gi/io_utils.py

Lines changed: 23 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -91,18 +91,27 @@ def _read_from_path(path: Path):
9191
# IO[bytes]
9292
if isinstance(source, io.BufferedIOBase):
9393
try:
94+
if not source.seekable():
95+
# For non-seekable streams, read what we can without seeking
96+
sample = source.read(SAMPLE_SIZE)
97+
if isinstance(sample, bytes):
98+
return _detect_from_bytes(sample)
99+
else:
100+
return DEFAULT_ENCODING
101+
102+
# For seekable streams, preserve position
94103
original_position = source.tell()
95-
source.seek(0)
96-
sample = source.read(SAMPLE_SIZE)
97-
if isinstance(sample, bytes):
98-
encoding = _detect_from_bytes(sample)
99-
else:
100-
# if not bytes, it's a custom string-like type that was not caught
101-
encoding = DEFAULT_ENCODING
102-
source.seek(original_position)
103-
return encoding
104-
except (AttributeError, IOError):
105-
# use default if the stream does not have a `read()` or `seek()` attribute
104+
try:
105+
source.seek(0)
106+
sample = source.read(SAMPLE_SIZE)
107+
if isinstance(sample, bytes):
108+
encoding = _detect_from_bytes(sample)
109+
else:
110+
encoding = DEFAULT_ENCODING
111+
return encoding
112+
finally:
113+
source.seek(original_position)
114+
except (AttributeError, IOError, OSError):
106115
return DEFAULT_ENCODING
107116

108117
raise TypeError(f"Unsupported input type for encoding detection: {type(source)}")
@@ -147,12 +156,12 @@ def _bytes_source(bytes_content: bytes):
147156
raise FileNotFoundError(f"Path does not exist or is not a file: {source}")
148157

149158
elif isinstance(source, io.TextIOBase):
150-
source.seek(0)
159+
# Don't seek on passed streams - let caller manage position
151160
return nullcontext(source)
152161

153162
elif isinstance(source, io.BufferedIOBase):
154163
text_stream = io.TextIOWrapper(source, encoding=encoding)
155-
text_stream.seek(0)
164+
# Don't seek on wrapped stream - let caller manage position
156165
return nullcontext(text_stream)
157166

158167
elif isinstance(source, bytes):
@@ -228,6 +237,7 @@ def brgi_db_to_dfs(
228237

229238

230239
def convert_dtypes_object_to_string(dataframe: pd.DataFrame) -> pd.DataFrame:
240+
dataframe = dataframe.copy()
231241
object_cols = dataframe.select_dtypes(include=["object"]).columns
232242
dataframe[object_cols] = dataframe[object_cols].astype("string")
233243
return dataframe

0 commit comments

Comments
 (0)