@@ -91,18 +91,27 @@ def _read_from_path(path: Path):
9191 # IO[bytes]
9292 if isinstance (source , io .BufferedIOBase ):
9393 try :
94+ if not source .seekable ():
95+ # For non-seekable streams, read what we can without seeking
96+ sample = source .read (SAMPLE_SIZE )
97+ if isinstance (sample , bytes ):
98+ return _detect_from_bytes (sample )
99+ else :
100+ return DEFAULT_ENCODING
101+
102+ # For seekable streams, preserve position
94103 original_position = source .tell ()
95- source . seek ( 0 )
96- sample = source .read ( SAMPLE_SIZE )
97- if isinstance ( sample , bytes ):
98- encoding = _detect_from_bytes (sample )
99- else :
100- # if not bytes, then its a custom string-like type that was not caught
101- encoding = DEFAULT_ENCODING
102- source . seek ( original_position )
103- return encoding
104- except ( AttributeError , IOError ):
105- # use default if the stream does not have a `read()` or `seek()` attribute
104+ try :
105+ source .seek ( 0 )
106+ sample = source . read ( SAMPLE_SIZE )
107+ if isinstance (sample , bytes ):
108+ encoding = _detect_from_bytes ( sample )
109+ else :
110+ encoding = DEFAULT_ENCODING
111+ return encoding
112+ finally :
113+ source . seek ( original_position )
114+ except ( AttributeError , IOError , OSError ):
106115 return DEFAULT_ENCODING
107116
108117 raise TypeError (f"Unsupported input type for encoding detection: { type (source )} " )
@@ -147,12 +156,12 @@ def _bytes_source(bytes_content: bytes):
147156 raise FileNotFoundError (f"Path does not exist or is not a file: { source } " )
148157
149158 elif isinstance (source , io .TextIOBase ):
150- source . seek ( 0 )
159+ # Don't seek on passed streams - let caller manage position
151160 return nullcontext (source )
152161
153162 elif isinstance (source , io .BufferedIOBase ):
154163 text_stream = io .TextIOWrapper (source , encoding = encoding )
155- text_stream . seek ( 0 )
164+ # Don't seek on wrapped stream - let caller manage position
156165 return nullcontext (text_stream )
157166
158167 elif isinstance (source , bytes ):
@@ -228,6 +237,7 @@ def brgi_db_to_dfs(
228237
229238
def convert_dtypes_object_to_string(dataframe: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *dataframe* with every object column cast to "string".

    The input frame is never modified; callers always receive a new
    DataFrame, even when there is nothing to convert.

    Args:
        dataframe: Frame whose ``object``-dtype columns should be converted.

    Returns:
        A new DataFrame with the same columns in the same order, where each
        column that had ``object`` dtype now has pandas' ``"string"`` dtype.
        All other columns keep their dtype.
    """
    object_cols = dataframe.select_dtypes(include=["object"]).columns
    if object_cols.empty:
        # Nothing to convert, but still hand back an independent frame so the
        # "input is never mutated / result is a new object" contract holds.
        return dataframe.copy()

    # `astype` with a column -> dtype mapping already returns a new frame, so
    # a separate up-front `.copy()` followed by column assignment would copy
    # the data twice.
    return dataframe.astype(dict.fromkeys(object_cols, "string"))
0 commit comments