Skip to content

Commit b0f1b1d

Browse files
Rename DataFrameIterator to PandasDataFrameIterator and PolarsDataFrameIterator
Rename the DataFrameIterator classes to include their respective module prefix for clarity and to avoid confusion when importing from both modules.

- pyathena.pandas.result_set.DataFrameIterator → PandasDataFrameIterator
- pyathena.polars.result_set.DataFrameIterator → PolarsDataFrameIterator

Also updates all documentation and test references.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 69d0625 commit b0f1b1d

File tree

7 files changed

+33
-30
lines changed

7 files changed

+33
-30
lines changed

docs/api/pandas.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ Pandas Result Set
2323
:members:
2424
:inherited-members:
2525

26-
.. autoclass:: pyathena.pandas.result_set.DataFrameIterator
26+
.. autoclass:: pyathena.pandas.result_set.PandasDataFrameIterator
2727
:members:
2828

2929
Pandas Data Converters

docs/api/polars.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ Polars Result Set
2323
:members:
2424
:inherited-members:
2525

26+
.. autoclass:: pyathena.polars.result_set.PolarsDataFrameIterator
27+
:members:
28+
2629
Polars Data Converters
2730
----------------------
2831

docs/pandas.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ SQLAlchemy allows this option to be specified in the connection string.
381381
382382
awsathena+pandas://:@athena.{region_name}.amazonaws.com:443/{schema_name}?s3_staging_dir={s3_staging_dir}&chunksize=1000000...
383383
384-
When this option is used, the object returned by the as_pandas method is a ``DataFrameIterator`` object.
384+
When this option is used, the object returned by the as_pandas method is a ``PandasDataFrameIterator`` object.
385385
This object has exactly the same interface as the ``TextFileReader`` object and can be handled in the same way.
386386

387387
.. code:: python
@@ -418,7 +418,7 @@ PandasCursor provides an ``iter_chunks()`` method for convenient chunked process
418418
# Memory can be freed after each chunk
419419
del chunk
420420
421-
The ``DataFrameIterator`` also has an ``as_pandas()`` method that collects all chunks into a single DataFrame:
421+
The ``PandasDataFrameIterator`` also has an ``as_pandas()`` method that collects all chunks into a single DataFrame:
422422

423423
.. code:: python
424424

pyathena/pandas/cursor.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
DefaultPandasTypeConverter,
2626
DefaultPandasUnloadTypeConverter,
2727
)
28-
from pyathena.pandas.result_set import AthenaPandasResultSet, DataFrameIterator
28+
from pyathena.pandas.result_set import AthenaPandasResultSet, PandasDataFrameIterator
2929
from pyathena.result_set import WithResultSet
3030

3131
if TYPE_CHECKING:
@@ -331,11 +331,11 @@ def fetchall(
331331
result_set = cast(AthenaPandasResultSet, self.result_set)
332332
return result_set.fetchall()
333333

334-
def as_pandas(self) -> Union["DataFrame", DataFrameIterator]:
335-
"""Return DataFrame or DataFrameIterator based on chunksize setting.
334+
def as_pandas(self) -> Union["DataFrame", PandasDataFrameIterator]:
335+
"""Return DataFrame or PandasDataFrameIterator based on chunksize setting.
336336
337337
Returns:
338-
DataFrame when chunksize is None, DataFrameIterator when chunksize is set.
338+
DataFrame when chunksize is None, PandasDataFrameIterator when chunksize is set.
339339
"""
340340
if not self.has_result_set:
341341
raise ProgrammingError("No result set.")

pyathena/pandas/result_set.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def _no_trunc_date(df: "DataFrame") -> "DataFrame":
3838
return df
3939

4040

41-
class DataFrameIterator(abc.Iterator): # type: ignore
41+
class PandasDataFrameIterator(abc.Iterator): # type: ignore
4242
"""Iterator for chunked DataFrame results from Athena queries.
4343
4444
This class wraps either a pandas TextFileReader (for chunked reading) or
@@ -98,11 +98,11 @@ def __next__(self) -> "DataFrame":
9898
self.close()
9999
raise
100100

101-
def __iter__(self) -> "DataFrameIterator":
101+
def __iter__(self) -> "PandasDataFrameIterator":
102102
"""Return self as iterator."""
103103
return self
104104

105-
def __enter__(self) -> "DataFrameIterator":
105+
def __enter__(self) -> "PandasDataFrameIterator":
106106
"""Context manager entry."""
107107
return self
108108

@@ -285,11 +285,11 @@ def __init__(
285285
if self.state == AthenaQueryExecution.STATE_SUCCEEDED and self.output_location:
286286
df = self._as_pandas()
287287
trunc_date = _no_trunc_date if self.is_unload else self._trunc_date
288-
self._df_iter = DataFrameIterator(df, trunc_date)
288+
self._df_iter = PandasDataFrameIterator(df, trunc_date)
289289
else:
290290
import pandas as pd
291291

292-
self._df_iter = DataFrameIterator(pd.DataFrame(), _no_trunc_date)
292+
self._df_iter = PandasDataFrameIterator(pd.DataFrame(), _no_trunc_date)
293293
self._iterrows = self._df_iter.iterrows()
294294

295295
def _get_parquet_engine(self) -> str:
@@ -670,12 +670,12 @@ def _as_pandas(self) -> Union["TextFileReader", "DataFrame"]:
670670
df = self._read_csv()
671671
return df
672672

673-
def as_pandas(self) -> Union[DataFrameIterator, "DataFrame"]:
673+
def as_pandas(self) -> Union[PandasDataFrameIterator, "DataFrame"]:
674674
if self._chunksize is None:
675675
return next(self._df_iter)
676676
return self._df_iter
677677

678-
def iter_chunks(self) -> DataFrameIterator:
678+
def iter_chunks(self) -> PandasDataFrameIterator:
679679
"""Iterate over result chunks as pandas DataFrames.
680680
681681
This method provides an iterator interface for processing large result sets.
@@ -684,7 +684,7 @@ def iter_chunks(self) -> DataFrameIterator:
684684
single DataFrame.
685685
686686
Returns:
687-
DataFrameIterator that yields pandas DataFrames for each chunk
687+
PandasDataFrameIterator that yields pandas DataFrames for each chunk
688688
of rows, or the entire DataFrame if chunksize was not specified.
689689
690690
Example:
@@ -706,6 +706,6 @@ def close(self) -> None:
706706
import pandas as pd
707707

708708
super().close()
709-
self._df_iter = DataFrameIterator(pd.DataFrame(), _no_trunc_date)
709+
self._df_iter = PandasDataFrameIterator(pd.DataFrame(), _no_trunc_date)
710710
self._iterrows = enumerate([])
711711
self._data_manifest = []

pyathena/polars/result_set.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
_logger = logging.getLogger(__name__)
3535

3636

37-
class DataFrameIterator(abc.Iterator): # type: ignore
37+
class PolarsDataFrameIterator(abc.Iterator): # type: ignore
3838
"""Iterator for chunked DataFrame results from Athena queries.
3939
4040
This class wraps either a Polars DataFrame iterator (for chunked reading) or
@@ -99,11 +99,11 @@ def __next__(self) -> "pl.DataFrame":
9999
self.close()
100100
raise
101101

102-
def __iter__(self) -> "DataFrameIterator":
102+
def __iter__(self) -> "PolarsDataFrameIterator":
103103
"""Return self as iterator."""
104104
return self
105105

106-
def __enter__(self) -> "DataFrameIterator":
106+
def __enter__(self) -> "PolarsDataFrameIterator":
107107
"""Context manager entry."""
108108
return self
109109

@@ -247,7 +247,7 @@ def __init__(
247247
else:
248248
import polars as pl
249249

250-
self._df_iter = DataFrameIterator(
250+
self._df_iter = PolarsDataFrameIterator(
251251
pl.DataFrame(), self.converters, self._get_column_names()
252252
)
253253
self._iterrows = self._df_iter.iterrows()
@@ -320,11 +320,11 @@ def _get_column_names(self) -> List[str]:
320320
description = self.description if self.description else []
321321
return [d[0] for d in description]
322322

323-
def _create_dataframe_iterator(self) -> DataFrameIterator:
323+
def _create_dataframe_iterator(self) -> PolarsDataFrameIterator:
324324
"""Create a DataFrame iterator for the result set.
325325
326326
Returns:
327-
DataFrameIterator that handles both chunked and non-chunked cases.
327+
PolarsDataFrameIterator that handles both chunked and non-chunked cases.
328328
"""
329329
if self._chunksize is not None:
330330
# Chunked mode: create lazy iterator
@@ -335,7 +335,7 @@ def _create_dataframe_iterator(self) -> DataFrameIterator:
335335
# Non-chunked mode: load entire DataFrame
336336
reader = self._as_polars()
337337

338-
return DataFrameIterator(reader, self.converters, self._get_column_names())
338+
return PolarsDataFrameIterator(reader, self.converters, self._get_column_names())
339339

340340
def fetchone(
341341
self,
@@ -661,7 +661,7 @@ def _iter_parquet_chunks(self) -> Iterator["pl.DataFrame"]:
661661
_logger.exception(f"Failed to read {self._unload_location}.")
662662
raise OperationalError(*e.args) from e
663663

664-
def iter_chunks(self) -> DataFrameIterator:
664+
def iter_chunks(self) -> PolarsDataFrameIterator:
665665
"""Iterate over result chunks as Polars DataFrames.
666666
667667
This method provides an iterator interface for processing large result sets.
@@ -670,7 +670,7 @@ def iter_chunks(self) -> DataFrameIterator:
670670
it yields the entire result as a single DataFrame.
671671
672672
Returns:
673-
DataFrameIterator that yields Polars DataFrames for each chunk
673+
PolarsDataFrameIterator that yields Polars DataFrames for each chunk
674674
of rows, or the entire DataFrame if chunksize was not specified.
675675
676676
Example:
@@ -693,5 +693,5 @@ def close(self) -> None:
693693
import polars as pl
694694

695695
super().close()
696-
self._df_iter = DataFrameIterator(pl.DataFrame(), {}, [])
696+
self._df_iter = PolarsDataFrameIterator(pl.DataFrame(), {}, [])
697697
self._iterrows = iter([])

tests/pyathena/pandas/test_cursor.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
from pyathena.error import DatabaseError, ProgrammingError
1717
from pyathena.pandas.cursor import PandasCursor
18-
from pyathena.pandas.result_set import AthenaPandasResultSet, DataFrameIterator
18+
from pyathena.pandas.result_set import AthenaPandasResultSet, PandasDataFrameIterator
1919
from tests import ENV
2020
from tests.pyathena.conftest import connect
2121

@@ -1207,7 +1207,7 @@ def test_pandas_cursor_auto_optimize_chunksize_enabled(self, pandas_cursor):
12071207
# Should work without error (auto-optimization for small files may not trigger chunking)
12081208
result = cursor.as_pandas()
12091209
# Small test data likely won't trigger chunking, so expect DataFrame
1210-
assert isinstance(result, (pd.DataFrame, DataFrameIterator))
1210+
assert isinstance(result, (pd.DataFrame, PandasDataFrameIterator))
12111211

12121212
def test_pandas_cursor_auto_optimize_chunksize_disabled(self, pandas_cursor):
12131213
"""Test PandasCursor with auto_optimize_chunksize disabled (default)."""
@@ -1229,7 +1229,7 @@ def test_pandas_cursor_explicit_chunksize_overrides_auto_optimize(self, pandas_c
12291229

12301230
# Should return iterator due to explicit chunksize
12311231
result = cursor.as_pandas()
1232-
assert isinstance(result, DataFrameIterator)
1232+
assert isinstance(result, PandasDataFrameIterator)
12331233

12341234
def test_pandas_cursor_iter_chunks_without_chunksize(self, pandas_cursor):
12351235
"""Test PandasCursor iter_chunks method without chunksize (single DataFrame)."""
@@ -1283,7 +1283,7 @@ def test_pandas_cursor_actual_chunking_behavior(self, pandas_cursor):
12831283
cursor.execute("SELECT * FROM many_rows LIMIT 50")
12841284

12851285
result = cursor.as_pandas()
1286-
assert isinstance(result, DataFrameIterator)
1286+
assert isinstance(result, PandasDataFrameIterator)
12871287

12881288
chunk_sizes = []
12891289
total_rows = 0

0 commit comments

Comments (0)