Skip to content

Commit cef042a

Browse files
Add chunksize tests for PolarsCursor fetch methods
- Add tests for fetchone, fetchmany, fetchall, and iterator with chunksize - Add tests for fetch methods with UNLOAD mode and chunksize - Remove redundant iter_chunks tests from AsyncPolarsCursor since both cursor types share the same AthenaPolarsResultSet implementation 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 317fd3b commit cef042a

File tree

2 files changed

+82
-108
lines changed

2 files changed

+82
-108
lines changed

tests/pyathena/polars/test_async_cursor.py

Lines changed: 0 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -324,111 +324,3 @@ def test_empty_result_unload(self, async_polars_cursor):
324324
df = future.result().as_polars()
325325
assert df.height == 0
326326
assert df.width == 0
327-
328-
def test_iter_chunks(self):
329-
"""Test chunked iteration over query results."""
330-
with contextlib.closing(connect(schema_name=ENV.schema)) as conn:
331-
cursor = conn.cursor(AsyncPolarsCursor, chunksize=5)
332-
query_id, future = cursor.execute("SELECT * FROM many_rows LIMIT 15")
333-
assert query_id is not None
334-
result_set = future.result()
335-
chunks = list(result_set.iter_chunks())
336-
assert len(chunks) > 0
337-
total_rows = sum(chunk.height for chunk in chunks)
338-
assert total_rows == 15
339-
for chunk in chunks:
340-
assert isinstance(chunk, pl.DataFrame)
341-
342-
def test_iter_chunks_without_chunksize(self, async_polars_cursor):
343-
"""Test that iter_chunks works without chunksize, yielding entire DataFrame."""
344-
query_id, future = async_polars_cursor.execute("SELECT * FROM one_row")
345-
assert query_id is not None
346-
result_set = future.result()
347-
chunks = list(result_set.iter_chunks())
348-
# Without chunksize, yields entire DataFrame as single chunk
349-
assert len(chunks) == 1
350-
assert isinstance(chunks[0], pl.DataFrame)
351-
assert chunks[0].height == 1
352-
353-
def test_iter_chunks_many_rows(self):
354-
"""Test chunked iteration with many rows."""
355-
with contextlib.closing(connect(schema_name=ENV.schema)) as conn:
356-
cursor = conn.cursor(AsyncPolarsCursor, chunksize=1000)
357-
query_id, future = cursor.execute("SELECT * FROM many_rows")
358-
assert query_id is not None
359-
result_set = future.result()
360-
chunks = list(result_set.iter_chunks())
361-
total_rows = sum(chunk.height for chunk in chunks)
362-
assert total_rows == 10000
363-
assert len(chunks) >= 10 # At least 10 chunks with chunksize=1000
364-
365-
@pytest.mark.parametrize(
366-
"async_polars_cursor",
367-
[
368-
{
369-
"cursor_kwargs": {"unload": True, "chunksize": 5},
370-
},
371-
],
372-
indirect=["async_polars_cursor"],
373-
)
374-
def test_iter_chunks_unload(self, async_polars_cursor):
375-
"""Test chunked iteration with UNLOAD (Parquet)."""
376-
query_id, future = async_polars_cursor.execute("SELECT * FROM many_rows LIMIT 15")
377-
assert query_id is not None
378-
result_set = future.result()
379-
chunks = list(result_set.iter_chunks())
380-
assert len(chunks) > 0
381-
total_rows = sum(chunk.height for chunk in chunks)
382-
assert total_rows == 15
383-
for chunk in chunks:
384-
assert isinstance(chunk, pl.DataFrame)
385-
386-
def test_iter_chunks_data_consistency(self):
387-
"""Test that chunked and regular reading produce the same data."""
388-
with contextlib.closing(connect(schema_name=ENV.schema)) as conn:
389-
# Regular reading (no chunksize)
390-
regular_cursor = conn.cursor(AsyncPolarsCursor)
391-
query_id, future = regular_cursor.execute("SELECT * FROM many_rows LIMIT 100")
392-
assert query_id is not None
393-
regular_df = future.result().as_polars()
394-
395-
# Chunked reading
396-
chunked_cursor = conn.cursor(AsyncPolarsCursor, chunksize=25)
397-
query_id, future = chunked_cursor.execute("SELECT * FROM many_rows LIMIT 100")
398-
assert query_id is not None
399-
result_set = future.result()
400-
chunked_dfs = list(result_set.iter_chunks())
401-
402-
# Combine chunks
403-
combined_df = pl.concat(chunked_dfs)
404-
405-
# Should have the same data (sort for comparison)
406-
assert regular_df.sort("a").equals(combined_df.sort("a"))
407-
408-
# Should have multiple chunks
409-
assert len(chunked_dfs) > 1
410-
411-
def test_iter_chunks_chunk_sizes(self):
412-
"""Test that chunks have correct sizes."""
413-
with contextlib.closing(connect(schema_name=ENV.schema)) as conn:
414-
cursor = conn.cursor(AsyncPolarsCursor, chunksize=10)
415-
query_id, future = cursor.execute("SELECT * FROM many_rows LIMIT 50")
416-
assert query_id is not None
417-
result_set = future.result()
418-
419-
chunk_sizes = []
420-
total_rows = 0
421-
422-
for chunk in result_set.iter_chunks():
423-
chunk_size = chunk.height
424-
chunk_sizes.append(chunk_size)
425-
total_rows += chunk_size
426-
427-
# Each chunk should not exceed chunksize
428-
assert chunk_size <= 10
429-
430-
# Should have processed all 50 rows
431-
assert total_rows == 50
432-
433-
# Should have multiple chunks
434-
assert len(chunk_sizes) > 1

tests/pyathena/polars/test_cursor.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,3 +542,85 @@ def test_iter_chunks_chunk_sizes(self):
542542

543543
# Should have multiple chunks
544544
assert len(chunk_sizes) > 1
545+
546+
def test_fetchone_with_chunksize(self):
547+
"""Test that fetchone works correctly with chunksize enabled."""
548+
with contextlib.closing(connect(schema_name=ENV.schema)) as conn:
549+
cursor = conn.cursor(PolarsCursor, chunksize=5)
550+
cursor.execute("SELECT * FROM many_rows LIMIT 15")
551+
552+
rows = []
553+
while True:
554+
row = cursor.fetchone()
555+
if row is None:
556+
break
557+
rows.append(row)
558+
559+
assert len(rows) == 15
560+
561+
def test_fetchmany_with_chunksize(self):
562+
"""Test that fetchmany works correctly with chunksize enabled."""
563+
with contextlib.closing(connect(schema_name=ENV.schema)) as conn:
564+
cursor = conn.cursor(PolarsCursor, chunksize=5)
565+
cursor.execute("SELECT * FROM many_rows LIMIT 15")
566+
567+
batch1 = cursor.fetchmany(10)
568+
batch2 = cursor.fetchmany(10)
569+
570+
assert len(batch1) == 10
571+
assert len(batch2) == 5
572+
573+
def test_fetchall_with_chunksize(self):
574+
"""Test that fetchall works correctly with chunksize enabled."""
575+
with contextlib.closing(connect(schema_name=ENV.schema)) as conn:
576+
cursor = conn.cursor(PolarsCursor, chunksize=5)
577+
cursor.execute("SELECT * FROM many_rows LIMIT 15")
578+
579+
rows = cursor.fetchall()
580+
assert len(rows) == 15
581+
582+
def test_iterator_with_chunksize(self):
583+
"""Test that cursor iteration works correctly with chunksize enabled."""
584+
with contextlib.closing(connect(schema_name=ENV.schema)) as conn:
585+
cursor = conn.cursor(PolarsCursor, chunksize=5)
586+
cursor.execute("SELECT * FROM many_rows LIMIT 15")
587+
588+
rows = list(cursor)
589+
assert len(rows) == 15
590+
591+
@pytest.mark.parametrize(
592+
"polars_cursor",
593+
[
594+
{
595+
"cursor_kwargs": {"unload": True, "chunksize": 5},
596+
},
597+
],
598+
indirect=["polars_cursor"],
599+
)
600+
def test_fetchone_with_chunksize_unload(self, polars_cursor):
601+
"""Test that fetchone works correctly with chunksize and unload enabled."""
602+
polars_cursor.execute("SELECT * FROM many_rows LIMIT 15")
603+
604+
rows = []
605+
while True:
606+
row = polars_cursor.fetchone()
607+
if row is None:
608+
break
609+
rows.append(row)
610+
611+
assert len(rows) == 15
612+
613+
@pytest.mark.parametrize(
614+
"polars_cursor",
615+
[
616+
{
617+
"cursor_kwargs": {"unload": True, "chunksize": 5},
618+
},
619+
],
620+
indirect=["polars_cursor"],
621+
)
622+
def test_iterator_with_chunksize_unload(self, polars_cursor):
623+
"""Test that cursor iteration works with chunksize and unload enabled."""
624+
polars_cursor.execute("SELECT * FROM many_rows LIMIT 15")
625+
rows = list(polars_cursor)
626+
assert len(rows) == 15

0 commit comments

Comments (0)