@@ -324,111 +324,3 @@ def test_empty_result_unload(self, async_polars_cursor):
         df = future.result().as_polars()
         assert df.height == 0
         assert df.width == 0
-
-    def test_iter_chunks(self):
-        """Test chunked iteration over query results."""
-        with contextlib.closing(connect(schema_name=ENV.schema)) as conn:
-            cursor = conn.cursor(AsyncPolarsCursor, chunksize=5)
-            query_id, future = cursor.execute("SELECT * FROM many_rows LIMIT 15")
-            assert query_id is not None
-            result_set = future.result()
-            chunks = list(result_set.iter_chunks())
-            assert len(chunks) > 0
-            total_rows = sum(chunk.height for chunk in chunks)
-            assert total_rows == 15
-            for chunk in chunks:
-                assert isinstance(chunk, pl.DataFrame)
-
-    def test_iter_chunks_without_chunksize(self, async_polars_cursor):
-        """Test that iter_chunks works without a chunksize, yielding the entire DataFrame."""
-        query_id, future = async_polars_cursor.execute("SELECT * FROM one_row")
-        assert query_id is not None
-        result_set = future.result()
-        chunks = list(result_set.iter_chunks())
-        # Without a chunksize, iter_chunks yields the entire DataFrame as a single chunk
-        assert len(chunks) == 1
-        assert isinstance(chunks[0], pl.DataFrame)
-        assert chunks[0].height == 1
-
-    def test_iter_chunks_many_rows(self):
-        """Test chunked iteration with many rows."""
-        with contextlib.closing(connect(schema_name=ENV.schema)) as conn:
-            cursor = conn.cursor(AsyncPolarsCursor, chunksize=1000)
-            query_id, future = cursor.execute("SELECT * FROM many_rows")
-            assert query_id is not None
-            result_set = future.result()
-            chunks = list(result_set.iter_chunks())
-            total_rows = sum(chunk.height for chunk in chunks)
-            assert total_rows == 10000
-            assert len(chunks) >= 10  # At least 10 chunks with chunksize=1000
-
-    @pytest.mark.parametrize(
-        "async_polars_cursor",
-        [
-            {
-                "cursor_kwargs": {"unload": True, "chunksize": 5},
-            },
-        ],
-        indirect=["async_polars_cursor"],
-    )
-    def test_iter_chunks_unload(self, async_polars_cursor):
-        """Test chunked iteration with UNLOAD (Parquet)."""
-        query_id, future = async_polars_cursor.execute("SELECT * FROM many_rows LIMIT 15")
-        assert query_id is not None
-        result_set = future.result()
-        chunks = list(result_set.iter_chunks())
-        assert len(chunks) > 0
-        total_rows = sum(chunk.height for chunk in chunks)
-        assert total_rows == 15
-        for chunk in chunks:
-            assert isinstance(chunk, pl.DataFrame)
-
-    def test_iter_chunks_data_consistency(self):
-        """Test that chunked and regular reading produce the same data."""
-        with contextlib.closing(connect(schema_name=ENV.schema)) as conn:
-            # Regular reading (no chunksize)
-            regular_cursor = conn.cursor(AsyncPolarsCursor)
-            query_id, future = regular_cursor.execute("SELECT * FROM many_rows LIMIT 100")
-            assert query_id is not None
-            regular_df = future.result().as_polars()
-
-            # Chunked reading
-            chunked_cursor = conn.cursor(AsyncPolarsCursor, chunksize=25)
-            query_id, future = chunked_cursor.execute("SELECT * FROM many_rows LIMIT 100")
-            assert query_id is not None
-            result_set = future.result()
-            chunked_dfs = list(result_set.iter_chunks())
-
-            # Combine chunks
-            combined_df = pl.concat(chunked_dfs)
-
-            # Should have the same data (sort for comparison)
-            assert regular_df.sort("a").equals(combined_df.sort("a"))
-
-            # Should have multiple chunks
-            assert len(chunked_dfs) > 1
-
-    def test_iter_chunks_chunk_sizes(self):
-        """Test that chunks have correct sizes."""
-        with contextlib.closing(connect(schema_name=ENV.schema)) as conn:
-            cursor = conn.cursor(AsyncPolarsCursor, chunksize=10)
-            query_id, future = cursor.execute("SELECT * FROM many_rows LIMIT 50")
-            assert query_id is not None
-            result_set = future.result()
-
-            chunk_sizes = []
-            total_rows = 0
-
-            for chunk in result_set.iter_chunks():
-                chunk_size = chunk.height
-                chunk_sizes.append(chunk_size)
-                total_rows += chunk_size
-
-                # Each chunk should not exceed chunksize
-                assert chunk_size <= 10
-
-            # Should have processed all 50 rows
-            assert total_rows == 50
-
-            # Should have multiple chunks
-            assert len(chunk_sizes) > 1
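
For reference, below is a minimal sketch of how the chunked-iteration API exercised by the removed tests above was driven, as the tests stood before this change. The AsyncPolarsCursor import path, the connection parameters, and the many_rows table are assumptions for illustration (the test suite supplies these via fixtures and environment configuration); they are not part of this diff.

import contextlib

import polars as pl
from pyathena import connect
from pyathena.polars.async_cursor import AsyncPolarsCursor  # import path assumed

# Connection parameters below are placeholders; real values depend on your AWS setup.
with contextlib.closing(
    connect(
        s3_staging_dir="s3://example-bucket/staging/",  # hypothetical bucket
        region_name="us-east-1",
        schema_name="example_schema",  # hypothetical schema
    )
) as conn:
    # chunksize caps the number of rows in each DataFrame yielded by iter_chunks().
    cursor = conn.cursor(AsyncPolarsCursor, chunksize=1000)
    # Async cursors return immediately with a query id and a future.
    query_id, future = cursor.execute("SELECT * FROM many_rows")  # hypothetical table
    result_set = future.result()  # blocks until the Athena query completes
    total_rows = 0
    for chunk in result_set.iter_chunks():
        assert isinstance(chunk, pl.DataFrame)
        total_rows += chunk.height  # each chunk holds at most 1000 rows
    print(f"streamed {total_rows} rows in chunks")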