@@ -6447,6 +6447,11 @@ def rows(self, named: bool = False) -> list[tuple[Any, ...]] | list[Any]:
64476447 Return named tuples instead of regular tuples. This is more expensive than
64486448 returning regular tuples, but allows for accessing values by column name.
64496449
6450+ Warnings
6451+ --------
6452+ Row iteration is not optimal as the underlying data is stored in columnar form;
6453+ where possible, prefer export via one of the dedicated export/output methods.
6454+
64506455 Examples
64516456 --------
64526457 >>> df = pl.DataFrame(
@@ -6460,6 +6465,10 @@ def rows(self, named: bool = False) -> list[tuple[Any, ...]] | list[Any]:
64606465 >>> df.rows(named=True)
64616466 [Row(a=1, b=2), Row(a=3, b=4), Row(a=5, b=6)]
64626467
6468+ See Also
6469+ --------
6470+ iterrows : row iterator over frame data (does not materialise all rows).
6471+
64636472 """
64646473 if named :
64656474 Row = namedtuple ("Row" , self .columns ) # type: ignore[misc]
@@ -6468,15 +6477,19 @@ def rows(self, named: bool = False) -> list[tuple[Any, ...]] | list[Any]:
64686477 return self ._df .row_tuples ()
64696478
64706479 @overload
6471- def iterrows (self , named : Literal [False ] = ...) -> Iterator [tuple [Any , ...]]:
6480+ def iterrows (
6481+ self , named : Literal [False ] = ..., buffer_size : int = ...
6482+ ) -> Iterator [tuple [Any , ...]]:
64726483 ...
64736484
64746485 @overload
6475- def iterrows (self , named : Literal [True ] = ...) -> Iterator [Any ]:
6486+ def iterrows (
6487+ self , named : Literal [True ] = ..., buffer_size : int = ...
6488+ ) -> Iterator [Any ]:
64766489 ...
64776490
64786491 def iterrows (
6479- self , named : bool = False
6492+ self , named : bool = False , buffer_size : int = 500
64806493 ) -> Iterator [tuple [Any , ...]] | Iterator [Any ]:
64816494 """
64826495 Returns an iterator over the rows in the DataFrame.
@@ -6487,9 +6500,22 @@ def iterrows(
64876500 Return named tuples instead of regular tuples. This is more expensive than
64886501 returning regular tuples, but allows for accessing values by column name.
64896502
6503+ buffer_size
6504+ Determines the number of rows that are buffered internally while iterating
6505+ over the data. Only modify this in very specific cases where the default
6506+ value is determined not to be a good fit for your access pattern, as the
6507+ speedup from using the buffer is significant (~2-4x). Setting this value
6508+ to zero disables row buffering.
6509+
64906510 Warnings
64916511 --------
6492- This is very expensive and should not be used in any performance critical code!
6512+ Row iteration is not optimal as the underlying data is stored in columnar form;
6513+ where possible, prefer export via one of the dedicated export/output methods.
6514+
6515+ Notes
6516+ -----
6517+ If you are planning to materialise all frame data at once you should prefer
6518+ calling ``rows()``, which will be faster.
64936519
64946520 Examples
64956521 --------
@@ -6504,9 +6530,25 @@ def iterrows(
65046530 >>> [row.b for row in df.iterrows(named=True)]
65056531 [2, 4, 6]
65066532
6533+ See Also
6534+ --------
6535+ rows : materialises all frame data as a list of rows.
6536+
65076537 """
6538+ # note: buffering rows results in a 2-4x speedup over individual calls
6539+ # to ".row(i)", so it should only be disabled in extremely specific cases.
65086540 if named :
65096541 Row = namedtuple ("Row" , self .columns ) # type: ignore[misc]
6542+ if buffer_size :
6543+ for offset in range (0 , self .height , buffer_size ):
6544+ rows_chunk = self .slice (offset , buffer_size ).rows (named = False )
6545+ if named :
6546+ for row in rows_chunk :
6547+ yield Row (* row )
6548+ else :
6549+ yield from rows_chunk
6550+
6551+ elif named :
65106552 for i in range (self .height ):
65116553 yield Row (* self .row (i ))
65126554 else :
0 commit comments