@@ -60,6 +60,8 @@ pub struct TableScanBuilder<'a> {
6060 concurrency_limit_manifest_files : usize ,
6161 row_group_filtering_enabled : bool ,
6262 row_selection_enabled : bool ,
63+
64+ limit : Option < usize > ,
6365}
6466
6567impl < ' a > TableScanBuilder < ' a > {
@@ -78,9 +80,16 @@ impl<'a> TableScanBuilder<'a> {
7880 concurrency_limit_manifest_files : num_cpus,
7981 row_group_filtering_enabled : true ,
8082 row_selection_enabled : false ,
83+ limit : None ,
8184 }
8285 }
8386
87+ /// Sets the maximum number of records to return
88+ pub fn with_limit ( mut self , limit : Option < usize > ) -> Self {
89+ self . limit = limit;
90+ self
91+ }
92+
8493 /// Sets the desired size of batches in the response
8594 /// to something other than the default
8695 pub fn with_batch_size ( mut self , batch_size : Option < usize > ) -> Self {
@@ -285,6 +294,7 @@ impl<'a> TableScanBuilder<'a> {
285294 snapshot_schema : schema,
286295 case_sensitive : self . case_sensitive ,
287296 predicate : self . filter . map ( Arc :: new) ,
297+ limit : self . limit ,
288298 snapshot_bound_predicate : snapshot_bound_predicate. map ( Arc :: new) ,
289299 object_cache : self . table . object_cache ( ) ,
290300 field_ids : Arc :: new ( field_ids) ,
@@ -1508,6 +1518,130 @@ pub mod tests {
15081518 assert_eq ! ( int64_arr. value( 0 ) , 2 ) ;
15091519 }
15101520
1521+ #[ tokio:: test]
1522+ async fn test_limit ( ) {
1523+ let mut fixture = TableTestFixture :: new ( ) ;
1524+ fixture. setup_manifest_files ( ) . await ;
1525+
1526+ let mut builder = fixture. table . scan ( ) ;
1527+ builder = builder. with_limit ( Some ( 1 ) ) ;
1528+ let table_scan = builder. build ( ) . unwrap ( ) ;
1529+
1530+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1531+
1532+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1533+
1534+ assert_eq ! ( batches. len( ) , 2 ) ;
1535+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 1 ) ;
1536+ assert_eq ! ( batches[ 1 ] . num_rows( ) , 1 ) ;
1537+
1538+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1539+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1540+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1541+
1542+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1543+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1544+ assert_eq ! ( int64_arr. value( 0 ) , 2 ) ;
1545+
1546+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1547+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1548+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1549+
1550+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1551+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1552+ assert_eq ! ( int64_arr. value( 0 ) , 2 ) ;
1553+ }
1554+
1555+ #[ tokio:: test]
1556+ async fn test_limit_with_predicate ( ) {
1557+ let mut fixture = TableTestFixture :: new ( ) ;
1558+ fixture. setup_manifest_files ( ) . await ;
1559+
1560+ // Filter: y > 3
1561+ let mut builder = fixture. table . scan ( ) ;
1562+ let predicate = Reference :: new ( "y" ) . greater_than ( Datum :: long ( 3 ) ) ;
1563+ builder = builder. with_filter ( predicate) . with_limit ( Some ( 1 ) ) ;
1564+ let table_scan = builder. build ( ) . unwrap ( ) ;
1565+
1566+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1567+
1568+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1569+
1570+ assert_eq ! ( batches. len( ) , 2 ) ;
1571+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 1 ) ;
1572+ assert_eq ! ( batches[ 1 ] . num_rows( ) , 1 ) ;
1573+
1574+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1575+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1576+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1577+
1578+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1579+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1580+ assert_eq ! ( int64_arr. value( 0 ) , 4 ) ;
1581+ }
1582+
1583+ #[ tokio:: test]
1584+ async fn test_limit_with_predicate_and_row_selection ( ) {
1585+ let mut fixture = TableTestFixture :: new ( ) ;
1586+ fixture. setup_manifest_files ( ) . await ;
1587+
1588+ // Filter: y > 3
1589+ let mut builder = fixture. table . scan ( ) ;
1590+ let predicate = Reference :: new ( "y" ) . greater_than ( Datum :: long ( 3 ) ) ;
1591+ builder = builder
1592+ . with_filter ( predicate)
1593+ . with_limit ( Some ( 1 ) )
1594+ . with_row_selection_enabled ( true ) ;
1595+ let table_scan = builder. build ( ) . unwrap ( ) ;
1596+
1597+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1598+
1599+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1600+
1601+ assert_eq ! ( batches. len( ) , 2 ) ;
1602+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 1 ) ;
1603+ assert_eq ! ( batches[ 1 ] . num_rows( ) , 1 ) ;
1604+
1605+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1606+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1607+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1608+
1609+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1610+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1611+ assert_eq ! ( int64_arr. value( 0 ) , 4 ) ;
1612+ }
1613+
1614+ #[ tokio:: test]
1615+ async fn test_limit_higher_than_total_rows ( ) {
1616+ let mut fixture = TableTestFixture :: new ( ) ;
1617+ fixture. setup_manifest_files ( ) . await ;
1618+
1619+ // Filter: y > 3
1620+ let mut builder = fixture. table . scan ( ) ;
1621+ let predicate = Reference :: new ( "y" ) . greater_than ( Datum :: long ( 3 ) ) ;
1622+ builder = builder
1623+ . with_filter ( predicate)
1624+ . with_limit ( Some ( 100_000_000 ) )
1625+ . with_row_selection_enabled ( true ) ;
1626+ let table_scan = builder. build ( ) . unwrap ( ) ;
1627+
1628+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1629+
1630+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1631+
1632+ assert_eq ! ( batches. len( ) , 2 ) ;
1633+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 312 ) ;
1634+ assert_eq ! ( batches[ 1 ] . num_rows( ) , 312 ) ;
1635+
1636+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1637+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1638+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1639+
1640+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1641+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1642+ assert_eq ! ( int64_arr. value( 0 ) , 4 ) ;
1643+ }
1644+
15111645 #[ tokio:: test]
15121646 async fn test_filter_on_arrow_gt_eq ( ) {
15131647 let mut fixture = TableTestFixture :: new ( ) ;
@@ -1882,6 +2016,7 @@ pub mod tests {
18822016 record_count : Some ( 100 ) ,
18832017 data_file_format : DataFileFormat :: Parquet ,
18842018 deletes : vec ! [ ] ,
2019+ limit : None ,
18852020 partition : None ,
18862021 partition_spec : None ,
18872022 name_mapping : None ,
@@ -1900,6 +2035,7 @@ pub mod tests {
19002035 record_count : None ,
19012036 data_file_format : DataFileFormat :: Avro ,
19022037 deletes : vec ! [ ] ,
2038+ limit : None ,
19032039 partition : None ,
19042040 partition_spec : None ,
19052041 name_mapping : None ,
0 commit comments