@@ -64,6 +64,8 @@ pub struct TableScanBuilder<'a> {
6464 // is still being worked on but will switch to a default of true
6565 // once this work is complete
6666 delete_file_processing_enabled : bool ,
67+
68+ limit : Option < usize > ,
6769}
6870
6971impl < ' a > TableScanBuilder < ' a > {
@@ -83,9 +85,16 @@ impl<'a> TableScanBuilder<'a> {
8385 row_group_filtering_enabled : true ,
8486 row_selection_enabled : false ,
8587 delete_file_processing_enabled : false ,
88+ limit : None ,
8689 }
8790 }
8891
92+ /// Sets the maximum number of records to return
93+ pub fn with_limit ( mut self , limit : Option < usize > ) -> Self {
94+ self . limit = limit;
95+ self
96+ }
97+
8998 /// Sets the desired size of batches in the response
9099 /// to something other than the default
91100 pub fn with_batch_size ( mut self , batch_size : Option < usize > ) -> Self {
@@ -299,6 +308,7 @@ impl<'a> TableScanBuilder<'a> {
299308 snapshot_schema : schema,
300309 case_sensitive : self . case_sensitive ,
301310 predicate : self . filter . map ( Arc :: new) ,
311+ limit : self . limit ,
302312 snapshot_bound_predicate : snapshot_bound_predicate. map ( Arc :: new) ,
303313 object_cache : self . table . object_cache ( ) ,
304314 field_ids : Arc :: new ( field_ids) ,
@@ -1441,6 +1451,130 @@ pub mod tests {
14411451 assert_eq ! ( int64_arr. value( 0 ) , 2 ) ;
14421452 }
14431453
1454+ #[ tokio:: test]
1455+ async fn test_limit ( ) {
1456+ let mut fixture = TableTestFixture :: new ( ) ;
1457+ fixture. setup_manifest_files ( ) . await ;
1458+
1459+ let mut builder = fixture. table . scan ( ) ;
1460+ builder = builder. with_limit ( Some ( 1 ) ) ;
1461+ let table_scan = builder. build ( ) . unwrap ( ) ;
1462+
1463+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1464+
1465+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1466+
1467+ assert_eq ! ( batches. len( ) , 2 ) ;
1468+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 1 ) ;
1469+ assert_eq ! ( batches[ 1 ] . num_rows( ) , 1 ) ;
1470+
1471+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1472+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1473+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1474+
1475+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1476+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1477+ assert_eq ! ( int64_arr. value( 0 ) , 2 ) ;
1478+
1479+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1480+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1481+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1482+
1483+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1484+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1485+ assert_eq ! ( int64_arr. value( 0 ) , 2 ) ;
1486+ }
1487+
1488+ #[ tokio:: test]
1489+ async fn test_limit_with_predicate ( ) {
1490+ let mut fixture = TableTestFixture :: new ( ) ;
1491+ fixture. setup_manifest_files ( ) . await ;
1492+
1493+ // Filter: y < 3
1494+ let mut builder = fixture. table . scan ( ) ;
1495+ let predicate = Reference :: new ( "y" ) . greater_than ( Datum :: long ( 3 ) ) ;
1496+ builder = builder. with_filter ( predicate) . with_limit ( Some ( 1 ) ) ;
1497+ let table_scan = builder. build ( ) . unwrap ( ) ;
1498+
1499+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1500+
1501+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1502+
1503+ assert_eq ! ( batches. len( ) , 2 ) ;
1504+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 1 ) ;
1505+ assert_eq ! ( batches[ 1 ] . num_rows( ) , 1 ) ;
1506+
1507+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1508+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1509+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1510+
1511+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1512+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1513+ assert_eq ! ( int64_arr. value( 0 ) , 4 ) ;
1514+ }
1515+
1516+ #[ tokio:: test]
1517+ async fn test_limit_with_predicate_and_row_selection ( ) {
1518+ let mut fixture = TableTestFixture :: new ( ) ;
1519+ fixture. setup_manifest_files ( ) . await ;
1520+
1521+ // Filter: y < 3
1522+ let mut builder = fixture. table . scan ( ) ;
1523+ let predicate = Reference :: new ( "y" ) . greater_than ( Datum :: long ( 3 ) ) ;
1524+ builder = builder
1525+ . with_filter ( predicate)
1526+ . with_limit ( Some ( 1 ) )
1527+ . with_row_selection_enabled ( true ) ;
1528+ let table_scan = builder. build ( ) . unwrap ( ) ;
1529+
1530+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1531+
1532+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1533+
1534+ assert_eq ! ( batches. len( ) , 2 ) ;
1535+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 1 ) ;
1536+ assert_eq ! ( batches[ 1 ] . num_rows( ) , 1 ) ;
1537+
1538+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1539+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1540+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1541+
1542+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1543+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1544+ assert_eq ! ( int64_arr. value( 0 ) , 4 ) ;
1545+ }
1546+
1547+ #[ tokio:: test]
1548+ async fn test_limit_higher_than_total_rows ( ) {
1549+ let mut fixture = TableTestFixture :: new ( ) ;
1550+ fixture. setup_manifest_files ( ) . await ;
1551+
1552+ // Filter: y < 3
1553+ let mut builder = fixture. table . scan ( ) ;
1554+ let predicate = Reference :: new ( "y" ) . greater_than ( Datum :: long ( 3 ) ) ;
1555+ builder = builder
1556+ . with_filter ( predicate)
1557+ . with_limit ( Some ( 100_000_000 ) )
1558+ . with_row_selection_enabled ( true ) ;
1559+ let table_scan = builder. build ( ) . unwrap ( ) ;
1560+
1561+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1562+
1563+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1564+
1565+ assert_eq ! ( batches. len( ) , 2 ) ;
1566+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 312 ) ;
1567+ assert_eq ! ( batches[ 1 ] . num_rows( ) , 312 ) ;
1568+
1569+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1570+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1571+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1572+
1573+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1574+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1575+ assert_eq ! ( int64_arr. value( 0 ) , 4 ) ;
1576+ }
1577+
14441578 #[ tokio:: test]
14451579 async fn test_filter_on_arrow_gt_eq ( ) {
14461580 let mut fixture = TableTestFixture :: new ( ) ;
@@ -1816,6 +1950,7 @@ pub mod tests {
18161950 record_count : Some ( 100 ) ,
18171951 data_file_format : DataFileFormat :: Parquet ,
18181952 deletes : vec ! [ ] ,
1953+ limit : None ,
18191954 } ;
18201955 test_fn ( task) ;
18211956
@@ -1831,6 +1966,7 @@ pub mod tests {
18311966 record_count : None ,
18321967 data_file_format : DataFileFormat :: Avro ,
18331968 deletes : vec ! [ ] ,
1969+ limit : None ,
18341970 } ;
18351971 test_fn ( task) ;
18361972 }
0 commit comments