@@ -10,14 +10,16 @@ use anyhow::{Context, Result};
1010use clap:: Parser ;
1111use runner:: assert_results_match;
1212use std:: path:: PathBuf ;
13+ use std:: time:: Instant ;
1314
1415#[ derive( Parser ) ]
1516#[ command( name = "ducklake-benchmark" ) ]
1617#[ command( about = "Benchmark comparing DuckDB-DuckLake vs DataFusion-DuckLake performance" ) ]
18+ #[ command( version) ]
1719struct Args {
1820 /// Path to the DuckLake catalog database
1921 #[ arg( short, long) ]
20- catalog : PathBuf ,
22+ catalog : Option < PathBuf > ,
2123
2224 /// Use TPC-H queries from DuckDB's tpch extension (22 queries)
2325 #[ arg( long) ]
@@ -46,24 +48,57 @@ struct Args {
4648 /// Skip queries that fail and continue (useful for TPC-DS where some queries may not be supported)
4749 #[ arg( long) ]
4850 skip_errors : bool ,
51+
52+ /// Skip queries that take longer than this many milliseconds during warmup (e.g., 500)
53+ #[ arg( long) ]
54+ timeout_ms : Option < u64 > ,
55+
56+ /// Show version information for all components
57+ #[ arg( long) ]
58+ show_versions : bool ,
59+ }
60+
61+ fn print_versions ( ) {
62+ // Get DuckDB version by querying it
63+ let duckdb_version = duckdb:: Connection :: open_in_memory ( )
64+ . and_then ( |conn| conn. query_row ( "SELECT version()" , [ ] , |row| row. get :: < _ , String > ( 0 ) ) )
65+ . unwrap_or_else ( |_| "unknown" . to_string ( ) ) ;
66+
67+ println ! ( "Component Versions:" ) ;
68+ println ! ( " datafusion-ducklake: {}" , env!( "CARGO_PKG_VERSION" ) ) ;
69+ println ! ( " datafusion: {}" , datafusion:: DATAFUSION_VERSION ) ;
70+ println ! ( " duckdb: {}" , duckdb_version) ;
4971}
5072
5173#[ tokio:: main]
5274async fn main ( ) -> Result < ( ) > {
5375 let args = Args :: parse ( ) ;
5476
77+ // Handle --show-versions flag
78+ if args. show_versions {
79+ print_versions ( ) ;
80+ return Ok ( ( ) ) ;
81+ }
82+
83+ // Validate catalog is provided for benchmarking
84+ let catalog = args. catalog . ok_or_else ( || {
85+ anyhow:: anyhow!( "--catalog is required. Use --show-versions to check component versions." )
86+ } ) ?;
87+
5588 println ! ( "╔═══════════════════════════════════════════════════════════════╗" ) ;
5689 println ! ( "║ DuckLake Query Engine Comparison ║" ) ;
5790 println ! ( "║ DuckDB-DuckLake vs DataFusion-DuckLake ║" ) ;
5891 println ! ( "╚═══════════════════════════════════════════════════════════════╝" ) ;
5992 println ! ( ) ;
60- println ! ( "Catalog: {:?}" , args. catalog) ;
93+ print_versions ( ) ;
94+ println ! ( ) ;
95+ println ! ( "Catalog: {:?}" , catalog) ;
6196 println ! ( "Iterations: {}" , args. iterations) ;
6297 println ! ( "Warmup: {}" , !args. no_warmup) ;
6398
6499 // Initialize runners
65- let duckdb_runner = duckdb_runner:: DuckDbRunner :: new ( & args . catalog ) ?;
66- let datafusion_runner = datafusion_runner:: DataFusionRunner :: new ( & args . catalog ) . await ?;
100+ let duckdb_runner = duckdb_runner:: DuckDbRunner :: new ( & catalog) ?;
101+ let datafusion_runner = datafusion_runner:: DataFusionRunner :: new ( & catalog) . await ?;
67102
68103 let mut all_results = Vec :: new ( ) ;
69104
@@ -143,10 +178,18 @@ async fn main() -> Result<()> {
143178 let datafusion_metrics =
144179 metrics:: benchmark_async ( || datafusion_runner. execute ( & query. sql ) , args. iterations )
145180 . await ?;
146- println ! (
181+ print ! (
147182 "avg={:>8.2}ms min={:>8.2}ms max={:>8.2}ms" ,
148183 datafusion_metrics. avg_ms, datafusion_metrics. min_ms, datafusion_metrics. max_ms
149184 ) ;
185+ if let Some ( phases) = & datafusion_metrics. phases {
186+ println ! (
187+ " (plan={:.1}ms, phys={:.1}ms, exec={:.1}ms)" ,
188+ phases. plan_ms, phases. physical_ms, phases. exec_ms
189+ ) ;
190+ } else {
191+ println ! ( ) ;
192+ }
150193
151194 // Show ratio
152195 let ratio = datafusion_metrics. avg_ms / duckdb_metrics. avg_ms ;
@@ -193,12 +236,27 @@ async fn main() -> Result<()> {
193236 // Separate warmup phase
194237 if !args. no_warmup {
195238 println ! ( "───────────────────────────────────────────────────────────────" ) ;
196- println ! ( "Warmup phase: running all queries once and verifying row counts..." ) ;
239+ if let Some ( timeout) = args. timeout_ms {
240+ println ! ( "Warmup phase: running all queries once (timeout={}ms)..." , timeout) ;
241+ } else {
242+ println ! ( "Warmup phase: running all queries once and verifying row counts..." ) ;
243+ }
197244 for ( i, query) in queries. iter ( ) . enumerate ( ) {
198245 print ! ( " [{:>2}/{}] {}... " , i + 1 , queries. len( ) , query. name) ;
199246
247+ let start = Instant :: now ( ) ;
200248 let duckdb_result = duckdb_runner. execute ( & query. sql ) ;
201249 let datafusion_result = datafusion_runner. execute ( & query. sql ) . await ;
250+ let elapsed_ms = start. elapsed ( ) . as_millis ( ) as u64 ;
251+
252+ // Check timeout
253+ if let Some ( timeout) = args. timeout_ms {
254+ if elapsed_ms > timeout {
255+ println ! ( "SKIP (too slow: {}ms > {}ms)" , elapsed_ms, timeout) ;
256+ skipped_queries. push ( ( query. name . clone ( ) , format ! ( "Timeout: {}ms" , elapsed_ms) ) ) ;
257+ continue ;
258+ }
259+ }
202260
203261 match ( & duckdb_result, & datafusion_result) {
204262 ( Ok ( duck) , Ok ( df) ) => {
@@ -212,7 +270,7 @@ async fn main() -> Result<()> {
212270 return Err ( e) ;
213271 }
214272 } else {
215- println ! ( "done ({} rows)" , duck. row_count) ;
273+ println ! ( "done ({} rows, {}ms )" , duck. row_count, elapsed_ms ) ;
216274 }
217275 } ,
218276 ( Err ( e) , _) => {
@@ -239,7 +297,7 @@ async fn main() -> Result<()> {
239297 }
240298 }
241299 if !skipped_queries. is_empty ( ) {
242- println ! ( "\n Skipped {} queries due to errors" , skipped_queries. len( ) ) ;
300+ println ! ( "\n Skipped {} queries due to errors/timeout " , skipped_queries. len( ) ) ;
243301 }
244302 println ! ( "Warmup complete.\n " ) ;
245303 }
@@ -277,10 +335,18 @@ async fn main() -> Result<()> {
277335 let datafusion_metrics =
278336 metrics:: benchmark_async ( || datafusion_runner. execute ( & query. sql ) , args. iterations )
279337 . await ?;
280- println ! (
338+ print ! (
281339 "avg={:>8.2}ms min={:>8.2}ms max={:>8.2}ms" ,
282340 datafusion_metrics. avg_ms, datafusion_metrics. min_ms, datafusion_metrics. max_ms
283341 ) ;
342+ if let Some ( phases) = & datafusion_metrics. phases {
343+ println ! (
344+ " (plan={:.1}ms, phys={:.1}ms, exec={:.1}ms)" ,
345+ phases. plan_ms, phases. physical_ms, phases. exec_ms
346+ ) ;
347+ } else {
348+ println ! ( ) ;
349+ }
284350
285351 // Show ratio
286352 let ratio = datafusion_metrics. avg_ms / duckdb_metrics. avg_ms ;
@@ -362,10 +428,18 @@ async fn main() -> Result<()> {
362428 let datafusion_metrics =
363429 metrics:: benchmark_async ( || datafusion_runner. execute ( sql) , args. iterations )
364430 . await ?;
365- println ! (
431+ print ! (
366432 "avg={:>8.2}ms min={:>8.2}ms max={:>8.2}ms" ,
367433 datafusion_metrics. avg_ms, datafusion_metrics. min_ms, datafusion_metrics. max_ms
368434 ) ;
435+ if let Some ( phases) = & datafusion_metrics. phases {
436+ println ! (
437+ " (plan={:.1}ms, phys={:.1}ms, exec={:.1}ms)" ,
438+ phases. plan_ms, phases. physical_ms, phases. exec_ms
439+ ) ;
440+ } else {
441+ println ! ( ) ;
442+ }
369443
370444 // Show ratio
371445 let ratio = datafusion_metrics. avg_ms / duckdb_metrics. avg_ms ;
0 commit comments