@@ -24,11 +24,14 @@ use data_generation::dataset::tpch::TpchDataset;
2424use data_generation:: source:: Source ;
2525use data_generation:: target:: Target ;
2626use std:: collections:: { BTreeMap , HashSet } ;
27+ use std:: sync:: Arc as StdArc ;
28+ use std:: sync:: atomic:: { AtomicU64 , Ordering } ;
29+ use std:: time:: Instant ;
2730use system_adapter_protocol:: { DatasetConfig as ProtocolDatasetConfig , EtlType } ;
2831use tokio:: sync:: watch;
2932use tokio:: task:: { JoinHandle , JoinSet } ;
3033use tokio_util:: sync:: CancellationToken ;
31- use tracing:: { error, info, warn} ;
34+ use tracing:: { debug , error, info, warn} ;
3235
3336type DynSource = Arc < dyn Source > ;
3437type DynTarget = Arc < dyn Target > ;
@@ -239,7 +242,7 @@ impl ETLPipeline {
239242 . map_err ( |e| format ! ( "write {table_name} batch {first_batch_id}: {e}" ) ) ?;
240243 }
241244
242- info ! (
245+ debug ! (
243246 table = %table_name,
244247 batch_id = first_batch_id,
245248 "Initial batch processed"
@@ -363,6 +366,46 @@ async fn run_pipeline(
363366 let total_batches = work. len ( ) ;
364367 info ! ( total_steps, total_batches, "ETL pipeline started" ) ;
365368
369+ // Shared progress counters for periodic logging.
370+ let steps_completed = StdArc :: new ( AtomicU64 :: new ( 0 ) ) ;
371+ let batches_processed = StdArc :: new ( AtomicU64 :: new ( 0 ) ) ;
372+ let tables_finished = StdArc :: new ( AtomicU64 :: new ( 0 ) ) ;
373+ let pipeline_start = Instant :: now ( ) ;
374+
375+ // Spawn periodic progress logger (every 5 seconds).
376+ let progress_logger = {
377+ let steps_completed = StdArc :: clone ( & steps_completed) ;
378+ let batches_processed = StdArc :: clone ( & batches_processed) ;
379+ let tables_finished = StdArc :: clone ( & tables_finished) ;
380+ let cancel = cancel. clone ( ) ;
381+ tokio:: spawn ( async move {
382+ let mut interval = tokio:: time:: interval ( std:: time:: Duration :: from_secs ( 5 ) ) ;
383+ loop {
384+ tokio:: select! {
385+ _ = interval. tick( ) => {
386+ let elapsed = pipeline_start. elapsed( ) ;
387+ let secs = elapsed. as_secs_f64( ) ;
388+ if secs < 0.001 {
389+ continue ;
390+ }
391+ let steps_done = steps_completed. load( Ordering :: Relaxed ) ;
392+ let batches_done = batches_processed. load( Ordering :: Relaxed ) ;
393+ let tables_done = tables_finished. load( Ordering :: Relaxed ) ;
394+ info!(
395+ elapsed_secs = format!( "{secs:.1}" ) ,
396+ steps = format!( "{steps_done}/{total_steps}" ) ,
397+ batches = format!( "{batches_done}/{total_batches}" ) ,
398+ tables_finished = tables_done,
399+ batches_per_sec = format!( "{:.1}" , batches_done as f64 / secs) ,
400+ "ETL progress"
401+ ) ;
402+ }
403+ ( ) = cancel. cancelled( ) => break ,
404+ }
405+ }
406+ } )
407+ } ;
408+
366409 // Tables whose data has been fully consumed (source returned `None`).
367410 let mut finished_tables: HashSet < String > = HashSet :: new ( ) ;
368411
@@ -394,7 +437,7 @@ async fn run_pipeline(
394437 let read_result = match source. read_batch ( & table_name, batch_id) . await {
395438 Ok ( Some ( r) ) => r,
396439 Ok ( None ) => {
397- info ! (
440+ debug ! (
398441 table = %table_name,
399442 batch_id,
400443 "No more batches for table, marking as finished"
@@ -439,7 +482,7 @@ async fn run_pipeline(
439482 }
440483 }
441484
442- info ! (
485+ debug ! (
443486 table = %table_name,
444487 batch_id,
445488 "Table batch processed"
@@ -449,29 +492,43 @@ async fn run_pipeline(
449492 }
450493
451494 // Collect results from all concurrent table tasks in this step.
495+ let mut step_batch_count: u64 = 0 ;
452496 while let Some ( result) = join_set. join_next ( ) . await {
453497 match result {
454498 Ok ( Ok ( ( table_name, is_finished) ) ) => {
499+ step_batch_count += 1 ;
455500 if is_finished {
456501 finished_tables. insert ( table_name) ;
502+ tables_finished. fetch_add ( 1 , Ordering :: Relaxed ) ;
457503 }
458504 }
459505 Ok ( Err ( err_msg) ) => {
506+ progress_logger. abort ( ) ;
460507 return StopReason :: Error ( err_msg) ;
461508 }
462509 Err ( e) => {
510+ progress_logger. abort ( ) ;
463511 return StopReason :: Error ( format ! ( "Task panicked: {e}" ) ) ;
464512 }
465513 }
466514 }
467515
468- info ! (
516+ steps_completed. fetch_add ( 1 , Ordering :: Relaxed ) ;
517+ batches_processed. fetch_add ( step_batch_count, Ordering :: Relaxed ) ;
518+
519+ debug ! (
469520 batch_id,
470521 progress = format!( "{}/{}" , step_idx + 1 , total_steps) ,
471522 "Step completed"
472523 ) ;
473524 }
474525
475- info ! ( "ETL pipeline completed successfully" ) ;
526+ progress_logger. abort ( ) ;
527+ info ! (
528+ elapsed = ?pipeline_start. elapsed( ) ,
529+ steps = total_steps,
530+ batches = total_batches,
531+ "ETL pipeline completed successfully"
532+ ) ;
476533 StopReason :: Completed
477534}
0 commit comments