@@ -124,10 +124,13 @@ impl DatasetSource {
124124 & self ,
125125 config : & GenerationDatasetConfig ,
126126 mutations : & MutationConfig ,
127+ storage : Arc < dyn DataStorage > ,
127128 ) -> anyhow:: Result < Arc < dyn Dataset > > {
128129 match self {
129- DatasetSource :: SimpleSequence => SimpleSequenceDataset :: create ( config, mutations) ,
130- DatasetSource :: Tpch => TpchDataset :: create ( config, mutations) ,
130+ DatasetSource :: SimpleSequence => {
131+ SimpleSequenceDataset :: create ( config, mutations, storage)
132+ }
133+ DatasetSource :: Tpch => TpchDataset :: create ( config, mutations, storage) ,
131134 }
132135 }
133136}
@@ -224,7 +227,7 @@ impl ETLPipeline {
224227 data_sink : Arc < dyn Sink > ,
225228 mutations : & MutationConfig ,
226229 ) -> anyhow:: Result < Self > {
227- let dataset = dataset_source. create ( config, mutations) ?;
230+ let dataset = dataset_source. create ( config, mutations, Arc :: clone ( & data_storage ) ) ?;
228231 let ( state_tx, state_rx) = watch:: channel ( PipelineState :: NotStarted ) ;
229232 Ok ( Self {
230233 dataset_source,
@@ -399,7 +402,7 @@ impl ETLPipeline {
399402 /// processed, the [`CancellationToken`] is triggered, or an error occurs.
400403 ///
401404 /// Returns an error if the pipeline is not in the [`Initialized`] state.
402- pub fn start ( & mut self ) -> anyhow:: Result < ( ) > {
405+ pub async fn start ( & mut self ) -> anyhow:: Result < ( ) > {
403406 let current_state = self . state_rx . borrow ( ) . clone ( ) ;
404407 if current_state != PipelineState :: Initialized {
405408 anyhow:: bail!(
@@ -409,7 +412,7 @@ impl ETLPipeline {
409412 }
410413
411414 self . batch_budget = None ;
412- self . build_work_plan ( ) ;
415+ self . build_work_plan ( ) . await ;
413416 self . spawn_run_task ( None ) ;
414417 Ok ( ( ) )
415418 }
@@ -427,7 +430,7 @@ impl ETLPipeline {
427430 /// [`PipelineState::Stopped(StopReason::Completed)`].
428431 ///
429432 /// Returns an error if the pipeline is not in the [`Initialized`] state.
430- pub fn run ( & mut self , step_count : usize ) -> anyhow:: Result < ( ) > {
433+ pub async fn run ( & mut self , step_count : usize ) -> anyhow:: Result < ( ) > {
431434 let current_state = self . state_rx . borrow ( ) . clone ( ) ;
432435 if current_state != PipelineState :: Initialized {
433436 anyhow:: bail!(
@@ -437,7 +440,7 @@ impl ETLPipeline {
437440 }
438441
439442 self . batch_budget = Some ( step_count) ;
440- self . build_work_plan ( ) ;
443+ self . build_work_plan ( ) . await ;
441444 self . spawn_run_task ( Some ( step_count) ) ;
442445 Ok ( ( ) )
443446 }
@@ -473,13 +476,13 @@ impl ETLPipeline {
473476
474477 /// Build the initial work plan from the dataset and store it in
475478 /// `self.work_state`.
476- fn build_work_plan ( & self ) {
479+ async fn build_work_plan ( & self ) {
477480 let dataset = & self . dataset ;
478481 let tables = dataset. tables ( ) ;
479482 let mut steps: BTreeMap < u64 , Vec < String > > = BTreeMap :: new ( ) ;
480483
481484 for name in tables. keys ( ) {
482- for id in dataset. batch_ids ( name) {
485+ for id in dataset. clone ( ) . batch_ids ( name) . await {
483486 // Skip batch 0 — it was already processed during initialize().
484487 if id == 0 {
485488 continue ;
0 commit comments