@@ -16,14 +16,17 @@ limitations under the License.
1616
1717use std:: sync:: Arc ;
1818
19+ use adbc_client:: AdbcConnection ;
1920use clap:: Parser ;
2021use data_generation:: config:: { DatasetConfig , TableFormat , TargetConfig } ;
2122use data_generation:: storage:: s3:: S3Storage ;
23+ use etl:: sink:: adbc:: AdbcSink ;
2224use etl:: { DatasetSource , ETLPipeline , PipelineState , StopReason } ;
25+ use serde_json:: Value ;
2326use tracing_subscriber:: EnvFilter ;
2427
2528#[ derive( Parser ) ]
26- #[ command( about = "Run an ETL pipeline that reads from S3, rehydrates data, and writes back to S3 " ) ]
29+ #[ command( about = "Run an ETL pipeline that reads from S3, rehydrates data, and writes directly to a SUT via ADBC " ) ]
2730struct Cli {
2831 /// Dataset type: "tpch" or "simple_sequence"
2932 #[ arg( long, default_value = "tpch" ) ]
@@ -45,11 +48,6 @@ struct Cli {
4548 #[ arg( long, default_value = "" ) ]
4649 source_prefix : String ,
4750
48- /// Base S3 key prefix for target (rehydrated) data.
49- /// A random suffix is appended automatically to create a unique destination per run.
50- #[ arg( long, default_value = "" ) ]
51- target_base_prefix : String ,
52-
5351 /// Logical table format propagated to system adapters
5452 #[ arg( long, value_enum, default_value = "parquet" ) ]
5553 table_format : TableFormat ,
@@ -65,6 +63,18 @@ struct Cli {
6563 /// S3 endpoint URL (for MinIO/LocalStack)
6664 #[ arg( long) ]
6765 endpoint : Option < String > ,
66+
67+ /// ADBC driver name (for example: databricks, flightsql)
68+ #[ arg( long) ]
69+ adbc_driver : String ,
70+
71+ /// ADBC connection URI passed as db option `uri`
72+ #[ arg( long) ]
73+ adbc_uri : String ,
74+
75+ /// Optional schema name to prefix destination table names
76+ #[ arg( long) ]
77+ adbc_schema : Option < String > ,
6878}
6979
7080impl Cli {
@@ -97,22 +107,6 @@ impl Cli {
97107 }
98108 }
99109
100- fn target_config ( & self ) -> TargetConfig {
101- let run_suffix = uuid:: Uuid :: new_v4 ( ) . to_string ( ) ;
102- let prefix = if self . target_base_prefix . is_empty ( ) {
103- run_suffix
104- } else {
105- format ! ( "{}/{run_suffix}" , self . target_base_prefix)
106- } ;
107- TargetConfig {
108- bucket : self . bucket . clone ( ) ,
109- prefix,
110- table_format : self . table_format . clone ( ) ,
111- executor_instance_type : self . executor_instance_type . clone ( ) ,
112- region : self . region . clone ( ) ,
113- endpoint : self . endpoint . clone ( ) ,
114- }
115- }
116110}
117111
118112#[ tokio:: main]
@@ -127,15 +121,24 @@ async fn main() -> anyhow::Result<()> {
127121 let dataset_config = cli. dataset_config ( ) ;
128122
129123 let source = Arc :: new ( S3Storage :: new ( & cli. source_config ( ) ) ?) ;
130- let target = Arc :: new ( S3Storage :: new ( & cli. target_config ( ) ) ?) ;
124+
125+ let adbc_conn = AdbcConnection :: create (
126+ & cli. adbc_driver ,
127+ std:: collections:: HashMap :: from ( [ (
128+ "uri" . to_string ( ) ,
129+ Value :: String ( cli. adbc_uri . clone ( ) ) ,
130+ ) ] ) ,
131+ ) ?;
132+ let target = Arc :: new ( AdbcSink :: new ( adbc_conn, cli. adbc_schema . clone ( ) ) ) ;
131133
132134 let mut pipeline = ETLPipeline :: new ( dataset_source, & dataset_config, source, target) ?;
133135
134136 tracing:: info!(
135137 dataset = %cli. dataset,
136138 bucket = %cli. bucket,
137139 source_prefix = %cli. source_prefix,
138- target_base_prefix = %cli. target_base_prefix,
140+ adbc_driver = %cli. adbc_driver,
141+ adbc_schema = ?cli. adbc_schema,
139142 scale_factor = cli. scale_factor,
140143 num_steps = cli. num_steps,
141144 "Starting ETL pipeline"
0 commit comments