@@ -25,7 +25,6 @@ use data_generation::storage::s3::S3Storage;
2525use etl:: sink:: Sink ;
2626use etl:: sink:: adbc:: AdbcSink ;
2727use etl:: sink:: null:: NullSink ;
28- use etl:: sink:: s3_hive:: S3HiveSink ;
2928use etl:: { DatasetSource , ETLPipeline , PipelineState , StopReason } ;
3029use tracing_subscriber:: EnvFilter ;
3130
@@ -35,8 +34,6 @@ const DEFAULT_FLIGHTSQL_MAX_MSG_SIZE_BYTES: &str = "78643200";
3534#[ derive( Clone , Debug , Default , ValueEnum ) ]
3635enum SinkType {
3736 #[ default]
38- #[ value( name = "s3-hive" ) ]
39- S3Hive ,
4037 #[ value( name = "adbc" ) ]
4138 Adbc ,
4239 #[ value( name = "null" ) ]
@@ -45,7 +42,7 @@ enum SinkType {
4542
4643#[ derive( Parser ) ]
4744#[ command(
48- about = "Run an ETL pipeline that reads from a data archive, rehydrates data, and writes to S3 Hive, ADBC, or a null sink"
45+ about = "Run an ETL pipeline that reads from a data archive, rehydrates data, and writes to ADBC or a null sink"
4946) ]
5047struct Cli {
5148 /// Scenario name (e.g. "tpch") — used in the storage path `{prefix}/{scenario}/{version}/`
@@ -82,23 +79,11 @@ struct Cli {
8279 #[ arg( long) ]
8380 endpoint : Option < String > ,
8481
85- /// Base S3 key prefix for the ETL target (hive-partitioned output).
86- /// Defaults to the source prefix if not specified.
87- #[ arg( long, default_value = "" ) ]
88- target_prefix : String ,
89-
90- /// Ordered list of columns used for hive-style partitioning.
91- ///
92- /// Example: `--partition-by __created_at,product_type`
93- #[ arg( long, value_delimiter = ',' , default_value = "__created_at" ) ]
94- partition_by : Vec < String > ,
95-
9682 /// ETL sink target.
9783 ///
98- /// - s3-hive: write hive-partitioned parquet to S3
9984 /// - adbc: write via ADBC bulk ingest
10085 /// - null: discard all writes (throughput benchmark mode)
101- #[ arg( long, value_enum, default_value_t = SinkType :: S3Hive ) ]
86+ #[ arg( long, value_enum, default_value_t = SinkType :: Adbc ) ]
10287 sink : SinkType ,
10388
10489 /// ADBC driver name (for example: "databricks" or "flightsql").
@@ -271,55 +256,6 @@ async fn main() -> anyhow::Result<()> {
271256 Some ( adbc_sink) ,
272257 )
273258 }
274- SinkType :: S3Hive => {
275- if cli. adbc_driver . is_some ( )
276- || cli. adbc_uri . is_some ( )
277- || !cli. adbc_options . is_empty ( )
278- || cli. adbc_catalog . is_some ( )
279- || cli. adbc_schema . is_some ( )
280- || cli. adbc_create_tables
281- {
282- anyhow:: bail!(
283- "ADBC options are only valid with --sink adbc. Remove ADBC flags or set --sink adbc."
284- ) ;
285- }
286-
287- let hive_prefix = if cli. target_prefix . is_empty ( ) {
288- format ! (
289- "{}/{}/{}" ,
290- cli. prefix. trim_matches( '/' ) ,
291- cli. scenario,
292- version
293- )
294- } else {
295- format ! (
296- "{}/{}/{}" ,
297- cli. target_prefix. trim_matches( '/' ) ,
298- cli. scenario,
299- version
300- )
301- } ;
302-
303- let bucket = cli
304- . bucket
305- . as_ref ( )
306- . ok_or_else ( || anyhow:: anyhow!( "--bucket is required for --sink s3-hive" ) ) ?;
307-
308- let hive_config = TargetConfig {
309- bucket : bucket. clone ( ) ,
310- prefix : hive_prefix,
311- region : cli. region . clone ( ) ,
312- endpoint : cli. endpoint . clone ( ) ,
313- partition_columns : cli. partition_by . clone ( ) ,
314- } ;
315-
316- (
317- Arc :: new ( S3HiveSink :: new ( & hive_config) ?) ,
318- Some ( hive_config) ,
319- "s3-hive" . to_string ( ) ,
320- None ,
321- )
322- }
323259 SinkType :: Null => {
324260 if cli. adbc_driver . is_some ( )
325261 || cli. adbc_uri . is_some ( )
@@ -373,8 +309,6 @@ async fn main() -> anyhow::Result<()> {
373309 adbc_catalog = ?cli. adbc_catalog,
374310 adbc_schema = ?cli. adbc_schema,
375311 adbc_create_tables = cli. adbc_create_tables,
376- target_prefix = %cli. target_prefix,
377- partition_by = ?cli. partition_by,
378312 scale_factor = version_metadata. scale_factor,
379313 num_steps = version_metadata. num_steps,
380314 "Starting ETL pipeline"
0 commit comments