spiceai
diff --git a/Cargo.lock b/Cargo.lock
Lines changed: 5 additions & 0 deletions
diff --git a/crates/etl/Cargo.toml b/crates/etl/Cargo.toml
Lines changed: 5 additions & 0 deletions
diff --git a/crates/etl/README.md b/crates/etl/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/crates/etl/src/lib.rs b/crates/etl/src/lib.rs
Lines changed: 4 additions & 4 deletions
diff --git a/crates/etl/src/main.rs b/crates/etl/src/main.rs
Lines changed: 27 additions & 24 deletions
@@ -17,9 +17,14 @@ name = "etl"
 path = "src/main.rs"
 
 [dependencies]
+adbc_client = { path = "../adbc_client" }
 anyhow.workspace = true
+arrow.workspace = true
+async-trait.workspace = true
+chrono.workspace = true
 clap = { workspace = true, features = ["derive"] }
 data-generation = { path = "../data-generation" }
+serde_json.workspace = true
 system-adapter-protocol = { path = "../system-adapter-protocol" }
 tokio.workspace = true
 tokio-util.workspace = true
 
@@ -1,5 +1,5 @@
 # ETL Pipeline
 
 ```bash
-cargo run -p etl -- --bucket peasee-indexes --region us-west-2 --source-prefix raw --target-prefix rehydrated --dataset tpch --scale-factor 1.0 --num-steps 10
+cargo run -p etl -- --bucket peasee-indexes --region us-west-2 --source-prefix raw --dataset tpch --scale-factor 1.0 --num-steps 10 --adbc-driver databricks --adbc-uri "databricks://token:...@dbc-xxxx.cloud.databricks.com?http_path=..."
 ```
@@ -27,12 +27,14 @@ use std::collections::{BTreeMap, HashSet};
 use std::sync::Arc as StdArc;
 use std::sync::atomic::{AtomicU64, Ordering};
 use std::time::Instant;
-use system_adapter_protocol::{DatasetConfig as ProtocolDatasetConfig, EtlType};
+use system_adapter_protocol::DatasetConfig as ProtocolDatasetConfig;
 use tokio::sync::watch;
 use tokio::task::{JoinHandle, JoinSet};
 use tokio_util::sync::CancellationToken;
 use tracing::{debug, error, info, warn};
 
+pub mod sink;
+
 type DynSource = Arc<dyn Source>;
 type DynTarget = Arc<dyn Target>;
 
@@ -177,7 +179,7 @@ impl ETLPipeline {
     ///
     /// Each entry maps a table name to its
     /// [`DatasetConfig`](system_adapter_protocol::DatasetConfig), which includes
-    /// the rehydrated Arrow schema and the ETL type. This can be used to build a
+    /// the rehydrated Arrow schema. This can be used to build a
     /// [`SetupRequest`](system_adapter_protocol::SetupRequest) for the system
     /// adapter.
     pub fn setup_request_datasets(&self) -> HashMap<String, ProtocolDatasetConfig> {
@@ -186,9 +188,7 @@ impl ETLPipeline {
             .into_iter()
             .map(|(name, table)| {
                 let config = ProtocolDatasetConfig {
-                    etl_type: EtlType::S3,
                     schema: table.rehydrated_schema(),
-                    params: self.target.table_params(&name),
                 };
                 (name, config)
             })
 
@@ -16,14 +16,17 @@ limitations under the License.
 
 use std::sync::Arc;
 
+use adbc_client::AdbcConnection;
 use clap::Parser;
 use data_generation::config::{DatasetConfig, TableFormat, TargetConfig};
 use data_generation::storage::s3::S3Storage;
+use etl::sink::adbc::AdbcSink;
 use etl::{DatasetSource, ETLPipeline, PipelineState, StopReason};
+use serde_json::Value;
 use tracing_subscriber::EnvFilter;
 
 #[derive(Parser)]
-#[command(about = "Run an ETL pipeline that reads from S3, rehydrates data, and writes back to S3")]
+#[command(about = "Run an ETL pipeline that reads from S3, rehydrates data, and writes directly to a SUT via ADBC")]
 struct Cli {
     /// Dataset type: "tpch" or "simple_sequence"
     #[arg(long, default_value = "tpch")]
@@ -45,11 +48,6 @@ struct Cli {
     #[arg(long, default_value = "")]
     source_prefix: String,
 
-    /// Base S3 key prefix for target (rehydrated) data.
-    /// A random suffix is appended automatically to create a unique destination per run.
-    #[arg(long, default_value = "")]
-    target_base_prefix: String,
-
     /// Logical table format propagated to system adapters
     #[arg(long, value_enum, default_value = "parquet")]
     table_format: TableFormat,
@@ -65,6 +63,18 @@ struct Cli {
     /// S3 endpoint URL (for MinIO/LocalStack)
     #[arg(long)]
     endpoint: Option<String>,
+
+    /// ADBC driver name (for example: databricks, flightsql)
+    #[arg(long)]
+    adbc_driver: String,
+
+    /// ADBC connection URI passed as db option `uri`
+    #[arg(long)]
+    adbc_uri: String,
+
+    /// Optional schema name to prefix destination table names
+    #[arg(long)]
+    adbc_schema: Option<String>,
 }
 
 impl Cli {
@@ -97,22 +107,6 @@ impl Cli {
         }
     }
 
-    fn target_config(&self) -> TargetConfig {
-        let run_suffix = uuid::Uuid::new_v4().to_string();
-        let prefix = if self.target_base_prefix.is_empty() {
-            run_suffix
-        } else {
-            format!("{}/{run_suffix}", self.target_base_prefix)
-        };
-        TargetConfig {
-            bucket: self.bucket.clone(),
-            prefix,
-            table_format: self.table_format.clone(),
-            executor_instance_type: self.executor_instance_type.clone(),
-            region: self.region.clone(),
-            endpoint: self.endpoint.clone(),
-        }
-    }
 }
 
 #[tokio::main]
@@ -127,15 +121,24 @@ async fn main() -> anyhow::Result<()> {
     let dataset_config = cli.dataset_config();
 
     let source = Arc::new(S3Storage::new(&cli.source_config())?);
-    let target = Arc::new(S3Storage::new(&cli.target_config())?);
+
+    let adbc_conn = AdbcConnection::create(
+        &cli.adbc_driver,
+        std::collections::HashMap::from([(
+            "uri".to_string(),
+            Value::String(cli.adbc_uri.clone()),
+        )]),
+    )?;
+    let target = Arc::new(AdbcSink::new(adbc_conn, cli.adbc_schema.clone()));
 
     let mut pipeline = ETLPipeline::new(dataset_source, &dataset_config, source, target)?;
 
     tracing::info!(
         dataset = %cli.dataset,
         bucket = %cli.bucket,
         source_prefix = %cli.source_prefix,
-        target_base_prefix = %cli.target_base_prefix,
+        adbc_driver = %cli.adbc_driver,
+        adbc_schema = ?cli.adbc_schema,
         scale_factor = cli.scale_factor,
         num_steps = cli.num_steps,
         "Starting ETL pipeline"