Updates: fold `query_method` into `setup` (setup now returns the ADBC driver and connection kwargs)

lukekim · lukekim · commit dc25e81ac742 · 2026-02-18T18:53:16.000-06:00
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -136,6 +136,7 @@ version.workspace = true
 [dependencies]
 adbc_client = { path = "crates/adbc_client" }
 arrow.workspace = true
+arrow-schema.workspace = true
 async-trait.workspace = true
 clap.workspace = true
 data-generation = { path = "crates/data-generation" }
diff --git a/README.md b/README.md
@@ -17,7 +17,7 @@ flowchart TB
         direction TB
 
         subgraph setup_phase["1 · Setup (JSON-RPC)"]
-            adapter_iface["System Adapter Protocol\n(setup / create_tables /\nquery_method / teardown / metrics)"]
+            adapter_iface["System Adapter Protocol\n(setup / create_tables /\nteardown / metrics)"]
             spice["Spice Cloud Adapter"]
             databricks["Databricks Adapter"]
             other["... Other Adapters"]
@@ -104,9 +104,8 @@ flowchart TB
 
     orchestrator -->|"start run"| run
 
-    adapter_iface -->|"setup(run_id, datasets)"| sut
+    adapter_iface -->|"setup(run_id, datasets)\n→ ADBC driver + kwargs"| executors
     adapter_iface -->|"create_tables(run_id)"| sut
-    adapter_iface -->|"query_method(run_id)\n→ ADBC driver + kwargs"| executors
     setup_phase -->|"system ready"| bench_phase
     bench_phase -->|"benchmark complete"| teardown_phase
 
@@ -123,7 +122,7 @@ A **Run** is a single end-to-end execution of the benchmark for one system. Each
 
 | Phase                    | What happens                                                                                                                                                                                      | Timed? |
 | ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------ |
-| **1. Setup**             | Connect to system adapter via JSON-RPC (stdio or HTTP). Call `setup(run_id, datasets)` to provision the SUT, then `create_tables(run_id)`, then `query_method(run_id)` to get ADBC driver config. | No     |
+| **1. Setup**             | Connect to system adapter via JSON-RPC (stdio or HTTP). Call `setup(run_id, datasets, metadata)` to provision the SUT and return ADBC driver config, then `create_tables(run_id)`.                | No     |
 | **2. Benchmark (timed)** | Three sequential stages — warm-up (1× query set), baseline (10% of duration, 60s–600s), and load test (full duration with concurrent clients).                                                    | Yes    |
 | **3. Teardown**          | Call `teardown(run_id)` via the adapter to deprovision resources and clean up.                                                                                                                    | No     |
 
@@ -148,7 +147,7 @@ Common CLI/workflow usage:
 | Component                   | Responsibility                                                                                                                                                |
 | --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | **GitHub Actions**          | Orchestrates Runs on schedule, PR, or manual dispatch. Manages the full Run lifecycle across phases.                                                          |
-| **System Adapter Protocol** | JSON-RPC 2.0 interface (stdio or HTTP) for each platform. Methods: `setup`, `create_tables`, `query_method`, `teardown`, `metrics`.                           |
+| **System Adapter Protocol** | JSON-RPC 2.0 interface (stdio or HTTP) for each platform. Methods: `setup`, `create_tables`, `teardown`, `metrics`.                                           |
 | **Query Executors**         | Pluggable query execution: ADBC direct (FlightSQL/Databricks drivers), HTTP (`/v1/sql`), or distributed (`/v1/queries` with polling).                         |
 | **Data Generator**          | Standalone binary (`data-generation`) that produces TPC-H partitioned Parquet batches and writes them to S3.                                                  |
 | **Test Framework**          | Core engine managing the warm-up → baseline → load test pipeline, query sets (TPC-H, TPC-DS, ClickBench, parameterized, scenario), and statistics collection. |
@@ -218,9 +217,8 @@ To benchmark a new platform, implement the JSON-RPC 2.0 adapter with these metho
 
 1. **`setup(run_id, datasets)`** — Provision infrastructure and configure the target system.
 2. **`create_tables(run_id)`** — Create/register destination tables for the benchmark datasets.
-3. **`query_method(run_id)`** — Return the ADBC driver type (`flightsql` or `databricks`) and connection kwargs so SpiceBench can establish a direct query connection.
-4. **`teardown(run_id)`** — Clean up provisioned resources.
-5. **`metrics(run_id)`** *(optional)* — Return current resource usage (CPU, memory, disk, IOPS) and ingestion progress (rows, bytes, rows/s, active connections).
+3. **`teardown(run_id)`** — Clean up provisioned resources.
+4. **`metrics(run_id)`** *(optional)* — Return current resource usage (CPU, memory, disk, IOPS) and ingestion progress (rows, bytes, rows/s, active connections).
 
 The adapter can run as a **stdio** child process or as an **HTTP** server.
 
@@ -247,9 +245,8 @@ For each run, SpiceBench calls adapter JSON-RPC methods in this order:
 
 1. `setup(run_id, datasets, metadata)`
 2. `create_tables(run_id)`
-3. `query_method(run_id)`
-4. benchmark execution and optional periodic `metrics(run_id)` scraping
-5. `teardown(run_id)`
+3. benchmark execution and optional periodic `metrics(run_id)` scraping
+4. `teardown(run_id)`
 
 Tiny `create_tables` request example:
 
diff --git a/crates/system-adapter-protocol/src/client.rs b/crates/system-adapter-protocol/src/client.rs
@@ -207,16 +207,6 @@ impl Client {
             .ok_or_else(|| ClientError::InvalidResponse("Missing result".to_string()))
     }
 
-    /// Get query method/driver information for a benchmark run
-    pub async fn query_method(&mut self, run_id: uuid::Uuid) -> Result<crate::QueryMethodResponse> {
-        let request = crate::QueryMethodRequest { run_id };
-        let rpc_request = JsonRpcRequest::new(1, crate::methods::QUERY_METHOD, request);
-        let response = self.call_typed(rpc_request).await?;
-        response
-            .result
-            .ok_or_else(|| ClientError::InvalidResponse("Missing result".to_string()))
-    }
-
     /// Teardown a benchmark run
     pub async fn teardown(&mut self, run_id: uuid::Uuid) -> Result<crate::TeardownResponse> {
         let request = crate::TeardownRequest { run_id };
diff --git a/crates/system-adapter-protocol/src/lib.rs b/crates/system-adapter-protocol/src/lib.rs
@@ -22,7 +22,7 @@ limitations under the License.
 //!
 //! # Features
 //!
-//! - **Protocol types**: Request/response types for setup, create_tables, query_method, and teardown
+//! - **Protocol types**: Request/response types for setup, create_tables, teardown, and metrics
 //! - **Client**: Ready-to-use client with Stdio and HTTP transports (requires `client` feature)
 //! - **Server**: Easy server implementation via Handler trait (requires `server` feature)
 //! - **JSON-RPC**: Standard JSON-RPC 2.0 envelope types
@@ -44,9 +44,7 @@ limitations under the License.
 //! let setup_response = client.setup(run_id, HashMap::new(), HashMap::new()).await?;
 //! let create_tables_response = client.create_tables(run_id).await?;
 //!
-//! // Get query method information
-//! let query_response = client.query_method(run_id).await?;
-//! println!("Driver: {:?}", query_response.driver);
+//! println!("Driver: {:?}", setup_response.driver);
 //!
 //! // Teardown the run
 //! let teardown_response = client.teardown(run_id).await?;
@@ -60,8 +58,8 @@ limitations under the License.
 //! # #[cfg(feature = "server")]
 //! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
 //! use system_adapter_protocol::{
-//!     AdbcDriver, CreateTablesResponse, DatasetConfig, Handler, QueryMethodResponse, Server,
-//!     SetupResponse, TeardownResponse,
+//!     AdbcDriver, CreateTablesResponse, DatasetConfig, Handler, Server, SetupResponse,
+//!     TeardownResponse,
 //! };
 //! use async_trait::async_trait;
 //! use std::collections::HashMap;
@@ -79,11 +77,7 @@ limitations under the License.
 //!     ) -> Result<SetupResponse, String> {
 //!         // Your setup logic here
 //!         let _ = metadata;
-//!         Ok(SetupResponse { ok: true })
-//!     }
-//!
-//!     async fn query_method(&mut self, run_id: Uuid) -> Result<QueryMethodResponse, String> {
-//!         Ok(QueryMethodResponse {
+//!         Ok(SetupResponse {
 //!             driver: AdbcDriver::Flightsql,
 //!             db_kwargs: HashMap::new(),
 //!         })
@@ -162,11 +156,13 @@ pub struct SetupRequest {
     pub metadata: HashMap<String, serde_json::Value>,
 }
 
-/// Response from setup request
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+/// Response from setup request containing ADBC connection information
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
 pub struct SetupResponse {
-    /// Indicates if setup was successful
-    pub ok: bool,
+    /// ADBC driver to use for database connections
+    pub driver: AdbcDriver,
+    /// Driver-specific connection parameters
+    pub db_kwargs: HashMap<String, serde_json::Value>,
 }
 
 /// Request to create benchmark tables in the system under test.
@@ -185,24 +181,6 @@ pub struct CreateTablesResponse {
     pub ok: bool,
 }
 
-/// Request to get query method/driver information
-///
-/// JSON-RPC method: `query_method`
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct QueryMethodRequest {
-    /// Unique identifier for the benchmark run
-    pub run_id: Uuid,
-}
-
-/// Response containing database connection information
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
-pub struct QueryMethodResponse {
-    /// ADBC driver to use for database connections
-    pub driver: AdbcDriver,
-    /// Driver-specific connection parameters
-    pub db_kwargs: HashMap<String, serde_json::Value>,
-}
-
 /// Request to teardown a benchmark run
 ///
 /// JSON-RPC method: `teardown`
@@ -377,7 +355,6 @@ pub mod error_codes {
 pub mod methods {
     pub const SETUP: &str = "setup";
     pub const CREATE_TABLES: &str = "create_tables";
-    pub const QUERY_METHOD: &str = "query_method";
     pub const TEARDOWN: &str = "teardown";
     pub const METRICS: &str = "metrics";
     pub const RPC_METHODS: &str = "rpc.methods";
diff --git a/crates/system-adapter-protocol/src/server.rs b/crates/system-adapter-protocol/src/server.rs
@@ -18,8 +18,8 @@ limitations under the License.
 
 use crate::{
     CreateTablesRequest, CreateTablesResponse, DatasetConfig, JsonRpcError, JsonRpcResponse,
-    MetricsRequest, MetricsResponse, QueryMethodRequest, QueryMethodResponse, SetupRequest,
-    SetupResponse, TeardownRequest, TeardownResponse, error_codes, methods,
+    MetricsRequest, MetricsResponse, SetupRequest, SetupResponse, TeardownRequest,
+    TeardownResponse, error_codes, methods,
 };
 use async_trait::async_trait;
 use serde::de::DeserializeOwned;
@@ -68,7 +68,7 @@ pub type Result<T> = std::result::Result<T, ServerError>;
 /// Handler trait for implementing system adapter logic
 ///
 /// Implement this trait to define how your system adapter handles
-/// setup, query_method, and teardown requests.
+/// setup, create_tables, teardown, and metrics requests.
 #[async_trait]
 pub trait Handler: Send + Sync {
     /// Setup a benchmark run
@@ -85,12 +85,6 @@ pub trait Handler: Send + Sync {
         run_id: Uuid,
     ) -> std::result::Result<CreateTablesResponse, String>;
 
-    /// Get query method/driver information for a benchmark run
-    async fn query_method(
-        &mut self,
-        run_id: Uuid,
-    ) -> std::result::Result<QueryMethodResponse, String>;
-
     /// Teardown a benchmark run
     async fn teardown(&mut self, run_id: Uuid) -> std::result::Result<TeardownResponse, String>;
 
@@ -111,7 +105,6 @@ pub trait Handler: Send + Sync {
         vec![
             methods::SETUP.to_string(),
             methods::CREATE_TABLES.to_string(),
-            methods::QUERY_METHOD.to_string(),
             methods::TEARDOWN.to_string(),
             methods::METRICS.to_string(),
             methods::RPC_METHODS.to_string(),
@@ -189,7 +182,6 @@ impl<H: Handler> Server<H> {
         let result = match method {
             methods::SETUP => self.handle_setup(&request, id.clone()).await,
             methods::CREATE_TABLES => self.handle_create_tables(&request, id.clone()).await,
-            methods::QUERY_METHOD => self.handle_query_method(&request, id.clone()).await,
             methods::TEARDOWN => self.handle_teardown(&request, id.clone()).await,
             methods::METRICS => self.handle_metrics(&request, id.clone()).await,
             methods::RPC_METHODS => self.handle_rpc_methods(id.clone()).await,
@@ -258,18 +250,6 @@ impl<H: Handler> Server<H> {
         )
     }
 
-    async fn handle_query_method(
-        &mut self,
-        request: &serde_json::Value,
-        id: serde_json::Value,
-    ) -> serde_json::Value {
-        let req: QueryMethodRequest = match Self::parse_params(request, &id) {
-            Ok(r) => r,
-            Err(e) => return e,
-        };
-        Self::handler_response(self.handler.query_method(req.run_id).await, id)
-    }
-
     async fn handle_create_tables(
         &mut self,
         request: &serde_json::Value,
@@ -327,14 +307,7 @@ mod tests {
             _datasets: HashMap<String, DatasetConfig>,
             _metadata: HashMap<String, serde_json::Value>,
         ) -> std::result::Result<SetupResponse, String> {
-            Ok(SetupResponse { ok: true })
-        }
-
-        async fn query_method(
-            &mut self,
-            _run_id: Uuid,
-        ) -> std::result::Result<QueryMethodResponse, String> {
-            Ok(QueryMethodResponse {
+            Ok(SetupResponse {
                 driver: crate::AdbcDriver::Flightsql,
                 db_kwargs: HashMap::new(),
             })
@@ -366,7 +339,7 @@ mod tests {
         let response = server.handle_request(request).await;
 
         assert!(response.get("result").is_some());
-        assert_eq!(response["result"]["ok"], true);
+        assert_eq!(response["result"]["driver"], "flightsql");
     }
 
     #[tokio::test]
diff --git a/src/main.rs b/src/main.rs
@@ -14,11 +14,13 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-use std::sync::Arc;
+use std::{collections::HashMap, sync::Arc};
 
 use adbc_client::AdbcConnection;
+use arrow_schema::{DataType, Field, Schema, TimeUnit};
 use clap::Parser;
 use data_generation::config::{DatasetConfig as GenerationDatasetConfig, TargetConfig};
+use data_generation::dataset::Dataset;
 use data_generation::dataset::MutationConfig;
 use data_generation::storage::s3::S3Storage;
 use etl::sink::adbc::AdbcSink;
@@ -35,6 +37,28 @@ mod scenario;
 use crate::commands::connect_system_adapter;
 use crate::scenario::Scenario;
 
+fn setup_request_datasets(dataset: &Arc<dyn Dataset>) -> HashMap<String, system_adapter_protocol::DatasetConfig> {
+    dataset
+        .tables()
+        .into_iter()
+        .map(|(name, table)| {
+            let mut fields: Vec<_> = table.schema.fields().iter().cloned().collect();
+            fields.push(Arc::new(Field::new(
+                "__created_at",
+                DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into())),
+                true,
+            )));
+
+            (
+                name,
+                system_adapter_protocol::DatasetConfig {
+                    schema: Arc::new(Schema::new(fields)),
+                },
+            )
+        })
+        .collect()
+}
+
 #[derive(Parser)]
 #[command(author, version, about, long_about = None)]
 struct Cli {
@@ -93,12 +117,29 @@ async fn main() -> anyhow::Result<()> {
     };
 
     let run_id = uuid::Uuid::new_v4();
+    let mutations = MutationConfig::new(0.1, 0.1);
 
-    // --- Query method from system adapter ---
-    let adbc_driver = match system_adapter_client.query_method(run_id).await {
-        Ok(method) => method,
+    let setup_dataset = dataset_source.create(&generation_config, &mutations)?;
+    let datasets = setup_request_datasets(&setup_dataset);
+
+    let setup_metadata = std::collections::HashMap::from([
+        (
+            "executor_instance_type".to_string(),
+            serde_json::Value::String(cli.common.executor_instance_type.clone()),
+        ),
+        (
+            "table_format".to_string(),
+            serde_json::Value::String(cli.common.table_format.to_string()),
+        ),
+    ]);
+
+    let adbc_driver = match system_adapter_client
+        .setup(run_id, datasets, setup_metadata)
+        .await
+    {
+        Ok(response) => response,
         Err(e) => {
-            return Err(anyhow::anyhow!("Failed to query system adapter: {e}"));
+            return Err(anyhow::anyhow!("Failed to setup system adapter: {e}"));
         }
     };
 
@@ -130,8 +171,6 @@ async fn main() -> anyhow::Result<()> {
         ));
     };
 
-    let mutations = MutationConfig::new(0.1, 0.1);
-
     let target = Arc::new(AdbcSink::new_without_table_creation(adbc_conn, None));
     let mut pipeline = ETLPipeline::new(
         dataset_source,
@@ -141,26 +180,6 @@ async fn main() -> anyhow::Result<()> {
         &mutations,
     )?;
 
-    let datasets = pipeline.setup_request_datasets();
-    let setup_metadata = std::collections::HashMap::from([
-        (
-            "executor_instance_type".to_string(),
-            serde_json::Value::String(cli.common.executor_instance_type.clone()),
-        ),
-        (
-            "table_format".to_string(),
-            serde_json::Value::String(cli.common.table_format.to_string()),
-        ),
-    ]);
-
-    if let Err(e) = system_adapter_client
-        .setup(run_id, datasets, setup_metadata)
-        .await
-    {
-        pipeline.cancel();
-        return Err(anyhow::anyhow!("Failed to setup system adapter: {e}"));
-    }
-
     if let Err(e) = system_adapter_client.create_tables(run_id).await {
         pipeline.cancel();
         return Err(anyhow::anyhow!(
diff --git a/system-adapters/databricks/src/main.rs b/system-adapters/databricks/src/main.rs