spiceai
diff --git a/‎.claude/skills/system-adapter-builder/SKILL.md‎
Lines changed: 10 additions & 6 deletions b/‎.claude/skills/system-adapter-builder/SKILL.md‎
Lines changed: 10 additions & 6 deletions
diff --git a/‎.github/workflows/validate_system_adapter_templates.yml‎
Lines changed: 2 additions & 1 deletion b/‎.github/workflows/validate_system_adapter_templates.yml‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎README.md‎
Lines changed: 38 additions & 13 deletions b/‎README.md‎
Lines changed: 38 additions & 13 deletions
diff --git a/‎crates/data-generation/src/metrics.rs‎
Lines changed: 21 additions & 21 deletions b/‎crates/data-generation/src/metrics.rs‎
Lines changed: 21 additions & 21 deletions
diff --git a/‎crates/etl/src/sink/adbc.rs‎
Lines changed: 17 additions & 3 deletions b/‎crates/etl/src/sink/adbc.rs‎
Lines changed: 17 additions & 3 deletions
diff --git a/‎crates/system-adapter-protocol/src/client.rs‎
Lines changed: 13 additions & 0 deletions b/‎crates/system-adapter-protocol/src/client.rs‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎crates/system-adapter-protocol/src/lib.rs‎
Lines changed: 25 additions & 3 deletions b/‎crates/system-adapter-protocol/src/lib.rs‎
Lines changed: 25 additions & 3 deletions
@@ -1,6 +1,6 @@
 ---
 name: system-adapter-builder
-description: Build or update a SpiceBench system adapter with JSON-RPC over stdio and HTTP, including setup/query_method/teardown/metrics support and template validation.
+description: Build or update a SpiceBench system adapter with JSON-RPC over stdio and HTTP, including setup/create_tables/query_method/teardown/metrics support and template validation.
 ---
 
 # SpiceBench System Adapter Builder
@@ -17,6 +17,7 @@ A JSON-RPC 2.0 adapter that supports both transports:
 Required methods:
 
 - `setup(run_id, datasets)`
+- `create_tables(run_id)`
 - `query_method(run_id)`
 - `teardown(run_id)`
 - `metrics(run_id)`
@@ -34,15 +35,17 @@ Required methods:
 
 1. Copy the nearest template from `system-adapters/templates/<language>`.
 2. Keep request/response envelopes JSON-RPC 2.0 compliant (`jsonrpc`, `id`, `method`, `params`).
-3. Implement `setup` and `teardown` with run-scoped resources keyed by `run_id`.
-4. Implement `query_method` to return:
+3. Implement `setup` with run-scoped resources keyed by `run_id`.
+4. Implement `create_tables` so the adapter creates/registers the benchmark destination tables for the run identified by `run_id`.
+5. Implement `query_method` to return:
    - `driver`: typically `flightsql` or `databricks`
    - `db_kwargs`: real endpoint + auth kwargs for the SUT
-5. Implement `metrics` to return both objects:
+6. Implement `teardown` with run-scoped cleanup keyed by `run_id`.
+7. Implement `metrics` to return both objects:
    - `resource`: CPU, memory, disk bytes, disk IOPS
    - `ingestion`: rows, bytes, rows/s, active connections
-6. Keep stdio and HTTP using the same dispatcher so behavior is identical.
-7. Return JSON-RPC errors with standard codes:
+8. Keep stdio and HTTP using the same dispatcher so behavior is identical.
+9. Return JSON-RPC errors with standard codes:
    - `-32700` parse error
    - `-32600` invalid request
    - `-32601` method not found
@@ -70,6 +73,7 @@ If any metric is unavailable, return `0`/`0.0` and document why.
 
 - Adapter responds to all required methods over stdio and HTTP.
 - `rpc.methods` includes every exposed method.
+- `create_tables` creates/registers benchmark tables for each dataset.
 - `query_method` returns a valid `driver` and complete `db_kwargs`.
 - `metrics` returns both `resource` and `ingestion` objects.
 - Language build/syntax checks pass:
 
@@ -152,9 +152,10 @@ jobs:
         working-directory: system-adapters/templates/go
         run: go build ./...
       - name: Smoke test Go template (HTTP setup)
+        working-directory: system-adapters/templates/go
         run: |
           set -euo pipefail
-          go run ./system-adapters/templates/go --transport http --host 127.0.0.1 --port 18083 --path /jsonrpc > /tmp/go-template.log 2>&1 &
+          go run . --transport http --host 127.0.0.1 --port 18083 --path /jsonrpc > /tmp/go-template.log 2>&1 &
           pid=$!
           trap 'kill "$pid" 2>/dev/null || true' EXIT
 
 
@@ -17,7 +17,7 @@ flowchart TB
         direction TB
 
         subgraph setup_phase["1 · Setup (JSON-RPC)"]
-            adapter_iface["System Adapter Protocol\n(setup / query_method /\nteardown / metrics)"]
+            adapter_iface["System Adapter Protocol\n(setup / create_tables /\nquery_method / teardown / metrics)"]
             spice["Spice Cloud Adapter"]
             databricks["Databricks Adapter"]
             other["... Other Adapters"]
@@ -104,7 +104,8 @@ flowchart TB
 
     orchestrator -->|"start run"| run
 
-    adapter_iface -->|"setup(run_id)"| sut
+    adapter_iface -->|"setup(run_id, datasets)"| sut
+    adapter_iface -->|"create_tables(run_id)"| sut
     adapter_iface -->|"query_method(run_id)\n→ ADBC driver + kwargs"| executors
     setup_phase -->|"system ready"| bench_phase
     bench_phase -->|"benchmark complete"| teardown_phase
@@ -120,11 +121,11 @@ flowchart TB
 
 A **Run** is a single end-to-end execution of the benchmark for one system. Each Run proceeds through three phases:
 
-| Phase                    | What happens                                                                                                                                                       | Timed? |
-| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------ |
-| **1. Setup**             | Connect to system adapter via JSON-RPC (stdio or HTTP). Call `setup(run_id, datasets)` to provision the SUT then `query_method(run_id)` to get ADBC driver config. | No     |
-| **2. Benchmark (timed)** | Three sequential stages — warm-up (1× query set), baseline (10% of duration, 60s–600s), and load test (full duration with concurrent clients).                     | Yes    |
-| **3. Teardown**          | Call `teardown(run_id)` via the adapter to deprovision resources and clean up.                                                                                     | No     |
+| Phase                    | What happens                                                                                                                                                                                      | Timed? |
+| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------ |
+| **1. Setup**             | Connect to system adapter via JSON-RPC (stdio or HTTP). Call `setup(run_id, datasets)` to provision the SUT, then `create_tables(run_id)`, then `query_method(run_id)` to get ADBC driver config. | No     |
+| **2. Benchmark (timed)** | Three sequential stages — warm-up (1× query set), baseline (10% of duration, 60s–600s), and load test (full duration with concurrent clients).                                                    | Yes    |
+| **3. Teardown**          | Call `teardown(run_id)` via the adapter to deprovision resources and clean up.                                                                                                                    | No     |
 
 The **E2E benchmark duration** (phase 2, load test stage) is the primary ranking metric. After the load test, each query's p99 latency is compared against the baseline: >20% increase = FAIL, 10–20% = WARN, ≥3 WARNs = FAIL.
 
@@ -147,7 +148,7 @@ Common CLI/workflow usage:
 | Component                   | Responsibility                                                                                                                                                |
 | --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | **GitHub Actions**          | Orchestrates Runs on schedule, PR, or manual dispatch. Manages the full Run lifecycle across phases.                                                          |
-| **System Adapter Protocol** | JSON-RPC 2.0 interface (stdio or HTTP) for each platform. Methods: `setup`, `query_method`, `teardown`, `metrics`.                                            |
+| **System Adapter Protocol** | JSON-RPC 2.0 interface (stdio or HTTP) for each platform. Methods: `setup`, `create_tables`, `query_method`, `teardown`, `metrics`.                           |
 | **Query Executors**         | Pluggable query execution: ADBC direct (FlightSQL/Databricks drivers), HTTP (`/v1/sql`), or distributed (`/v1/queries` with polling).                         |
 | **Data Generator**          | Standalone binary (`data-generation`) that produces TPC-H partitioned Parquet batches and writes them to S3.                                                  |
 | **Test Framework**          | Core engine managing the warm-up → baseline → load test pipeline, query sets (TPC-H, TPC-DS, ClickBench, parameterized, scenario), and statistics collection. |
@@ -216,9 +217,10 @@ Results from every Run are published to [SpiceBench.com](https://spicebench.com)
 To benchmark a new platform, implement the JSON-RPC 2.0 adapter with these methods:
 
 1. **`setup(run_id, datasets)`** — Provision infrastructure and configure the target system.
-2. **`query_method(run_id)`** — Return the ADBC driver type (`flightsql` or `databricks`) and connection kwargs so SpiceBench can establish a direct query connection.
-3. **`teardown(run_id)`** — Clean up provisioned resources.
-4. **`metrics(run_id)`** *(optional)* — Return current resource usage (CPU, memory, disk, IOPS) and ingestion progress (rows, bytes, rows/s, active connections).
+2. **`create_tables(run_id)`** — Create/register destination tables for the benchmark datasets.
+3. **`query_method(run_id)`** — Return the ADBC driver type (`flightsql` or `databricks`) and connection kwargs so SpiceBench can establish a direct query connection.
+4. **`teardown(run_id)`** — Clean up provisioned resources.
+5. **`metrics(run_id)`** *(optional)* — Return current resource usage (CPU, memory, disk, IOPS) and ingestion progress (rows, bytes, rows/s, active connections).
 
 The adapter can run as a **stdio** child process or as an **HTTP** server.
 
@@ -237,7 +239,30 @@ The `spicebench` CLI connects to a system adapter using JSON-RPC 2.0 over either
 - **stdio transport**: use `--system-adapter-stdio-cmd` (SpiceBench starts the child process).
 - **HTTP transport**: use `--system-adapter-http-url` (SpiceBench connects to a remote adapter endpoint).
 - **execution mode**: `adapter-command` (default) dispatches `spicebench run ...` to adapter JSON-RPC `run.load`.
-- **execution mode**: `direct-query` runs the load/query path directly via ADBC, using the adapter only for setup/teardown/metrics.
+- **execution mode**: `direct-query` runs the load/query path directly via ADBC, using the adapter for setup/table creation/teardown/metrics.
+
+#### Adapter lifecycle (direct-query mode)
+
+For each run, SpiceBench calls adapter JSON-RPC methods in this order:
+
+1. `setup(run_id, datasets, metadata)`
+2. `create_tables(run_id)`
+3. `query_method(run_id)`
+4. benchmark execution and optional periodic `metrics(run_id)` scraping
+5. `teardown(run_id)`
+
+Minimal `create_tables` request example:
+
+```json
+{
+    "jsonrpc": "2.0",
+    "id": 2,
+    "method": "create_tables",
+    "params": {
+        "run_id": "00000000-0000-0000-0000-000000000000"
+    }
+}
+```
 
 #### Stdio example (child process started by SpiceBench)
 
@@ -269,7 +294,7 @@ Notes:
 - `--system-adapter-stdio-args` passes CLI args to the stdio adapter command.
 - `--system-adapter-env` is only valid for stdio transport.
 
-#### Direct-query example (ADBC query path, adapter for setup/teardown only)
+#### Direct-query example (ADBC query path, adapter for setup/table creation/teardown)
 
 ```bash
 spicebench \
 
@@ -84,29 +84,29 @@ impl Metrics {
         self.inner.batches_generated.fetch_add(1, Ordering::Relaxed);
 
         // Count inserts, updates, and deletes from the `_op` column.
-        if let Ok(idx) = batch.schema().index_of("_op") {
-            if let Some(op_array) = batch.column(idx).as_any().downcast_ref::<StringArray>() {
-                let mut creates = 0u64;
-                let mut updates = 0u64;
-                let mut deletes = 0u64;
-                for i in 0..op_array.len() {
-                    match op_array.value(i) {
-                        "c" => creates += 1,
-                        "u" => updates += 1,
-                        "d" => deletes += 1,
-                        _ => {}
-                    }
+        if let Ok(idx) = batch.schema().index_of("_op")
+            && let Some(op_array) = batch.column(idx).as_any().downcast_ref::<StringArray>()
+        {
+            let mut creates = 0u64;
+            let mut updates = 0u64;
+            let mut deletes = 0u64;
+            for i in 0..op_array.len() {
+                match op_array.value(i) {
+                    "c" => creates += 1,
+                    "u" => updates += 1,
+                    "d" => deletes += 1,
+                    _ => {}
                 }
-                self.inner
-                    .rows_created
-                    .fetch_add(creates, Ordering::Relaxed);
-                self.inner
-                    .rows_updated
-                    .fetch_add(updates, Ordering::Relaxed);
-                self.inner
-                    .rows_deleted
-                    .fetch_add(deletes, Ordering::Relaxed);
             }
+            self.inner
+                .rows_created
+                .fetch_add(creates, Ordering::Relaxed);
+            self.inner
+                .rows_updated
+                .fetch_add(updates, Ordering::Relaxed);
+            self.inner
+                .rows_deleted
+                .fetch_add(deletes, Ordering::Relaxed);
         }
     }
 
 
@@ -34,13 +34,15 @@ const DEFAULT_INSERT_ROWS_PER_STATEMENT: usize = 2048;
 
 /// ETL sink that writes transformed batches directly into the SUT via ADBC SQL.
 ///
-/// This sink creates destination tables on first write (`CREATE TABLE IF NOT EXISTS`)
-/// and appends rows with batched `INSERT INTO ... VALUES` statements.
+/// This sink appends rows with batched `INSERT INTO ... VALUES` statements.
+/// Table auto-creation is optional and can be disabled when tables are managed
+/// externally (for example by a system adapter RPC method).
 pub struct AdbcSink {
     conn: Arc<Mutex<AdbcConnection>>,
     created_tables: TokioMutex<HashSet<String>>,
     schema_name: Option<String>,
     insert_rows_per_statement: usize,
+    auto_create_tables: bool,
 }
 
 impl AdbcSink {
@@ -51,6 +53,18 @@ impl AdbcSink {
             created_tables: TokioMutex::new(HashSet::new()),
             schema_name,
             insert_rows_per_statement: DEFAULT_INSERT_ROWS_PER_STATEMENT,
+            auto_create_tables: true,
+        }
+    }
+
+    #[must_use]
+    pub fn new_without_table_creation(conn: AdbcConnection, schema_name: Option<String>) -> Self {
+        Self {
+            conn: Arc::new(Mutex::new(conn)),
+            created_tables: TokioMutex::new(HashSet::new()),
+            schema_name,
+            insert_rows_per_statement: DEFAULT_INSERT_ROWS_PER_STATEMENT,
+            auto_create_tables: false, // tables are managed externally; skip CREATE TABLE
         }
     }
 
@@ -132,7 +146,7 @@ impl Sink for AdbcSink {
         let should_ensure_table = matches!(op, InsertOp::Insert | InsertOp::Update { .. });
         let mut newly_created = false;
 
-        if should_ensure_table {
+        if should_ensure_table && self.auto_create_tables {
             let created = self.created_tables.lock().await;
             if !created.contains(table_name) {
                 preamble_statements.push(self.create_table_sql(table_name, &batch.schema())?);
 
@@ -194,6 +194,19 @@ impl Client {
             .ok_or_else(|| ClientError::InvalidResponse("Missing result".to_string()))
     }
 
+    /// Create benchmark tables for a run by sending the adapter a `create_tables` request
+    pub async fn create_tables(
+        &mut self,
+        run_id: uuid::Uuid,
+    ) -> Result<crate::CreateTablesResponse> {
+        let request = crate::CreateTablesRequest { run_id };
+        let rpc_request = JsonRpcRequest::new(1, crate::methods::CREATE_TABLES, request);
+        let response = self.call_typed(rpc_request).await?;
+        response
+            .result
+            .ok_or_else(|| ClientError::InvalidResponse("Missing result".to_string()))
+    }
+
     /// Get query method/driver information for a benchmark run
     pub async fn query_method(&mut self, run_id: uuid::Uuid) -> Result<crate::QueryMethodResponse> {
         let request = crate::QueryMethodRequest { run_id };
 
@@ -22,7 +22,7 @@ limitations under the License.
 //!
 //! # Features
 //!
-//! - **Protocol types**: Request/response types for setup, query_method, and teardown
+//! - **Protocol types**: Request/response types for setup, create_tables, query_method, and teardown
 //! - **Client**: Ready-to-use client with Stdio and HTTP transports (requires `client` feature)
 //! - **Server**: Easy server implementation via Handler trait (requires `server` feature)
 //! - **JSON-RPC**: Standard JSON-RPC 2.0 envelope types
@@ -42,6 +42,7 @@ limitations under the License.
 //! // Setup a benchmark run
 //! let run_id = Uuid::new_v4();
 //! let setup_response = client.setup(run_id, HashMap::new(), HashMap::new()).await?;
+//! let create_tables_response = client.create_tables(run_id).await?;
 //!
 //! // Get query method information
 //! let query_response = client.query_method(run_id).await?;
@@ -59,8 +60,8 @@ limitations under the License.
 //! # #[cfg(feature = "server")]
 //! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
 //! use system_adapter_protocol::{
-//!     Handler, Server, SetupResponse, QueryMethodResponse, TeardownResponse,
-//!     AdbcDriver, DatasetConfig
+//!     AdbcDriver, CreateTablesResponse, DatasetConfig, Handler, QueryMethodResponse, Server,
+//!     SetupResponse, TeardownResponse,
 //! };
 //! use async_trait::async_trait;
 //! use std::collections::HashMap;
@@ -88,6 +89,10 @@ limitations under the License.
 //!         })
 //!     }
 //!
+//!     async fn create_tables(&mut self, run_id: Uuid) -> Result<CreateTablesResponse, String> {
+//!         Ok(CreateTablesResponse { ok: true })
+//!     }
+//!
 //!     async fn teardown(&mut self, run_id: Uuid) -> Result<TeardownResponse, String> {
 //!         Ok(TeardownResponse { ok: true })
 //!     }
@@ -164,6 +169,22 @@ pub struct SetupResponse {
     pub ok: bool,
 }
 
+/// Request to create benchmark tables in the system under test.
+///
+/// JSON-RPC method: `create_tables`
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CreateTablesRequest {
+    /// Unique identifier of the benchmark run whose tables should be created
+    pub run_id: Uuid,
+}
+
+/// Response from a `create_tables` request
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct CreateTablesResponse {
+    /// `true` when the adapter reports that table creation succeeded
+    pub ok: bool,
+}
+
 /// Request to get query method/driver information
 ///
 /// JSON-RPC method: `query_method`
@@ -355,6 +376,7 @@ pub mod error_codes {
 /// Method names for the system adapter protocol
 pub mod methods {
     pub const SETUP: &str = "setup";
+    pub const CREATE_TABLES: &str = "create_tables";
     pub const QUERY_METHOD: &str = "query_method";
     pub const TEARDOWN: &str = "teardown";
     pub const METRICS: &str = "metrics";