spiceai
diff --git a/‎.claude/skills/system-adapter-builder/SKILL.md‎
Lines changed: 10 additions & 6 deletions b/‎.claude/skills/system-adapter-builder/SKILL.md‎
Lines changed: 10 additions & 6 deletions
diff --git a/‎.github/workflows/validate_system_adapter_templates.yml‎
Lines changed: 2 additions & 1 deletion b/‎.github/workflows/validate_system_adapter_templates.yml‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎Cargo.lock‎
Lines changed: 20 additions & 0 deletions b/‎Cargo.lock‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎Cargo.toml‎
Lines changed: 1 addition & 1 deletion b/‎Cargo.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.md‎
Lines changed: 39 additions & 13 deletions b/‎README.md‎
Lines changed: 39 additions & 13 deletions
diff --git a/‎crates/checkpointer/Cargo.toml‎
Lines changed: 25 additions & 0 deletions b/‎crates/checkpointer/Cargo.toml‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎crates/checkpointer/README.md‎
Lines changed: 3 additions & 0 deletions b/‎crates/checkpointer/README.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎crates/checkpointer/src/main.rs‎
Lines changed: 171 additions & 0 deletions b/‎crates/checkpointer/src/main.rs‎
Lines changed: 171 additions & 0 deletions
@@ -1,6 +1,6 @@
 ---
 name: system-adapter-builder
-description: Build or update a SpiceBench system adapter with JSON-RPC over stdio and HTTP, including setup/query_method/teardown/metrics support and template validation.
+description: Build or update a SpiceBench system adapter with JSON-RPC over stdio and HTTP, including setup/create_tables/query_method/teardown/metrics support and template validation.
 ---
 
 # SpiceBench System Adapter Builder
@@ -17,6 +17,7 @@ A JSON-RPC 2.0 adapter that supports both transports:
 Required methods:
 
 - `setup(run_id, datasets)`
+- `create_tables(run_id)`
 - `query_method(run_id)`
 - `teardown(run_id)`
 - `metrics(run_id)`
@@ -34,15 +35,17 @@ Required methods:
 
 1. Copy the nearest template from `system-adapters/templates/<language>`.
 2. Keep request/response envelopes JSON-RPC 2.0 compliant (`jsonrpc`, `id`, `method`, `params`).
-3. Implement `setup` and `teardown` with run-scoped resources keyed by `run_id`.
-4. Implement `query_method` to return:
+3. Implement `setup` with run-scoped resources keyed by `run_id`.
+4. Implement `create_tables` so the adapter creates/registers benchmark destination tables.
+5. Implement `query_method` to return:
    - `driver`: typically `flightsql` or `databricks`
    - `db_kwargs`: real endpoint + auth kwargs for the SUT
-5. Implement `metrics` to return both objects:
+6. Implement `teardown` with run-scoped cleanup keyed by `run_id`.
+7. Implement `metrics` to return both objects:
    - `resource`: CPU, memory, disk bytes, disk IOPS
    - `ingestion`: rows, bytes, rows/s, active connections
-6. Keep stdio and HTTP using the same dispatcher so behavior is identical.
-7. Return JSON-RPC errors with standard codes:
+8. Keep stdio and HTTP using the same dispatcher so behavior is identical.
+9. Return JSON-RPC errors with standard codes:
    - `-32700` parse error
    - `-32600` invalid request
    - `-32601` method not found
@@ -70,6 +73,7 @@ If any metric is unavailable, return `0`/`0.0` and document why.
 
 - Adapter responds to all required methods over stdio and HTTP.
 - `rpc.methods` includes every exposed method.
+- `create_tables` creates/registers benchmark tables for each dataset.
 - `query_method` returns a valid `driver` and complete `db_kwargs`.
 - `metrics` returns both `resource` and `ingestion` objects.
 - Language build/syntax checks pass:
 
@@ -152,9 +152,10 @@ jobs:
         working-directory: system-adapters/templates/go
         run: go build ./...
       - name: Smoke test Go template (HTTP setup)
+        working-directory: system-adapters/templates/go
         run: |
           set -euo pipefail
-          go run ./system-adapters/templates/go --transport http --host 127.0.0.1 --port 18083 --path /jsonrpc > /tmp/go-template.log 2>&1 &
+          go run . --transport http --host 127.0.0.1 --port 18083 --path /jsonrpc > /tmp/go-template.log 2>&1 &
           pid=$!
           trap 'kill "$pid" 2>/dev/null || true' EXIT
 
 
@@ -3,7 +3,7 @@ exclude = ["system-adapters/databricks"]
 members = [
     ".",
     "crates/adbc_client",
-    "crates/app",
+    "crates/app", "crates/checkpointer",
     "crates/data-generation",
     "crates/duration-parse", "crates/etl",
     "crates/flight_client",
 
@@ -17,7 +17,7 @@ flowchart TB
         direction TB
 
         subgraph setup_phase["1 · Setup (JSON-RPC)"]
-            adapter_iface["System Adapter Protocol\n(setup / teardown / metrics)"]
+            adapter_iface["System Adapter Protocol\n(setup / create_tables /\nquery_method / teardown / metrics)"]
             spice["Spice Cloud Adapter"]
             databricks["Databricks Adapter"]
             other["... Other Adapters"]
@@ -104,8 +104,9 @@ flowchart TB
 
     orchestrator -->|"start run"| run
 
-    adapter_iface -->|"setup(run_id)"| sut
-    adapter_iface -->|"setup(run_id, datasets)\n→ ADBC driver + kwargs"| executors
+    adapter_iface -->|"setup(run_id, datasets)"| sut
+    adapter_iface -->|"create_tables(run_id)"| sut
+    adapter_iface -->|"query_method(run_id)\n→ ADBC driver + kwargs"| executors
     setup_phase -->|"system ready"| bench_phase
     bench_phase -->|"benchmark complete"| teardown_phase
 
@@ -120,11 +121,11 @@ flowchart TB
 
 A **Run** is a single end-to-end execution of the benchmark for one system. Each Run proceeds through three phases:
 
-| Phase                    | What happens                                                                                                                                                       | Timed? |
-| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------ |
-| **1. Setup**             | Connect to system adapter via JSON-RPC (stdio or HTTP). Call `setup(run_id, datasets)` to provision the SUT and return ADBC driver config. | No     |
-| **2. Benchmark (timed)** | Three sequential stages — warm-up (1× query set), baseline (10% of duration, 60s–600s), and load test (full duration with concurrent clients).                     | Yes    |
-| **3. Teardown**          | Call `teardown(run_id)` via the adapter to deprovision resources and clean up.                                                                                     | No     |
+| Phase                    | What happens                                                                                                                                                                                      | Timed? |
+| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------ |
+| **1. Setup**             | Connect to system adapter via JSON-RPC (stdio or HTTP). Call `setup(run_id, datasets)` to provision the SUT, then `create_tables(run_id)`, then `query_method(run_id)` to get ADBC driver config. | No     |
+| **2. Benchmark (timed)** | Three sequential stages — warm-up (1× query set), baseline (10% of duration, 60s–600s), and load test (full duration with concurrent clients).                                                    | Yes    |
+| **3. Teardown**          | Call `teardown(run_id)` via the adapter to deprovision resources and clean up.                                                                                                                    | No     |
 
 The **E2E benchmark duration** (phase 2, load test stage) is the primary ranking metric. After the load test, each query's p99 latency is compared against the baseline: >20% increase = FAIL, 10–20% = WARN, ≥3 WARNs = FAIL.
 
@@ -147,7 +148,7 @@ Common CLI/workflow usage:
 | Component                   | Responsibility                                                                                                                                                |
 | --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | **GitHub Actions**          | Orchestrates Runs on schedule, PR, or manual dispatch. Manages the full Run lifecycle across phases.                                                          |
-| **System Adapter Protocol** | JSON-RPC 2.0 interface (stdio or HTTP) for each platform. Methods: `setup`, `teardown`, `metrics`.                                                            |
+| **System Adapter Protocol** | JSON-RPC 2.0 interface (stdio or HTTP) for each platform. Methods: `setup`, `create_tables`, `query_method`, `teardown`, `metrics`.                           |
 | **Query Executors**         | Pluggable query execution: ADBC direct (FlightSQL/Databricks drivers), HTTP (`/v1/sql`), or distributed (`/v1/queries` with polling).                         |
 | **Data Generator**          | Standalone binary (`data-generation`) that produces TPC-H partitioned Parquet batches and writes them to S3.                                                  |
 | **Test Framework**          | Core engine managing the warm-up → baseline → load test pipeline, query sets (TPC-H, TPC-DS, ClickBench, parameterized, scenario), and statistics collection. |
@@ -216,8 +217,10 @@ Results from every Run are published to [SpiceBench.com](https://spicebench.com)
 To benchmark a new platform, implement the JSON-RPC 2.0 adapter with these methods:
 
 1. **`setup(run_id, datasets)`** — Provision infrastructure and configure the target system.
-2. **`teardown(run_id)`** — Clean up provisioned resources.
-3. **`metrics(run_id)`** *(optional)* — Return current resource usage (CPU, memory, disk, IOPS) and ingestion progress (rows, bytes, rows/s, active connections).
+2. **`create_tables(run_id)`** — Create/register destination tables for the benchmark datasets.
+3. **`query_method(run_id)`** — Return the ADBC driver type (`flightsql` or `databricks`) and connection kwargs so SpiceBench can establish a direct query connection.
+4. **`teardown(run_id)`** — Clean up provisioned resources.
+5. **`metrics(run_id)`** *(optional)* — Return current resource usage (CPU, memory, disk, IOPS) and ingestion progress (rows, bytes, rows/s, active connections).
 
 The adapter can run as a **stdio** child process or as an **HTTP** server.
 
@@ -236,7 +239,30 @@ The `spicebench` CLI connects to a system adapter using JSON-RPC 2.0 over either
 - **stdio transport**: use `--system-adapter-stdio-cmd` (SpiceBench starts the child process).
 - **HTTP transport**: use `--system-adapter-http-url` (SpiceBench connects to a remote adapter endpoint).
 - **execution mode**: `adapter-command` (default) dispatches `spicebench run ...` to adapter JSON-RPC `run.load`.
-- **execution mode**: `direct-query` runs the load/query path directly via ADBC, using the adapter only for setup/teardown/metrics.
+- **execution mode**: `direct-query` runs the load/query path directly via ADBC, using the adapter for setup/table creation/teardown/metrics.
+
+#### Adapter lifecycle (direct-query mode)
+
+For each run, SpiceBench calls adapter JSON-RPC methods in this order:
+
+1. `setup(run_id, datasets, metadata)`
+2. `create_tables(run_id)`
+3. `query_method(run_id)`
+4. benchmark execution and optional periodic `metrics(run_id)` scraping
+5. `teardown(run_id)`
+
+Minimal `create_tables` request example:
+
+```json
+{
+    "jsonrpc": "2.0",
+    "id": 2,
+    "method": "create_tables",
+    "params": {
+        "run_id": "00000000-0000-0000-0000-000000000000"
+    }
+}
+```
 
 #### Stdio example (child process started by SpiceBench)
 
@@ -268,7 +294,7 @@ Notes:
 - `--system-adapter-stdio-args` passes CLI args to the stdio adapter command.
 - `--system-adapter-env` is only valid for stdio transport.
 
-#### Direct-query example (ADBC query path, adapter for setup/teardown only)
+#### Direct-query example (ADBC query path, adapter for setup/table creation/teardown)
 
 ```bash
 spicebench \
 
@@ -0,0 +1,25 @@
+[package]
+name = "checkpointer"
+edition.workspace = true
+exclude.workspace = true
+homepage.workspace = true
+license.workspace = true
+repository.workspace = true
+rust-version.workspace = true
+version.workspace = true
+
+[dependencies]
+adbc_client = { path = "../adbc_client" }
+anyhow.workspace = true
+arrow.workspace = true
+async-trait.workspace = true
+chrono.workspace = true
+clap = { workspace = true, features = ["derive"] }
+data-generation = { path = "../data-generation" }
+etl = { path = "../etl" }
+serde_json.workspace = true
+system-adapter-protocol = { path = "../system-adapter-protocol" }
+tokio.workspace = true
+tokio-util.workspace = true
+tracing.workspace = true
+tracing-subscriber = { workspace = true, features = ["env-filter"] }
@@ -0,0 +1,3 @@
+# Checkpointer
+
+Runs ETL to a point-in-time, then captures the expected results from queries.
@@ -0,0 +1,171 @@
+/*
+Copyright 2024-2025 The Spice.ai OSS Authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+     https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+use std::sync::Arc;
+
+use adbc_client::AdbcConnection;
+use clap::Parser;
+use data_generation::config::{DatasetConfig, TargetConfig};
+use data_generation::dataset::MutationConfig;
+use data_generation::storage::s3::S3Storage;
+use etl::sink::adbc::AdbcSink;
+use etl::{DatasetSource, ETLPipeline, PipelineState, StopReason};
+use serde_json::Value;
+use tracing_subscriber::EnvFilter;
+
+// Command-line options for the checkpointer binary: how the dataset is
+// generated, where the source data lives in S3, and which ADBC endpoint
+// receives the writes.
+//
+// NOTE: this header is a plain `//` comment on purpose. clap derives help text
+// from `///` doc comments, and the user-facing description is already supplied
+// by the explicit `about` attribute below. The `///` comments on the fields
+// are the per-flag help strings shown by `--help`.
+#[derive(Parser)]
+#[command(
+    about = "Run an ETL pipeline that reads from S3, rehydrates data, and writes directly to a SUT via ADBC"
+)]
+struct Cli {
+    /// Dataset type: "tpch" or "simple_sequence"
+    #[arg(long, default_value = "tpch")]
+    dataset: String,
+
+    /// Scale factor for data generation
+    #[arg(long, default_value_t = 1.0)]
+    scale_factor: f64,
+
+    /// Number of data generation steps (partitions)
+    #[arg(long, default_value_t = 25)]
+    num_steps: u16,
+
+    /// S3 bucket name holding the source data
+    #[arg(long)]
+    bucket: String,
+
+    /// S3 key prefix for source data
+    #[arg(long, default_value = "")]
+    source_prefix: String,
+
+    /// AWS region
+    #[arg(long)]
+    region: Option<String>,
+
+    /// S3 endpoint URL (for MinIO/LocalStack)
+    #[arg(long)]
+    endpoint: Option<String>,
+
+    /// ADBC driver name (for example: databricks, flightsql)
+    #[arg(long)]
+    adbc_driver: String,
+
+    /// ADBC connection URI passed as db option `uri`
+    #[arg(long)]
+    adbc_uri: String,
+
+    /// Optional schema name to prefix destination table names
+    #[arg(long)]
+    adbc_schema: Option<String>,
+
+    /// Number of pipeline steps between checkpoints
+    #[arg(long, default_value_t = 100)]
+    checkpoint_interval_steps: u64,
+}
+
+impl Cli {
+    /// Translates the `--dataset` string into the pipeline's [`DatasetSource`].
+    ///
+    /// # Errors
+    ///
+    /// Fails when the value is neither `tpch` nor `simple_sequence`.
+    fn dataset_source(&self) -> anyhow::Result<DatasetSource> {
+        let source = match self.dataset.as_str() {
+            "tpch" => DatasetSource::Tpch,
+            "simple_sequence" => DatasetSource::SimpleSequence,
+            unknown => {
+                return Err(anyhow::anyhow!(
+                    "Unknown dataset type: {unknown}. Use 'tpch' or 'simple_sequence'."
+                ));
+            }
+        };
+        Ok(source)
+    }
+
+    /// Builds the data-generation [`DatasetConfig`] from the dataset name,
+    /// scale factor, and step count given on the command line.
+    fn dataset_config(&self) -> DatasetConfig {
+        let dataset_type = self.dataset.clone();
+        let scale_factor = self.scale_factor;
+        let num_steps = self.num_steps;
+
+        DatasetConfig {
+            dataset_type,
+            scale_factor,
+            num_steps,
+        }
+    }
+
+    /// Builds the S3 location ([`TargetConfig`]) that the source data is read
+    /// from: bucket, key prefix, and the optional region / endpoint overrides.
+    fn source_config(&self) -> TargetConfig {
+        let Self {
+            bucket,
+            source_prefix,
+            region,
+            endpoint,
+            ..
+        } = self;
+
+        TargetConfig {
+            bucket: bucket.clone(),
+            prefix: source_prefix.clone(),
+            region: region.clone(),
+            endpoint: endpoint.clone(),
+        }
+    }
+}
+
+/// Entry point: parses the CLI options, wires an S3 source to an ADBC sink,
+/// runs the ETL pipeline, and reports the state it ends up in.
+///
+/// # Errors
+///
+/// Returns an error when the dataset name is unknown, the checkpoint interval
+/// does not fit in `usize`, the S3 source or ADBC connection cannot be
+/// created, the pipeline fails to build, initialize, or start, or the pipeline
+/// finishes in an error or otherwise unexpected state.
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
+    tracing_subscriber::fmt()
+        .with_env_filter(EnvFilter::from_default_env())
+        .init();
+
+    let cli = Cli::parse();
+
+    // The pipeline's `run` takes a `usize`. Convert up front with a checked
+    // conversion so an oversized value (possible on 32-bit targets) is
+    // reported instead of being silently truncated by an `as` cast.
+    let checkpoint_interval = usize::try_from(cli.checkpoint_interval_steps).map_err(|e| {
+        anyhow::anyhow!(
+            "--checkpoint-interval-steps value {} does not fit in usize: {e}",
+            cli.checkpoint_interval_steps
+        )
+    })?;
+
+    let dataset_source = cli.dataset_source()?;
+    let dataset_config = cli.dataset_config();
+
+    let source = Arc::new(S3Storage::new(&cli.source_config())?);
+
+    // The only database option passed to the driver is `uri`.
+    let adbc_conn = AdbcConnection::create(
+        &cli.adbc_driver,
+        std::collections::HashMap::from([("uri".to_string(), Value::String(cli.adbc_uri.clone()))]),
+    )?;
+    let target = Arc::new(AdbcSink::new(adbc_conn, cli.adbc_schema.clone()));
+
+    let mutations = MutationConfig::new(0.1, 0.1);
+
+    let mut pipeline =
+        ETLPipeline::new(dataset_source, &dataset_config, source, target, &mutations)?;
+
+    tracing::info!(
+        dataset = %cli.dataset,
+        bucket = %cli.bucket,
+        source_prefix = %cli.source_prefix,
+        adbc_driver = %cli.adbc_driver,
+        adbc_schema = ?cli.adbc_schema,
+        scale_factor = cli.scale_factor,
+        num_steps = cli.num_steps,
+        "Starting ETL pipeline"
+    );
+
+    // Log the tables and schemas that will be processed.
+    let datasets = pipeline.setup_request_datasets();
+    for (name, config) in &datasets {
+        tracing::info!(table = %name, schema = ?config.schema, "Dataset table registered");
+    }
+
+    pipeline.initialize().await?;
+    pipeline.run(checkpoint_interval)?;
+
+    match pipeline.wait().await {
+        // A pause is accepted as a normal outcome and falls through to the
+        // TODO below; nothing is captured yet.
+        PipelineState::Paused => {}
+        PipelineState::Stopped(StopReason::Completed) => {
+            tracing::info!("ETL pipeline completed successfully");
+        }
+        PipelineState::Stopped(StopReason::Cancelled) => {
+            tracing::warn!("ETL pipeline was cancelled");
+        }
+        PipelineState::Stopped(StopReason::Error(e)) => {
+            tracing::error!(error = %e, "ETL pipeline stopped with error");
+            anyhow::bail!("ETL pipeline failed: {e}");
+        }
+        other => {
+            anyhow::bail!("Unexpected final pipeline state: {other:?}");
+        }
+    }
+
+    // TODO: checkpoint the current state, continue the pipeline, and continue checkpointing until completion.
+
+    Ok(())
+}
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# Checkpointer`
	`2`	`+`
	`3`	`+Runs ETL to a point-in-time, then captures the expected results from queries.`