Skip to content

Commit 5cbdb2a

Browse files
authored
Update setup and create_tables methods (#77)
* refactor: Update setup and create_tables methods to use metadata and datasets parameters
* fix: Correct formatting in README for Run phases table
1 parent f0bfd69 commit 5cbdb2a

16 files changed

Lines changed: 113 additions & 85 deletions

File tree

.claude/skills/system-adapter-builder/SKILL.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,8 @@ A JSON-RPC 2.0 adapter that supports both transports:
1616

1717
Required methods:
1818

19-
- `setup(run_id, datasets)`
20-
- `create_tables(run_id)`
19+
- `setup(run_id, metadata)`
20+
- `create_tables(run_id, datasets)`
2121
- `teardown(run_id)`
2222
- `metrics(run_id)`
2323
- `rpc.methods`

.github/workflows/validate_system_adapter_templates.yml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ jobs:
4646
4747
for _ in {1..30}; do
4848
if curl -sS -o /tmp/python-resp.json -H 'Content-Type: application/json' \
49-
-d '{"jsonrpc":"2.0","id":1,"method":"setup","params":{"run_id":"00000000-0000-0000-0000-000000000000","datasets":{}}}' \
49+
-d '{"jsonrpc":"2.0","id":1,"method":"setup","params":{"run_id":"00000000-0000-0000-0000-000000000000"}}' \
5050
http://127.0.0.1:18080/jsonrpc; then
5151
break
5252
fi
@@ -57,7 +57,7 @@ jobs:
5757
grep -q '"driver":"flightsql"' /tmp/python-resp.json
5858
grep -q '"db_kwargs"' /tmp/python-resp.json
5959
curl -sS -o /tmp/python-create.json -H 'Content-Type: application/json' \
60-
-d '{"jsonrpc":"2.0","id":3,"method":"create_tables","params":{"run_id":"00000000-0000-0000-0000-000000000000"}}' \
60+
-d '{"jsonrpc":"2.0","id":3,"method":"create_tables","params":{"run_id":"00000000-0000-0000-0000-000000000000","datasets":{}}}' \
6161
http://127.0.0.1:18080/jsonrpc
6262
grep -q '"ok":true' /tmp/python-create.json
6363
curl -sS -o /tmp/python-methods.json -H 'Content-Type: application/json' \
@@ -89,7 +89,7 @@ jobs:
8989
9090
for _ in {1..30}; do
9191
if curl -sS -o /tmp/node-resp.json -H 'Content-Type: application/json' \
92-
-d '{"jsonrpc":"2.0","id":1,"method":"setup","params":{"run_id":"00000000-0000-0000-0000-000000000000","datasets":{}}}' \
92+
-d '{"jsonrpc":"2.0","id":1,"method":"setup","params":{"run_id":"00000000-0000-0000-0000-000000000000"}}' \
9393
http://127.0.0.1:18081/jsonrpc; then
9494
break
9595
fi
@@ -100,7 +100,7 @@ jobs:
100100
grep -q '"driver":"flightsql"' /tmp/node-resp.json
101101
grep -q '"db_kwargs"' /tmp/node-resp.json
102102
curl -sS -o /tmp/node-create.json -H 'Content-Type: application/json' \
103-
-d '{"jsonrpc":"2.0","id":3,"method":"create_tables","params":{"run_id":"00000000-0000-0000-0000-000000000000"}}' \
103+
-d '{"jsonrpc":"2.0","id":3,"method":"create_tables","params":{"run_id":"00000000-0000-0000-0000-000000000000","datasets":{}}}' \
104104
http://127.0.0.1:18081/jsonrpc
105105
grep -q '"ok":true' /tmp/node-create.json
106106
curl -sS -o /tmp/node-methods.json -H 'Content-Type: application/json' \
@@ -134,7 +134,7 @@ jobs:
134134
135135
for _ in {1..30}; do
136136
if curl -sS -o /tmp/rust-resp.json -H 'Content-Type: application/json' \
137-
-d '{"jsonrpc":"2.0","id":1,"method":"setup","params":{"run_id":"00000000-0000-0000-0000-000000000000","datasets":{}}}' \
137+
-d '{"jsonrpc":"2.0","id":1,"method":"setup","params":{"run_id":"00000000-0000-0000-0000-000000000000"}}' \
138138
http://127.0.0.1:18082/jsonrpc; then
139139
break
140140
fi
@@ -145,7 +145,7 @@ jobs:
145145
grep -q '"driver":"flightsql"' /tmp/rust-resp.json
146146
grep -q '"db_kwargs"' /tmp/rust-resp.json
147147
curl -sS -o /tmp/rust-create.json -H 'Content-Type: application/json' \
148-
-d '{"jsonrpc":"2.0","id":3,"method":"create_tables","params":{"run_id":"00000000-0000-0000-0000-000000000000"}}' \
148+
-d '{"jsonrpc":"2.0","id":3,"method":"create_tables","params":{"run_id":"00000000-0000-0000-0000-000000000000","datasets":{}}}' \
149149
http://127.0.0.1:18082/jsonrpc
150150
grep -q '"ok":true' /tmp/rust-create.json
151151
curl -sS -o /tmp/rust-methods.json -H 'Content-Type: application/json' \
@@ -179,7 +179,7 @@ jobs:
179179
180180
for _ in {1..30}; do
181181
if curl -sS -o /tmp/go-resp.json -H 'Content-Type: application/json' \
182-
-d '{"jsonrpc":"2.0","id":1,"method":"setup","params":{"run_id":"00000000-0000-0000-0000-000000000000","datasets":{}}}' \
182+
-d '{"jsonrpc":"2.0","id":1,"method":"setup","params":{"run_id":"00000000-0000-0000-0000-000000000000"}}' \
183183
http://127.0.0.1:18083/jsonrpc; then
184184
break
185185
fi
@@ -190,7 +190,7 @@ jobs:
190190
grep -q '"driver":"flightsql"' /tmp/go-resp.json
191191
grep -q '"db_kwargs"' /tmp/go-resp.json
192192
curl -sS -o /tmp/go-create.json -H 'Content-Type: application/json' \
193-
-d '{"jsonrpc":"2.0","id":3,"method":"create_tables","params":{"run_id":"00000000-0000-0000-0000-000000000000"}}' \
193+
-d '{"jsonrpc":"2.0","id":3,"method":"create_tables","params":{"run_id":"00000000-0000-0000-0000-000000000000","datasets":{}}}' \
194194
http://127.0.0.1:18083/jsonrpc
195195
grep -q '"ok":true' /tmp/go-create.json
196196
curl -sS -o /tmp/go-methods.json -H 'Content-Type: application/json' \
@@ -223,7 +223,7 @@ jobs:
223223
224224
for _ in {1..30}; do
225225
if curl -sS -o /tmp/java-resp.json -H 'Content-Type: application/json' \
226-
-d '{"jsonrpc":"2.0","id":1,"method":"setup","params":{"run_id":"00000000-0000-0000-0000-000000000000","datasets":{}}}' \
226+
-d '{"jsonrpc":"2.0","id":1,"method":"setup","params":{"run_id":"00000000-0000-0000-0000-000000000000"}}' \
227227
http://127.0.0.1:18084/jsonrpc; then
228228
break
229229
fi
@@ -234,7 +234,7 @@ jobs:
234234
grep -q '"driver":"flightsql"' /tmp/java-resp.json
235235
grep -q '"db_kwargs"' /tmp/java-resp.json
236236
curl -sS -o /tmp/java-create.json -H 'Content-Type: application/json' \
237-
-d '{"jsonrpc":"2.0","id":3,"method":"create_tables","params":{"run_id":"00000000-0000-0000-0000-000000000000"}}' \
237+
-d '{"jsonrpc":"2.0","id":3,"method":"create_tables","params":{"run_id":"00000000-0000-0000-0000-000000000000","datasets":{}}}' \
238238
http://127.0.0.1:18084/jsonrpc
239239
grep -q '"ok":true' /tmp/java-create.json
240240
curl -sS -o /tmp/java-methods.json -H 'Content-Type: application/json' \

README.md

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,8 @@ flowchart TB
104104
105105
orchestrator -->|"start run"| run
106106
107-
adapter_iface -->|"setup(run_id, datasets)\n→ ADBC driver + kwargs"| executors
108-
adapter_iface -->|"create_tables(run_id)"| sut
107+
adapter_iface -->|"setup(run_id, metadata)\n→ ADBC driver + kwargs"| executors
108+
adapter_iface -->|"create_tables(run_id, datasets)"| sut
109109
setup_phase -->|"system ready"| bench_phase
110110
bench_phase -->|"benchmark complete"| teardown_phase
111111
@@ -120,11 +120,11 @@ flowchart TB
120120

121121
A **Run** is a single end-to-end execution of the benchmark for one system. Each Run proceeds through three phases:
122122

123-
| Phase | What happens | Timed? |
124-
| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------ |
125-
| **1. Setup** | Connect to system adapter via JSON-RPC (stdio or HTTP). Call `setup(run_id, datasets)` to provision the SUT and return ADBC driver config, then `create_tables(run_id)`. | No |
126-
| **2. Benchmark (timed)** | Three sequential stages — warm-up (1× query set), baseline (10% of duration, 60s–600s), and load test (full duration with concurrent clients). | Yes |
127-
| **3. Teardown** | Call `teardown(run_id)` via the adapter to deprovision resources and clean up. | No |
123+
| Phase | What happens | Timed? |
124+
| ------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------ |
125+
| **1. Setup** | Connect to system adapter via JSON-RPC (stdio or HTTP). Call `setup(run_id, metadata)` to provision the SUT and return ADBC driver config, then `create_tables(run_id, datasets)`. | No |
126+
| **2. Benchmark (timed)** | Three sequential stages — warm-up (1× query set), baseline (10% of duration, 60s–600s), and load test (full duration with concurrent clients). | Yes |
127+
| **3. Teardown** | Call `teardown(run_id)` via the adapter to deprovision resources and clean up. | No |
128128

129129
The **E2E benchmark duration** (phase 2, load test stage) is the primary ranking metric. After the load test, each query's p99 latency is compared against the baseline: >20% increase = FAIL, 10–20% = WARN, ≥3 WARNs = FAIL.
130130

@@ -215,8 +215,8 @@ Results from every Run are published to [SpiceBench.com](https://spicebench.com)
215215

216216
To benchmark a new platform, implement the JSON-RPC 2.0 adapter with these methods:
217217

218-
1. **`setup(run_id, datasets)`** — Provision infrastructure and configure the target system.
219-
2. **`create_tables(run_id)`** — Create/register destination tables for the benchmark datasets.
218+
1. **`setup(run_id, metadata)`** — Provision infrastructure and configure the target system.
219+
2. **`create_tables(run_id, datasets)`** — Create/register destination tables for the benchmark datasets.
220220
3. **`teardown(run_id)`** — Clean up provisioned resources.
221221
4. **`metrics(run_id)`** *(optional)* — Return current resource usage (CPU, memory, disk, IOPS) and ingestion progress (rows, bytes, rows/s, active connections).
222222

@@ -243,8 +243,8 @@ The `spicebench` CLI connects to a system adapter using JSON-RPC 2.0 over either
243243

244244
For each run, SpiceBench calls adapter JSON-RPC methods in this order:
245245

246-
1. `setup(run_id, datasets, metadata)`
247-
2. `create_tables(run_id)`
246+
1. `setup(run_id, metadata)`
247+
2. `create_tables(run_id, datasets)`
248248
3. benchmark execution and optional periodic `metrics(run_id)` scraping
249249
4. `teardown(run_id)`
250250

@@ -256,7 +256,8 @@ Tiny `create_tables` request example:
256256
"id": 2,
257257
"method": "create_tables",
258258
"params": {
259-
"run_id": "00000000-0000-0000-0000-000000000000"
259+
"run_id": "00000000-0000-0000-0000-000000000000",
260+
"datasets": {}
260261
}
261262
}
262263
```

crates/checkpointer/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ async fn main() -> anyhow::Result<()> {
198198
);
199199

200200
// Log the tables and schemas that will be processed.
201-
let datasets = pipeline.setup_request_datasets();
201+
let datasets = pipeline.create_tables_request_datasets();
202202
for (name, config) in &datasets {
203203
tracing::info!(table = %name, schema = ?config.schema, "Dataset table registered");
204204
}

crates/etl/src/lib.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ struct PipelineWorkState {
183183
/// # Lifecycle
184184
///
185185
/// 1. **[`NotStarted`](PipelineState::NotStarted)** — created via [`ETLPipeline::new`]
186-
/// with a dataset, source, and target. Call [`setup_request_datasets`](ETLPipeline::setup_request_datasets)
186+
/// with a dataset, source, and target. Call [`create_tables_request_datasets`](ETLPipeline::create_tables_request_datasets)
187187
/// to obtain the dataset configurations that a system adapter needs.
188188
/// 2. **[`Initialized`](PipelineState::Initialized)** — the first batch (batch 0)
189189
/// has been ETL'd into the target via [`initialize`](ETLPipeline::initialize).
@@ -277,14 +277,14 @@ impl ETLPipeline {
277277
self.cancel_token.cancel();
278278
}
279279

280-
/// Returns the dataset configurations required to set up the system adapter.
280+
/// Returns the dataset configurations required for `create_tables`.
281281
///
282282
/// Each entry maps a table name to its
283283
/// [`DatasetConfig`](system_adapter_protocol::DatasetConfig), which includes
284284
/// the rehydrated Arrow schema. This can be used to build a
285-
/// [`SetupRequest`](system_adapter_protocol::SetupRequest) for the system
286-
/// adapter.
287-
pub fn setup_request_datasets(&self) -> HashMap<String, ProtocolDatasetConfig> {
285+
/// [`CreateTablesRequest`](system_adapter_protocol::CreateTablesRequest) for
286+
/// the system adapter.
287+
pub fn create_tables_request_datasets(&self) -> HashMap<String, ProtocolDatasetConfig> {
288288
self.dataset
289289
.tables()
290290
.into_iter()

crates/etl/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ async fn main() -> anyhow::Result<()> {
136136
);
137137

138138
// Log the tables and schemas that will be processed.
139-
let datasets = pipeline.setup_request_datasets();
139+
let datasets = pipeline.create_tables_request_datasets();
140140
for (name, config) in &datasets {
141141
tracing::info!(table = %name, schema = ?config.schema, "Dataset table registered");
142142
}

crates/system-adapter-protocol/src/client.rs

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -179,14 +179,9 @@ impl Client {
179179
pub async fn setup(
180180
&mut self,
181181
run_id: uuid::Uuid,
182-
datasets: std::collections::HashMap<String, crate::DatasetConfig>,
183182
metadata: std::collections::HashMap<String, serde_json::Value>,
184183
) -> Result<crate::SetupResponse> {
185-
let request = crate::SetupRequest {
186-
run_id,
187-
datasets,
188-
metadata,
189-
};
184+
let request = crate::SetupRequest { run_id, metadata };
190185
let rpc_request = JsonRpcRequest::new(1, crate::methods::SETUP, request);
191186
let response = self.call_typed(rpc_request).await?;
192187
response
@@ -198,8 +193,9 @@ impl Client {
198193
pub async fn create_tables(
199194
&mut self,
200195
run_id: uuid::Uuid,
196+
datasets: std::collections::HashMap<String, crate::DatasetConfig>,
201197
) -> Result<crate::CreateTablesResponse> {
202-
let request = crate::CreateTablesRequest { run_id };
198+
let request = crate::CreateTablesRequest { run_id, datasets };
203199
let rpc_request = JsonRpcRequest::new(1, crate::methods::CREATE_TABLES, request);
204200
let response = self.call_typed(rpc_request).await?;
205201
response

crates/system-adapter-protocol/src/lib.rs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ limitations under the License.
4141
//!
4242
//! // Setup a benchmark run
4343
//! let run_id = Uuid::new_v4();
44-
//! let setup_response = client.setup(run_id, HashMap::new(), HashMap::new()).await?;
45-
//! let create_tables_response = client.create_tables(run_id).await?;
44+
//! let setup_response = client.setup(run_id, HashMap::new()).await?;
45+
//! let create_tables_response = client.create_tables(run_id, HashMap::new()).await?;
4646
//!
4747
//! println!("Driver: {:?}", setup_response.driver);
4848
//!
@@ -72,7 +72,6 @@ limitations under the License.
7272
//! async fn setup(
7373
//! &mut self,
7474
//! run_id: Uuid,
75-
//! datasets: HashMap<String, DatasetConfig>,
7675
//! metadata: HashMap<String, serde_json::Value>,
7776
//! ) -> Result<SetupResponse, String> {
7877
//! // Your setup logic here
@@ -83,7 +82,12 @@ limitations under the License.
8382
//! })
8483
//! }
8584
//!
86-
//! async fn create_tables(&mut self, run_id: Uuid) -> Result<CreateTablesResponse, String> {
85+
//! async fn create_tables(
86+
//! &mut self,
87+
//! run_id: Uuid,
88+
//! datasets: HashMap<String, DatasetConfig>,
89+
//! ) -> Result<CreateTablesResponse, String> {
90+
//! let _ = datasets;
8791
//! Ok(CreateTablesResponse { ok: true })
8892
//! }
8993
//!
@@ -146,11 +150,10 @@ pub struct DatasetConfig {
146150
///
147151
/// JSON-RPC method: `setup`
148152
#[derive(Debug, Clone, Serialize, Deserialize)]
153+
#[serde(deny_unknown_fields)]
149154
pub struct SetupRequest {
150155
/// Unique identifier for this benchmark run
151156
pub run_id: Uuid,
152-
/// Map of dataset name to dataset definition
153-
pub datasets: HashMap<String, DatasetConfig>,
154157
/// Arbitrary run metadata propagated from spicebench to adapters
155158
#[serde(default)]
156159
pub metadata: HashMap<String, serde_json::Value>,
@@ -169,9 +172,12 @@ pub struct SetupResponse {
169172
///
170173
/// JSON-RPC method: `create_tables`
171174
#[derive(Debug, Clone, Serialize, Deserialize)]
175+
#[serde(deny_unknown_fields)]
172176
pub struct CreateTablesRequest {
173177
/// Unique identifier for this benchmark run
174178
pub run_id: Uuid,
179+
/// Map of dataset name to dataset definition
180+
pub datasets: HashMap<String, DatasetConfig>,
175181
}
176182

177183
/// Response from create_tables request

crates/system-adapter-protocol/src/server.rs

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -75,14 +75,14 @@ pub trait Handler: Send + Sync {
7575
async fn setup(
7676
&mut self,
7777
run_id: Uuid,
78-
datasets: HashMap<String, DatasetConfig>,
7978
metadata: HashMap<String, serde_json::Value>,
8079
) -> std::result::Result<SetupResponse, String>;
8180

8281
/// Create benchmark tables for a run
8382
async fn create_tables(
8483
&mut self,
8584
run_id: Uuid,
85+
datasets: HashMap<String, DatasetConfig>,
8686
) -> std::result::Result<CreateTablesResponse, String>;
8787

8888
/// Teardown a benchmark run
@@ -242,12 +242,7 @@ impl<H: Handler> Server<H> {
242242
Ok(r) => r,
243243
Err(e) => return e,
244244
};
245-
Self::handler_response(
246-
self.handler
247-
.setup(req.run_id, req.datasets, req.metadata)
248-
.await,
249-
id,
250-
)
245+
Self::handler_response(self.handler.setup(req.run_id, req.metadata).await, id)
251246
}
252247

253248
async fn handle_create_tables(
@@ -259,7 +254,10 @@ impl<H: Handler> Server<H> {
259254
Ok(r) => r,
260255
Err(e) => return e,
261256
};
262-
Self::handler_response(self.handler.create_tables(req.run_id).await, id)
257+
Self::handler_response(
258+
self.handler.create_tables(req.run_id, req.datasets).await,
259+
id,
260+
)
263261
}
264262

265263
async fn handle_teardown(
@@ -304,7 +302,6 @@ mod tests {
304302
async fn setup(
305303
&mut self,
306304
_run_id: Uuid,
307-
_datasets: HashMap<String, DatasetConfig>,
308305
_metadata: HashMap<String, serde_json::Value>,
309306
) -> std::result::Result<SetupResponse, String> {
310307
Ok(SetupResponse {
@@ -316,6 +313,7 @@ mod tests {
316313
async fn create_tables(
317314
&mut self,
318315
_run_id: Uuid,
316+
_datasets: HashMap<String, DatasetConfig>,
319317
) -> std::result::Result<CreateTablesResponse, String> {
320318
Ok(CreateTablesResponse { ok: true })
321319
}
@@ -335,7 +333,7 @@ mod tests {
335333
#[tokio::test]
336334
async fn test_server_setup() {
337335
let mut server = Server::new(TestHandler);
338-
let request = r#"{"jsonrpc":"2.0","id":1,"method":"setup","params":{"run_id":"00000000-0000-0000-0000-000000000000","datasets":{},"metadata":{}}}"#;
336+
let request = r#"{"jsonrpc":"2.0","id":1,"method":"setup","params":{"run_id":"00000000-0000-0000-0000-000000000000","metadata":{}}}"#;
339337
let response = server.handle_request(request).await;
340338

341339
assert!(response.get("result").is_some());

0 commit comments

Comments
 (0)