Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/actions/build-spicebench/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ runs:
shell: bash
run: |
mkdir -p ~/.spice/bin
cargo build -p spicebench
install -m 755 target/debug/spicebench ~/.spice/bin/spicebench
cargo build -p spicebench --release
install -m 755 target/release/spicebench ~/.spice/bin/spicebench

- name: Save spicebench cache
if: steps.build-spicebench.outcome == 'success'
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/run_spicebench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,12 @@ jobs:
export SPICEBENCH_ADBC_DELETE_BATCH_SIZE=50000
export SPICEBENCH_ADBC_UPDATE_STRATEGY=staging_table
export SPICEBENCH_TARGET_BATCH_ROWS=500000
# ADBC writes are heavyweight (staging-parquet upload + MERGE), so
# splitting a segment into many small writes hurts: disable chunking
# (one write per segment) and let each table do 1 in-flight write with
# no global ceiling (tables ingest concurrently). Mirrors trunk.
export SPICEBENCH_SINK_CHUNK_ROWS=0
export SPICEBENCH_SINK_PARALLELISM_PER_TABLE=1
export SPICEBENCH_ADBC_MAX_INGEST_BATCH_BYTES=1268435456
export SPICEBENCH_ADBC_REUSE_BULK_INGEST_STREAMS=false
export SPICEBENCH_ADBC_ANALYZE_STAGING_BEFORE_MERGE=true
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/run_spicebench_debug_spice_cloud.yml
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,12 @@ jobs:

export SPICEBENCH_ADBC_UPDATE_STRATEGY=${{ inputs.adbc_update_strategy || 'bulk_ingest_upsert' }}
export SPICEBENCH_ADBC_DELETE_BATCH_SIZE=50000
# ADBC sink (spidapter -> Cayenne): writes are heavyweight, so don't
# split a segment into many small writes — that floods the executor
# with tiny ingests (the prior crash). One write per segment, 1 in-flight
# write per table, no global ceiling (tables ingest concurrently).
export SPICEBENCH_SINK_CHUNK_ROWS=0
export SPICEBENCH_SINK_PARALLELISM_PER_TABLE=1
if [ "${SPICEBENCH_ADBC_UPDATE_STRATEGY}" = "bulk_ingest_upsert" ]; then
export SPICEBENCH_ADBC_FLUSH_STREAM_BEFORE_UPSERT=true
else
Expand Down
56 changes: 42 additions & 14 deletions .github/workflows/run_spicebench_debug_streaming.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ on:
disable_teardown:
type: boolean
default: false
run_tag:
type: string
default: ''
validate_results:
type: boolean
default: true
workflow_dispatch:
inputs:
environment:
Expand All @@ -69,8 +75,8 @@ on:
type: choice
options:
- postgres-wal
- dynamodb-streams
- mongodb-streams
- dynamodb-streams
acceleration:
description: 'Acceleration engine'
required: true
Expand Down Expand Up @@ -151,6 +157,16 @@ on:
required: false
default: false
type: boolean
run_tag:
description: 'Optional suffix appended to the adapter_name metric label (e.g. "analyze-fix"). Used to differentiate runs in Grafana.'
required: false
default: ''
type: string
validate_results:
description: 'Validate checkpoint results (disable to measure raw ETL throughput without waiting for spiced convergence)'
required: false
default: true
type: boolean

jobs:
run-spicebench:
Expand Down Expand Up @@ -197,6 +213,7 @@ jobs:
SPICE_CLOUD_API_URL: ${{ inputs.environment == 'production' && 'https://api.spice.ai' || 'https://dev-api.spice.ai' }}
SYSTEM_UNDER_TEST: ${{ inputs.system_under_test }}
SYSTEM_ADAPTER: ${{ inputs.system_under_test }}-${{ inputs.acceleration }}
RUN_TAG: ${{ inputs.run_tag || '' }}
ACCELERATION: ${{ inputs.acceleration }}
SCENARIO: ${{ inputs.scenario || 'tpch' }}
SCALE_FACTOR: ${{ inputs.scale_factor || '1' }}
Expand All @@ -206,7 +223,7 @@ jobs:
ETL_REGION: us-east-1
ETL_SINK: adbc
SCHEDULER_STATE_LOCATION: s3://spiceai-testing-cluster-state/spicebench-scheduler-state-${{ github.run_id }}/
VALIDATE_CHECKPOINT_RESULTS: 'true'
VALIDATE_CHECKPOINT_RESULTS: ${{ inputs.validate_results != false && 'true' || 'false' }}
CHECKPOINT_VALIDATION_TIMEOUT: ${{ inputs.checkpoint_validation_timeout || '3600' }}
SCRAPE_SUT_METRICS: 'true'
SPICEAI_BENCHMARK_METRICS_KEY: ${{ secrets.SPICEAI_BENCHMARK_METRICS_KEY }}
Expand All @@ -220,6 +237,7 @@ jobs:
EC2_AMI_ID: ${{ vars.EC2_AMI_ID }}
EC2_INSTANCE_TYPE: ${{ inputs.ec2_instance_type || vars.EC2_INSTANCE_TYPE }}
EC2_IAM_INSTANCE_PROFILE: ${{ vars.EC2_IAM_INSTANCE_PROFILE }}
MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
SPIDAPTER_APP_MEMORY_LIMIT: ${{ inputs.app_memory_limit || '64Gi' }}
ENABLE_MODULE_DEBUG_LOGGING: ${{ inputs.enable_module_debug_logging || 'false' }}
DISABLE_TEARDOWN: ${{ inputs.disable_teardown || 'false' }}
Expand All @@ -228,7 +246,7 @@ jobs:
run: |
set -euo pipefail
if [ "${ENABLE_MODULE_DEBUG_LOGGING}" = "true" ]; then
export RUST_LOG='info,etl=debug,spicebench=debug,data_generation=debug,etl::sink::adbc=debug,etl::sink::dynamodb=debug,etl::sink::mongodb=debug'
export RUST_LOG='info,spicebench=debug,data_generation=debug,etl::sink::adbc=debug,etl::sink::dynamodb=debug,etl::sink::mongodb=debug'
else
export RUST_LOG='info'
fi
Expand Down Expand Up @@ -260,27 +278,31 @@ jobs:
case "${SYSTEM_UNDER_TEST}" in
postgres-wal)
USE_EC2=true
export SPICEBENCH_TARGET_BATCH_ROWS=50000
export SPICEBENCH_TARGET_BATCH_ROWS=100000
export SPICEBENCH_SINK_PARALLELISM_PER_TABLE=8
export SPICEBENCH_SINK_CHUNK_ROWS=20000
export SPICEBENCH_SINK_PARALLELISM=8
export SPICEBENCH_ADBC_DELETE_BATCH_SIZE=5000
export SPICEBENCH_ADBC_UPDATE_STRATEGY=staging_table
export SPICEBENCH_ADBC_REUSE_BULK_INGEST_STREAMS=false
export SPICEBENCH_ADBC_ANALYZE_STAGING_BEFORE_MERGE=true
;;
postgres-debezium)
USE_EC2=true
export SPICEBENCH_TARGET_BATCH_ROWS=50000
export SPICEBENCH_ADBC_DELETE_BATCH_SIZE=10000
export SPICEBENCH_ADBC_UPDATE_STRATEGY=staging_table
export SPICEBENCH_ADBC_REUSE_BULK_INGEST_STREAMS=false
export SPICEBENCH_ADBC_ANALYZE_STAGING_BEFORE_MERGE=true
;;
dynamodb-streams)
USE_EC2=false
export SPICEBENCH_TARGET_BATCH_ROWS=50000
;;
mongodb-streams)
USE_EC2=true
export SPICEBENCH_TARGET_BATCH_ROWS=50000
# Connect to the existing MongoDB Atlas cluster (MONGODB_ATLAS_URI)
# instead of provisioning EC2 — no EC2 env vars needed.
USE_EC2=false
export SPICEBENCH_TARGET_BATCH_ROWS=320000
# Mongo writes are light and benefit from many concurrent small
# writes. Keep chunking; allow up to 64 in-flight writes per table
# (the new per-table default is 1, which would otherwise serialize
# a single hot table) and cap the aggregate at 64 globally.
export SPICEBENCH_SINK_CHUNK_ROWS=5000
export SPICEBENCH_SINK_PARALLELISM_PER_TABLE=64
export SPICEBENCH_SINK_PARALLELISM=64
;;
esac

Expand All @@ -302,6 +324,12 @@ jobs:
ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e EC2_SUBNET_ID -e EC2_SECURITY_GROUP_ID -e EC2_AMI_ID -e EC2_INSTANCE_TYPE -e EC2_DISK_SIZE_GB -e EC2_IAM_INSTANCE_PROFILE -e AWS_REGION=us-west-2"
fi

# MongoDB connects to the existing Atlas cluster; pass the URI secret
# into the adapter container so the scenario can interpolate it.
if [ "${SYSTEM_UNDER_TEST}" = "mongodb-streams" ]; then
ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e MONGODB_ATLAS_URI"
fi

[ "${USE_PRIVATE_CLUSTER}" = "true" ] && ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPIDAPTER_ORGANIZATION_TAG=spicehq"
[ -n "${CUSTOM_IMAGE_TAG}" ] && ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPIDAPTER_IMAGE_TAG=${CUSTOM_IMAGE_TAG}"
[ "${{ inputs.enable_spice_debug }}" = "true" ] && ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPIDAPTER_SPICE_DEBUG=true"
Expand Down
Loading
Loading