# Workflow definition as shown for the run:
# Run Streaming - postgres-wal - cayenne - events - SF0.1 #82

# Display name of the workflow.
name: Run - Streaming
# Per-run title assembled from the dispatch inputs (source, acceleration
# engine, ETL type, scale factor). Folded scalar: the lines below are joined
# with single spaces into one line.
run-name: >-
  Run Streaming - ${{ inputs.system_under_test }} -
  ${{ inputs.acceleration }} - ${{ inputs.etl_type }} -
  SF${{ inputs.scale_factor }}
# Manual trigger only: every knob of the streaming benchmark is a
# workflow_dispatch input.
on:
  workflow_dispatch:
    inputs:
      # --- Target selection -------------------------------------------------
      environment:
        description: 'Target Spice Cloud Environment'
        required: true
        default: 'dev'
        type: choice
        options:
          - dev
          - production
      system_under_test:
        description: 'Streaming source'
        required: true
        default: 'postgres-wal'
        type: choice
        options:
          - postgres-wal
          - dynamodb-streams
          - mongodb-streams
      acceleration:
        description: 'Acceleration engine'
        required: true
        default: 'cayenne'
        type: choice
        options:
          - cayenne
          - duckdb

      # --- Workload shape ---------------------------------------------------
      scenario:
        description: 'Scenario/query set to run (e.g. tpch)'
        required: true
        default: 'tpch'
        type: string
      etl_type:
        description: 'ETL type'
        required: true
        default: 'changes'
        type: choice
        options:
          - events
          - changes
      # Scale factors are quoted so they stay strings ('0.1', not a float).
      scale_factor:
        description: 'Scale Factor'
        required: true
        default: '1'
        type: choice
        options:
          - '0.1'
          - '1'
          - '10'

      # --- Images -----------------------------------------------------------
      spidapter_image_tag:
        description: 'Spidapter Docker image tag (e.g. latest, viktor)'
        required: false
        default: 'latest'
        type: string
      custom_image_tag:
        description: 'Custom runtime container image tag from ghcr.io/spiceai/spiceai-dev (uses internal channel)'
        required: false
        default: ''
        type: string

      # --- Sizing and timeouts (numeric values are passed as strings) --------
      num_query_clients:
        description: 'Number of concurrent query clients'
        required: false
        default: '2'
        type: string
      app_memory_limit:
        description: 'Memory limit for the SCP pod (e.g. 64Gi)'
        required: false
        default: '64Gi'
        type: string
      ec2_instance_type:
        description: 'EC2 instance type override (e.g. m5.large, m5.2xlarge, r5.2xlarge)'
        required: false
        default: ''
        type: string
      checkpoint_validation_timeout:
        description: 'Max seconds to wait for checkpoint convergence (default: 3600)'
        required: false
        default: '3600'
        type: string

      # --- Boolean switches -------------------------------------------------
      use_private_cluster:
        description: 'Pin SCP app to the private cluster (spicehq org tag)'
        required: false
        default: true
        type: boolean
      enable_module_debug_logging:
        description: 'Enable debug logs'
        required: false
        default: false
        type: boolean
      enable_spice_debug:
        description: 'Enable debug mode on the Spice Cloud deployment'
        required: false
        default: false
        type: boolean
      disable_teardown:
        description: 'Skip teardown — keeps EC2 instances and SCP app alive for inspection'
        required: false
        default: false
        type: boolean
jobs:
  # Single job: prepares tooling, then drives `spicebench run` against the
  # selected streaming source through the spidapter container.
  run-spicebench:
    name: Run spicebench (streaming - ${{ inputs.system_under_test }})
    runs-on: spiceai-dev-runners
    timeout-minutes: 600
    steps:
      - uses: actions/checkout@v6
      - uses: ./.github/actions/setup-cc
      # Endpoint and credentials switch between production and dev based on
      # the `environment` input.
      - uses: ./.github/actions/management-login
        with:
          token-url: ${{ inputs.environment == 'production' && 'https://spice.ai/api/oauth/token' || 'https://dev.spice.ai/api/oauth/token' }}
          client-id: ${{ inputs.environment == 'production' && secrets.SPICE_MANAGEMENT_CLIENT_ID_PROD || secrets.SPICE_MANAGEMENT_CLIENT_ID }}
          client-secret: ${{ inputs.environment == 'production' && secrets.SPICE_MANAGEMENT_CLIENT_SECRET_PROD || secrets.SPICE_MANAGEMENT_CLIENT_SECRET }}
      - name: Log in to GHCR
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Pull spidapter image
        env:
          # The free-form tag is handed over via env instead of being
          # interpolated into the script text, so it cannot inject shell code.
          SPIDAPTER_DOCKER_TAG: ${{ inputs.spidapter_image_tag || 'latest' }}
        run: |
          docker pull "ghcr.io/spiceai/spidapter:${SPIDAPTER_DOCKER_TAG}"
      - uses: ./.github/actions/build-spicebench
      - name: Install ADBC Postgres driver
        if: ${{ startsWith(inputs.system_under_test, 'postgres-') }}
        uses: columnar-tech/setup-dbc@v1
        with:
          drivers: postgresql
      - name: Install ADBC FlightSQL driver
        uses: columnar-tech/setup-dbc@v1
        with:
          drivers: flightsql
      - name: Run spicebench
        # The script relies on bash features (pipefail, arrays).
        shell: bash
        env:
          # Re-exported from the workflow env context; presumably populated by
          # the management-login step — TODO confirm.
          SPICEAI_API_KEY: ${{ env.SPICEAI_API_KEY }}
          SPICE_CLOUD_API_URL: ${{ inputs.environment == 'production' && 'https://api.spice.ai' || 'https://dev-api.spice.ai' }}
          SYSTEM_UNDER_TEST: ${{ inputs.system_under_test }}
          SYSTEM_ADAPTER: ${{ inputs.system_under_test }} - ${{ inputs.acceleration }}
          ACCELERATION: ${{ inputs.acceleration }}
          SCENARIO: ${{ inputs.scenario || 'tpch' }}
          SCALE_FACTOR: ${{ inputs.scale_factor || '1' }}
          NUM_QUERY_CLIENTS: ${{ inputs.num_query_clients || '2' }}
          ETL_BUCKET: spicebench
          ETL_PREFIX: ${{ inputs.etl_type == 'changes' && 'data-gen-mutable' || 'data-gen' }}
          ETL_REGION: us-east-1
          ETL_SINK: adbc
          SCHEDULER_STATE_LOCATION: s3://spiceai-testing-cluster-state/spicebench-scheduler-state-${{ github.run_id }}/
          VALIDATE_CHECKPOINT_RESULTS: 'true'
          CHECKPOINT_VALIDATION_TIMEOUT: ${{ inputs.checkpoint_validation_timeout || '3600' }}
          SCRAPE_SUT_METRICS: 'true'
          SPICEAI_BENCHMARK_METRICS_KEY: ${{ secrets.SPICEAI_BENCHMARK_METRICS_KEY }}
          MINIO_ENDPOINT: ${{ secrets.MINIO_ENDPOINT }}
          # The step-level AWS_* variables carry the MinIO credentials; the
          # S3_AWS_* pair is what the script injects into the spidapter
          # container as AWS_*.
          AWS_ACCESS_KEY_ID: ${{ secrets.MINIO_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.MINIO_SECRET_ACCESS_KEY }}
          S3_AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          S3_AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          EC2_SUBNET_ID: ${{ vars.EC2_SUBNET_ID }}
          EC2_SECURITY_GROUP_ID: ${{ vars.EC2_SECURITY_GROUP_ID }}
          EC2_AMI_ID: ${{ vars.EC2_AMI_ID }}
          EC2_INSTANCE_TYPE: ${{ inputs.ec2_instance_type || vars.EC2_INSTANCE_TYPE }}
          EC2_IAM_INSTANCE_PROFILE: ${{ vars.EC2_IAM_INSTANCE_PROFILE }}
          SPIDAPTER_APP_MEMORY_LIMIT: ${{ inputs.app_memory_limit || '64Gi' }}
          ENABLE_MODULE_DEBUG_LOGGING: ${{ inputs.enable_module_debug_logging || 'false' }}
          DISABLE_TEARDOWN: ${{ inputs.disable_teardown || 'false' }}
          CUSTOM_IMAGE_TAG: ${{ inputs.custom_image_tag || '' }}
          # Passed through as-is: `false || 'true'` evaluates to 'true', which
          # would make it impossible to turn the private-cluster pin off. The
          # script treats anything other than "false" as enabled, so an unset
          # value still defaults to on.
          USE_PRIVATE_CLUSTER: ${{ inputs.use_private_cluster }}
          # Inputs previously interpolated straight into the script body.
          ENABLE_SPICE_DEBUG: ${{ inputs.enable_spice_debug || 'false' }}
          SPIDAPTER_DOCKER_TAG: ${{ inputs.spidapter_image_tag || 'latest' }}
        run: |
          set -euo pipefail

          if [ "${ENABLE_MODULE_DEBUG_LOGGING}" = "true" ]; then
            export RUST_LOG='info,etl=debug,spicebench=debug,data_generation=debug,etl::sink::adbc=debug,etl::sink::dynamodb=debug,etl::sink::mongodb=debug'
          else
            export RUST_LOG='info'
          fi

          # NOTE(review): this label says "github-hosted" while the job runs on
          # spiceai-dev-runners — confirm the reported executor type is intended.
          EXECUTOR_INSTANCE_TYPE="github-hosted-ubuntu-latest"
          ETL_ENDPOINT="${MINIO_ENDPOINT}"

          # Storage budget (GB) per scale factor; also used as the EC2 disk size.
          case "${SCALE_FACTOR}" in
            0.1) STREAMING_EPHEMERAL_STORAGE_LIMIT_GB=8 ;;
            1)   STREAMING_EPHEMERAL_STORAGE_LIMIT_GB=20 ;;
            10)  STREAMING_EPHEMERAL_STORAGE_LIMIT_GB=100 ;;
            *)   STREAMING_EPHEMERAL_STORAGE_LIMIT_GB=10 ;;
          esac
          export EC2_DISK_SIZE_GB="${STREAMING_EPHEMERAL_STORAGE_LIMIT_GB}"

          # Map system_under_test → spidapter scenario + whether EC2 env vars are needed
          case "${SYSTEM_UNDER_TEST}" in
            postgres-wal)
              USE_EC2=true
              export SPICEBENCH_TARGET_BATCH_ROWS=50000
              export SPICEBENCH_ADBC_DELETE_BATCH_SIZE=5000
              export SPICEBENCH_ADBC_UPDATE_STRATEGY=staging_table
              export SPICEBENCH_ADBC_REUSE_BULK_INGEST_STREAMS=false
              export SPICEBENCH_ADBC_ANALYZE_STAGING_BEFORE_MERGE=true
              ;;
            # NOTE(review): not offered by the system_under_test choice input.
            postgres-debezium)
              USE_EC2=true
              export SPICEBENCH_TARGET_BATCH_ROWS=50000
              export SPICEBENCH_ADBC_DELETE_BATCH_SIZE=10000
              export SPICEBENCH_ADBC_UPDATE_STRATEGY=staging_table
              export SPICEBENCH_ADBC_REUSE_BULK_INGEST_STREAMS=false
              export SPICEBENCH_ADBC_ANALYZE_STAGING_BEFORE_MERGE=true
              ;;
            dynamodb-streams)
              USE_EC2=false
              export SPICEBENCH_TARGET_BATCH_ROWS=50000
              ;;
            mongodb-streams)
              USE_EC2=true
              export SPICEBENCH_TARGET_BATCH_ROWS=50000
              ;;
            *)
              # Fail with a clear message instead of tripping `set -u` on USE_EC2 later.
              echo "::error::Unsupported system_under_test: ${SYSTEM_UNDER_TEST}"
              exit 1
              ;;
          esac

          # A custom runtime image tag switches spidapter to the internal channel.
          SPIDAPTER_CHANNEL="nightly"
          if [ -n "${CUSTOM_IMAGE_TAG}" ]; then
            SPIDAPTER_CHANNEL="internal"
          fi

          # Common docker opts. A bare `-e NAME` forwards the variable from this
          # step's environment; `-e NAME=value` sets it explicitly.
          adapter_opts=(
            run -i
            -e "SPIDAPTER_SCENARIO=${SYSTEM_UNDER_TEST}"
            -e "SPIDAPTER_CHANNEL=${SPIDAPTER_CHANNEL}"
            -e "SPICE_ACCELERATION=${ACCELERATION}"
            -e SPICEAI_API_KEY -e SPICE_CLOUD_API_URL
            -e "AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID}" -e "AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY}"
            -e SPIDAPTER_APP_MEMORY_LIMIT
            -e "SPIDAPTER_EPHEMERAL_STORAGE_LIMIT_GB=${STREAMING_EPHEMERAL_STORAGE_LIMIT_GB}Gi"
            -e SCHEDULER_STATE_LOCATION
          )
          if [ "${USE_EC2}" = "true" ]; then
            adapter_opts+=(-e EC2_SUBNET_ID -e EC2_SECURITY_GROUP_ID -e EC2_AMI_ID -e EC2_INSTANCE_TYPE -e EC2_DISK_SIZE_GB -e EC2_IAM_INSTANCE_PROFILE -e AWS_REGION=us-west-2)
          fi
          # Enabled unless explicitly switched off (see USE_PRIVATE_CLUSTER in env).
          if [ "${USE_PRIVATE_CLUSTER}" != "false" ]; then
            adapter_opts+=(-e SPIDAPTER_ORGANIZATION_TAG=spicehq)
          fi
          if [ -n "${CUSTOM_IMAGE_TAG}" ]; then
            adapter_opts+=(-e "SPIDAPTER_IMAGE_TAG=${CUSTOM_IMAGE_TAG}")
          fi
          if [ "${ENABLE_SPICE_DEBUG}" = "true" ]; then
            adapter_opts+=(-e SPIDAPTER_SPICE_DEBUG=true)
          fi
          # spicebench receives the docker arguments as one space-separated string.
          ADAPTER_ARGS="${adapter_opts[*]} ghcr.io/spiceai/spidapter:${SPIDAPTER_DOCKER_TAG} stdio --verbose"

          # Assemble the spicebench command line as an array so values are never
          # re-split or glob-expanded by the shell.
          spicebench_args=(
            --concurrency "${NUM_QUERY_CLIENTS}"
            --scenario "${SCENARIO}"
            --executor-instance-type "${EXECUTOR_INSTANCE_TYPE}"
            --etl-bucket "${ETL_BUCKET}"
            --scale-factor "${SCALE_FACTOR}"
            --etl-prefix "${ETL_PREFIX}"
            --etl-region "${ETL_REGION}"
          )
          if [ -n "${ETL_ENDPOINT}" ]; then
            spicebench_args+=(--etl-endpoint "${ETL_ENDPOINT}")
          fi
          spicebench_args+=(--etl-sink "${ETL_SINK}" --table-format parquet)
          if [ "${VALIDATE_CHECKPOINT_RESULTS}" = "true" ]; then
            spicebench_args+=(--validate-results --checkpoint-validation-timeout "${CHECKPOINT_VALIDATION_TIMEOUT}")
          fi
          if [ "${SCRAPE_SUT_METRICS}" = "true" ]; then
            spicebench_args+=(--scrape-sut-metrics)
          fi
          spicebench_args+=(
            --system-adapter-stdio-cmd docker
            --system-adapter-stdio-args "${ADAPTER_ARGS}"
            --system-adapter-env "SCHEDULER_STATE_LOCATION=${SCHEDULER_STATE_LOCATION}"
          )
          if [ "${DISABLE_TEARDOWN}" = "true" ]; then
            spicebench_args+=(--no-teardown)
          fi

          set -x
          ~/.spice/bin/spicebench run "${spicebench_args[@]}"