# Skip to content
#
# Run (Debug) - spice_cloud #121
#
# Run (Debug) - spice_cloud
#
# Run (Debug) - spice_cloud #121

# Workflow title.
name: "Run (Debug - Spice Cloud)"
# Per-run title: appends the selected system under test, falling back to
# 'spice_cloud' when the input is empty.
run-name: "Run (Debug) - ${{ github.event.inputs.system_under_test || 'spice_cloud' }}"
# Manual trigger only. Every knob for the run is a workflow_dispatch input;
# the job steps read them through `github.event.inputs.*`.
on:
  workflow_dispatch:
    inputs:
      system_under_test:
        description: "System under test"
        required: true
        default: spice_cloud
        type: choice
        options:
          - spice_cloud
      # Selects prod vs. dev endpoints and credentials in the job steps.
      environment:
        description: "Target Spice Cloud Environment"
        required: true
        default: "dev"
        type: choice
        options:
          - dev
          - prod
      scenario:
        description: "Scenario/query set to run (e.g. tpch)"
        required: true
        default: "tpch"
        type: string
      # 'changes' maps to the 'data-gen-mutable' ETL prefix in the run step;
      # any other value uses 'data-gen'.
      etl_type:
        description: "ETL type"
        required: true
        default: "events"
        type: choice
        options:
          - events
          - changes
      # Options are quoted so they stay strings ("0.1", not a float).
      scale_factor:
        description: "Scale Factor"
        required: true
        default: "1"
        type: choice
        options:
          - "0.1"
          - "1"
          - "10"
      spidapter_version:
        description: "Spidapter image version tag (e.g. latest, v1.0.0)"
        required: false
        default: "latest"
        type: string
      num_query_clients:
        description: "Number of concurrent query clients"
        required: false
        default: "2"
        type: string
      enable_module_debug_logging:
        description: "Enable debug logs"
        required: false
        default: false
        type: boolean
      disable_teardown:
        description: "Skip the teardown RPC call to the system adapter"
        required: false
        default: false
        type: boolean
      # NOTE(review): the description says sizes are derived from scale_factor,
      # but the "Run spicebench" step in this file passes fixed sizes
      # (app=3GB, executor=2GB) - confirm which is intended.
      enable_pvc:
        description: "Enable PVC block storage for scheduler and executor pods (sizes are derived from scale_factor)"
        required: false
        default: false
        type: boolean
      executor_replicas:
        description: "Number of executor replicas"
        required: false
        default: "4"
        type: string
      app_memory_limit:
        description: "Memory limit for the scheduler (app) pod (e.g. 16Gi, 20Gi)"
        required: false
        default: "16Gi"
        type: string
      executor_memory_limit:
        description: "Memory limit for the executor pod (e.g. 16Gi, 62Gi)"
        required: false
        default: "16Gi"
        type: string
      # A non-empty value also switches the adapter channel from 'nightly' to
      # 'internal' in the run step.
      custom_image:
        description: "Custom runtime container image (e.g. ghcr.io/spiceai/spiceai-dev:spicebench-sf10). Overrides the default channel image. Requires the internal update channel on the target SCP environment."
        required: false
        default: ""
        type: string
jobs:
run-spicebench:
name: Run spicebench
runs-on: spiceai-dev-runners
timeout-minutes: 600
steps:
# Check out the repository; the repository-local actions referenced below
# (./.github/actions/*) are loaded from the checked-out tree.
- uses: actions/checkout@v6
# Repository-local action; its behavior is defined outside this file.
- uses: ./.github/actions/setup-cc
# Log in to the management API of the selected environment (prod or dev).
# NOTE(review): later steps read `env.SPICEAI_API_KEY`; presumably this action
# exports it - confirm against the action definition.
# NOTE(review): `cond && a || b` falls through to `b` whenever `a` is empty, so
# an unset *_PROD secret silently selects the dev credential while token-url
# still points at prod.
- uses: ./.github/actions/management-login
  with:
    token-url: ${{ github.event.inputs.environment == 'prod' && 'https://spice.ai/api/oauth/token' || 'https://dev.spice.ai/api/oauth/token' }}
    client-id: ${{ github.event.inputs.environment == 'prod' && secrets.SPICE_MANAGEMENT_CLIENT_ID_PROD || secrets.SPICE_MANAGEMENT_CLIENT_ID }}
    client-secret: ${{ github.event.inputs.environment == 'prod' && secrets.SPICE_MANAGEMENT_CLIENT_SECRET_PROD || secrets.SPICE_MANAGEMENT_CLIENT_SECRET }}
# Authenticate Docker against ghcr.io using the triggering actor and the
# workflow's GITHUB_TOKEN, so the spidapter image can be pulled.
- name: Log in to GHCR
  uses: docker/login-action@v3
  with:
    registry: ghcr.io
    username: ${{ github.actor }}
    password: ${{ secrets.GITHUB_TOKEN }}
# Pull the spidapter adapter image; later steps inspect and run it locally.
- name: pull spidapter image
  env:
    # The tag is user-supplied input. Pass it through an environment variable
    # rather than expanding `${{ }}` inside the script, so its contents cannot
    # be interpreted as shell syntax.
    SPIDAPTER_IMAGE: "ghcr.io/spiceai/spidapter:${{ github.event.inputs.spidapter_version || 'latest' }}"
  run: docker pull "${SPIDAPTER_IMAGE}"
# Repository-local action; the run step later invokes ~/.spice/bin/spicebench.
# NOTE(review): presumably this action builds/installs that binary - confirm.
- uses: "./.github/actions/build-spicebench"
# Preflight checks: fail early with a clear message when the scenario, the API
# key, the docker CLI, or the locally pulled spidapter image is missing.
- name: Validate adapter configuration
  env:
    SCENARIO: ${{ github.event.inputs.scenario || 'tpch' }}
    # NOTE(review): presumably exported to the job environment by an earlier
    # step (management-login) - confirm.
    SPICEAI_API_KEY: ${{ env.SPICEAI_API_KEY }}
    SPICE_CLOUD_API_URL: ${{ github.event.inputs.environment == 'prod' && 'https://api.spice.ai' || 'https://dev-api.spice.ai' }}
    # The tag is user-supplied input. Pass it through an environment variable
    # rather than expanding `${{ }}` inside the script, so its contents cannot
    # be interpreted as shell syntax.
    SPIDAPTER_IMAGE: "ghcr.io/spiceai/spidapter:${{ github.event.inputs.spidapter_version || 'latest' }}"
  run: |
    set -euo pipefail
    if [ -z "${SCENARIO}" ]; then
      echo "SCENARIO must not be empty"
      exit 1
    fi
    if [ -z "${SPICEAI_API_KEY:-}" ]; then
      echo "SPICEAI_API_KEY must be set for spice_cloud"
      exit 1
    fi
    if ! command -v docker >/dev/null 2>&1; then
      echo "docker is required for spice_cloud mode"
      exit 1
    fi
    # The image must already be present locally (see the pull step).
    docker image inspect "${SPIDAPTER_IMAGE}" >/dev/null 2>&1 || {
      echo "spidapter docker image not found locally; pull step may have failed"
      exit 1
    }
# Install the FlightSQL driver via setup-dbc; the run step uses the "adbc"
# ETL sink.
- name: Install ADBC FlightSQL driver
  uses: columnar-tech/setup-dbc@v1
  with:
    drivers: flightsql
- name: Run spicebench
env:
SPICEAI_API_KEY: ${{ env.SPICEAI_API_KEY }}
SPICE_CLOUD_API_URL: ${{ github.event.inputs.environment == 'prod' && 'https://api.spice.ai' || 'https://dev-api.spice.ai' }}
SCENARIO: ${{ github.event.inputs.scenario || 'tpch' }}
SYSTEM_UNDER_TEST: ${{ github.event.inputs.system_under_test || 'spice_cloud' }}
SYSTEM_ADAPTER: ${{ github.event.inputs.system_under_test || 'spice_cloud' }}
NUM_QUERY_CLIENTS: ${{ github.event.inputs.num_query_clients || '2' }}
ETL_BUCKET: "spicebench"
ETL_PREFIX: ${{ github.event.inputs.etl_type == 'changes' && 'data-gen-mutable' || 'data-gen' }}
SCALE_FACTOR: ${{ github.event.inputs.scale_factor || '1' }}
ETL_REGION: "us-east-1"
ETL_SINK: "adbc"
SCHEDULER_STATE_LOCATION: "s3://spiceai-testing-cluster-state/spicebench-scheduler-state-${{ github.run_id }}/"
VALIDATE_CHECKPOINT_RESULTS: "true"
ENABLE_MODULE_DEBUG_LOGGING: ${{ github.event.inputs.enable_module_debug_logging || 'false' }}
SCRAPE_SUT_METRICS: "true"
SPICEAI_BENCHMARK_METRICS_KEY: ${{ secrets.SPICEAI_BENCHMARK_METRICS_KEY }}
MINIO_ENDPOINT: ${{ secrets.MINIO_ENDPOINT }}
AWS_ACCESS_KEY_ID: ${{ secrets.MINIO_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.MINIO_SECRET_ACCESS_KEY }}
S3_AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
S3_AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
SPIDAPTER_ICEBERG_REGION: us-west-1
SPIDAPTER_ICEBERG_CATALOG_FROM: iceberg:https://glue.us-west-1.amazonaws.com/iceberg/v1/catalogs/211125479522/namespaces
DISABLE_TEARDOWN: ${{ github.event.inputs.disable_teardown || 'false' }}
ENABLE_PVC: ${{ github.event.inputs.enable_pvc || 'false' }}
EXECUTOR_REPLICAS: ${{ github.event.inputs.executor_replicas || '4' }}
SPIDAPTER_APP_MEMORY_LIMIT: ${{ github.event.inputs.app_memory_limit || '16Gi' }}
SPIDAPTER_EXECUTOR_MEMORY_LIMIT: ${{ github.event.inputs.executor_memory_limit || '16Gi' }}
SPIDAPTER_ORGANIZATION_TAG: "spicehq"
CUSTOM_IMAGE: ${{ github.event.inputs.custom_image || '' }}
run: |
set -euo pipefail
if [ "${ENABLE_MODULE_DEBUG_LOGGING}" = "true" ]; then
export RUST_LOG='info,etl=debug,spicebench=debug,data_generation=debug'
else
export RUST_LOG='info'
fi
TABLE_FORMAT="parquet"
EXECUTOR_INSTANCE_TYPE="github-hosted-ubuntu-latest"
ETL_ENDPOINT="${MINIO_ENDPOINT}"
ETL_ARGS="--etl-bucket ${ETL_BUCKET} --scale-factor ${SCALE_FACTOR}"
if [ -n "${ETL_PREFIX}" ]; then
ETL_ARGS="${ETL_ARGS} --etl-prefix ${ETL_PREFIX}"
fi
if [ -n "${ETL_REGION}" ]; then
ETL_ARGS="${ETL_ARGS} --etl-region ${ETL_REGION}"
fi
if [ -n "${ETL_ENDPOINT:-}" ]; then
ETL_ARGS="${ETL_ARGS} --etl-endpoint ${ETL_ENDPOINT}"
fi
ETL_SINK_ARGS="--etl-sink ${ETL_SINK} --table-format ${TABLE_FORMAT}"
if [ "${ETL_SINK}" = "adbc" ]; then
:
fi
VALIDATION_ARGS=""
if [ "${VALIDATE_CHECKPOINT_RESULTS}" = "true" ]; then
VALIDATION_ARGS="--validate-results"
fi
SCHEDULER_STATE_ADAPTER_ENV="--system-adapter-env SCHEDULER_STATE_LOCATION=${SCHEDULER_STATE_LOCATION}"
SUT_METRICS_ARGS=""
if [ "${SCRAPE_SUT_METRICS}" = "true" ]; then
SUT_METRICS_ARGS="--scrape-sut-metrics"
fi
export SPICEBENCH_ADBC_UPDATE_STRATEGY=bulk_ingest_upsert
export SPICEBENCH_ADBC_FLUSH_STREAM_BEFORE_UPSERT=true
export SPICEBENCH_ADBC_DELETE_BATCH_SIZE=50000
export SPICEBENCH_ADBC_SINK_POOL_SIZE=1
ADAPTER_CMD="docker"
ADAPTER_DOCKER_OPTS="run -i -e SPIDAPTER_EXECUTOR_REPLICAS=${EXECUTOR_REPLICAS} -e SPICEAI_API_KEY -e SPICE_CLOUD_API_URL -e AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID} -e AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY} -e SPIDAPTER_ICEBERG_REGION -e SPIDAPTER_ICEBERG_CATALOG_FROM -e SCHEDULER_STATE_LOCATION"
ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPIDAPTER_APP_MEMORY_LIMIT=${SPIDAPTER_APP_MEMORY_LIMIT}"
ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPIDAPTER_EXECUTOR_MEMORY_LIMIT=${SPIDAPTER_EXECUTOR_MEMORY_LIMIT}"
ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPIDAPTER_ORGANIZATION_TAG"
# Parse custom image into registry, name, and tag for spidapter.
# Expected format: registry/image:tag (e.g. ghcr.io/spiceai/spiceai-dev:spicebench-sf10)
# Parses as: registry=ghcr.io, image=spiceai/spiceai-dev, tag=spicebench-sf10
if [ -n "${CUSTOM_IMAGE}" ]; then
IMAGE_WITH_TAG="${CUSTOM_IMAGE}"
if [[ "${IMAGE_WITH_TAG}" == *":"* ]]; then
SPIDAPTER_IMAGE_TAG="${IMAGE_WITH_TAG##*:}"
IMAGE_WITHOUT_TAG="${IMAGE_WITH_TAG%:*}"
else
SPIDAPTER_IMAGE_TAG=""
IMAGE_WITHOUT_TAG="${IMAGE_WITH_TAG}"
fi
# Registry is the first path component (hostname), image is the rest
SPIDAPTER_IMAGE_REGISTRY="${IMAGE_WITHOUT_TAG%%/*}"
SPIDAPTER_IMAGE_NAME="${IMAGE_WITHOUT_TAG#*/}"
echo "Custom image: registry=${SPIDAPTER_IMAGE_REGISTRY}, name=${SPIDAPTER_IMAGE_NAME}, tag=${SPIDAPTER_IMAGE_TAG}"
ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPIDAPTER_IMAGE_REGISTRY=${SPIDAPTER_IMAGE_REGISTRY} -e SPIDAPTER_IMAGE_NAME=${SPIDAPTER_IMAGE_NAME} -e SPIDAPTER_QUERY_MEMORY_LIMIT=500Gi"
if [ -n "${SPIDAPTER_IMAGE_TAG}" ]; then
ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPIDAPTER_IMAGE_TAG=${SPIDAPTER_IMAGE_TAG}"
fi
fi
if [ "${ENABLE_PVC}" = "true" ]; then
echo "PVC enabled: app=3GB, executor=2GB"
ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPIDAPTER_APP_STORAGE_SIZE_GB=3 -e SPIDAPTER_EXECUTOR_STORAGE_SIZE_GB=2 -e SPIDAPTER_CAYENNE_DATA_DIR=/data/data -e SPIDAPTER_CAYENNE_METADATA_DIR=/data/metadata"
fi
# Use internal channel when a custom image is specified, otherwise nightly.
SPIDAPTER_CHANNEL="nightly"
if [ -n "${CUSTOM_IMAGE}" ]; then
SPIDAPTER_CHANNEL="internal"
fi
ADAPTER_ARGS="${ADAPTER_DOCKER_OPTS} ghcr.io/spiceai/spidapter:${{ github.event.inputs.spidapter_version || 'latest' }} stdio --verbose --channel ${SPIDAPTER_CHANNEL}"
ADAPTER_ENVS=""
NO_TEARDOWN_ARG=""
if [ "${DISABLE_TEARDOWN}" = "true" ]; then
NO_TEARDOWN_ARG="--no-teardown"
fi
~/.spice/bin/spicebench run \
--concurrency "${NUM_QUERY_CLIENTS}" \
--scenario "${SCENARIO}" \
--executor-instance-type "${EXECUTOR_INSTANCE_TYPE}" \
${ETL_ARGS} \
${ETL_SINK_ARGS} \
${VALIDATION_ARGS} \
${SUT_METRICS_ARGS} \
--system-adapter-stdio-cmd "${ADAPTER_CMD}" \
--system-adapter-stdio-args "${ADAPTER_ARGS}" \
${ADAPTER_ENVS} \
${SCHEDULER_STATE_ADAPTER_ENV} \
${NO_TEARDOWN_ARG} \