Skip to content

Run - databricks-sql #503

Run - databricks-sql

Run - databricks-sql #503

Workflow file for this run

# Manually dispatched workflow; the run title shows the selected system under
# test and falls back to 'spice_cloud' when no input value is available.
name: Run
run-name: "Run - ${{ github.event.inputs.system_under_test || 'spice_cloud' }}"

on:
  workflow_dispatch:
    inputs:
      # Scenario / query set name; exported to later steps as SCENARIO.
      scenario:
        description: 'Scenario/query set to run (e.g. tpch)'
        type: string
        required: true
        default: 'tpch'
      # Target system. Values starting with 'databricks-' enable the local
      # databricks adapter steps; 'spice_cloud' enables the spidapter image steps.
      system_under_test:
        description: 'System under test (spice_cloud via spidapter docker image, or local databricks adapter modes)'
        type: choice
        options:
          - spice_cloud
          - databricks-sql
          - databricks-lakebase
        required: true
        default: 'spice_cloud'
      # 'changes' selects the 'data-gen-mutable' ETL prefix in the run step;
      # any other value selects 'data-gen'.
      etl_type:
        description: 'ETL type'
        type: choice
        options:
          - events
          - changes
        required: true
        default: 'events'
      # Quoted so the choices stay strings ('0.1', '1', '10') rather than numbers.
      scale_factor:
        description: 'Scale Factor'
        type: choice
        options:
          - '0.1'
          - '1'
          - '10'
        required: true
        default: '1'
      # Tag of the ghcr.io/spiceai/spidapter image used in spice_cloud mode.
      spidapter_version:
        description: 'Spidapter image version tag (e.g. latest, v1.0.0)'
        type: string
        required: false
        default: 'latest'
      # When true, the run step raises RUST_LOG to debug for selected modules.
      enable_module_debug_logging:
        description: 'Enable debug logs'
        type: boolean
        required: false
        default: false
jobs:
run-spicebench:
name: Run spicebench
runs-on: spiceai-dev-runners
timeout-minutes: 600
steps:
# Check out this repository first: the repository-local actions referenced as
# ./.github/actions/* in later steps are loaded from the workspace.
- uses: actions/checkout@v6
# Repository-local action; its behavior is defined outside this file.
# NOTE(review): presumably prepares the C/C++ toolchain - confirm in the action.
- uses: ./.github/actions/setup-cc
# Log in to Spice management with the client credentials below; only needed
# when the system under test is spice_cloud.
# The `|| 'spice_cloud'` fallback keeps this condition consistent with the
# run-name, the validation step and the databricks conditions, which all treat
# a missing input as spice_cloud.
# NOTE(review): presumably this action exports SPICEAI_API_KEY into the job
# environment (later steps read env.SPICEAI_API_KEY) - confirm in the action.
- uses: ./.github/actions/management-login
  if: ${{ (github.event.inputs.system_under_test || 'spice_cloud') == 'spice_cloud' }}
  with:
    client-id: ${{ secrets.SPICE_MANAGEMENT_CLIENT_ID }}
    client-secret: ${{ secrets.SPICE_MANAGEMENT_CLIENT_SECRET }}
# Authenticate docker against ghcr.io so the spidapter image can be pulled.
# Only needed for spice_cloud; the `|| 'spice_cloud'` fallback matches how the
# rest of the workflow treats a missing system_under_test input.
- name: Log in to GHCR
  if: ${{ (github.event.inputs.system_under_test || 'spice_cloud') == 'spice_cloud' }}
  uses: docker/login-action@v3
  with:
    registry: ghcr.io
    username: ${{ github.actor }}
    password: ${{ secrets.GITHUB_TOKEN }}
# Pull the spidapter image that the validation and run steps expect locally.
# The version tag is a free-form workflow input, so it is passed through an
# environment variable and quoted instead of being interpolated into the
# script text; this prevents shell injection via the input value.
# The `|| 'spice_cloud'` fallback matches how the rest of the workflow treats a
# missing system_under_test input.
- name: pull spidapter image
  if: ${{ (github.event.inputs.system_under_test || 'spice_cloud') == 'spice_cloud' }}
  env:
    SPIDAPTER_VERSION: ${{ github.event.inputs.spidapter_version || 'latest' }}
  run: docker pull "ghcr.io/spiceai/spidapter:${SPIDAPTER_VERSION}"
# Repository-local action; its behavior is defined outside this file.
# NOTE(review): presumably builds spicebench and installs it at
# ~/.spice/bin/spicebench, which the final run step invokes - confirm.
- uses: ./.github/actions/build-spicebench
# Try to reuse a previously built databricks adapter binary (databricks-* modes
# only). The key hashes the adapter and protocol crate sources; restore-keys
# allows a prefix match on the OS-specific key. The build step below checks
# this step's `cache-hit` output to decide whether to rebuild.
- name: Restore databricks adapter cache
  id: cache-databricks-adapter
  if: startsWith(github.event.inputs.system_under_test || 'spice_cloud', 'databricks-')
  uses: actions/cache/restore@v4
  with:
    path: '~/.spice/bin/databricks-system-adapter'
    key: databricks-system-adapter-${{ runner.os }}-${{ hashFiles('system-adapters/databricks/Cargo.toml', 'system-adapters/databricks/Cargo.lock', 'system-adapters/databricks/src/**/*.rs', 'crates/system-adapter-protocol/Cargo.toml', 'crates/system-adapter-protocol/src/**/*.rs') }}
    restore-keys: |
      databricks-system-adapter-${{ runner.os }}-
# Build the databricks adapter from source when running a databricks-* mode
# and the restore step did not report cache-hit == 'true'. The debug-profile
# binary is installed where the validation and run steps look for it.
- name: Build databricks adapter
  id: build-databricks-adapter
  if: >-
    startsWith(github.event.inputs.system_under_test || 'spice_cloud', 'databricks-') &&
    steps.cache-databricks-adapter.outputs.cache-hit != 'true'
  run: |
    adapter_crate=system-adapters/databricks
    adapter_bin=databricks-system-adapter
    mkdir -p ~/.spice/bin
    cargo build --manifest-path "${adapter_crate}/Cargo.toml"
    install -m 755 "${adapter_crate}/target/debug/${adapter_bin}" ~/.spice/bin/"${adapter_bin}"
# Persist the freshly built adapter binary. Runs only when the build step
# actually executed and succeeded; a skipped build has outcome 'skipped'.
# The key expression must stay identical to the one used by the restore step.
- name: Save databricks adapter cache
  if: >-
    startsWith(github.event.inputs.system_under_test || 'spice_cloud', 'databricks-') &&
    steps.build-databricks-adapter.outcome == 'success'
  uses: actions/cache/save@v4
  with:
    path: '~/.spice/bin/databricks-system-adapter'
    key: databricks-system-adapter-${{ runner.os }}-${{ hashFiles('system-adapters/databricks/Cargo.toml', 'system-adapters/databricks/Cargo.lock', 'system-adapters/databricks/src/**/*.rs', 'crates/system-adapter-protocol/Cargo.toml', 'crates/system-adapter-protocol/src/**/*.rs') }}
# Fail fast, before any drivers are built or the benchmark starts, when the
# selected system under test is missing its prerequisites.
#   spice_cloud  - needs SPICEAI_API_KEY, a docker CLI and the pulled spidapter image.
#   databricks-* - needs the four required DATABRICKS_* values in the expected
#                  shape and an executable local adapter binary.
# DATABRICKS_CATALOG and DATABRICKS_SCHEMA are passed through but not checked.
- name: Validate adapter configuration
  env:
    SYSTEM_UNDER_TEST: ${{ github.event.inputs.system_under_test || 'spice_cloud' }}
    SCENARIO: ${{ github.event.inputs.scenario || 'tpch' }}
    # Free-form input: handed to the script via the environment rather than
    # interpolated into the script text, to prevent shell injection.
    SPIDAPTER_VERSION: ${{ github.event.inputs.spidapter_version || 'latest' }}
    SPICEAI_API_KEY: ${{ env.SPICEAI_API_KEY }}
    SPICE_CLOUD_API_URL: https://dev-api.spice.ai
    DATABRICKS_ENDPOINT: ${{ secrets.DATABRICKS_ENDPOINT }}
    DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
    DATABRICKS_HTTP_PATH: ${{ secrets.DATABRICKS_HTTP_PATH }}
    DATABRICKS_SQL_WAREHOUSE_ID: ${{ secrets.DATABRICKS_SQL_WAREHOUSE_ID }}
    DATABRICKS_CATALOG: ${{ secrets.DATABRICKS_CATALOG }}
    DATABRICKS_SCHEMA: ${{ secrets.DATABRICKS_SCHEMA }}
  run: |
    set -euo pipefail
    # Strip everything from the first '-' onward: 'databricks-sql' -> 'databricks';
    # 'spice_cloud' contains no '-' and is left unchanged.
    SYSTEM_UNDER_TEST_PREFIX="${SYSTEM_UNDER_TEST%%-*}"
    if [ -z "${SCENARIO}" ]; then
      echo "SCENARIO must not be empty"
      exit 1
    fi
    case "${SYSTEM_UNDER_TEST_PREFIX}" in
      spice_cloud)
        if [ -z "${SPICEAI_API_KEY:-}" ]; then
          echo "SPICEAI_API_KEY must be set for spice_cloud"
          exit 1
        fi
        if ! command -v docker >/dev/null 2>&1; then
          echo "docker is required for spice_cloud mode"
          exit 1
        fi
        docker image inspect "ghcr.io/spiceai/spidapter:${SPIDAPTER_VERSION}" >/dev/null 2>&1 || {
          echo "spidapter docker image not found locally; pull step may have failed"
          exit 1
        }
        ;;
      databricks)
        # ${!required_var:-} is bash indirect expansion: the value of the
        # variable whose name is stored in required_var, or empty if unset.
        for required_var in DATABRICKS_ENDPOINT DATABRICKS_TOKEN DATABRICKS_HTTP_PATH DATABRICKS_SQL_WAREHOUSE_ID; do
          if [ -z "${!required_var:-}" ]; then
            echo "${required_var} must be set for databricks adapter mode"
            exit 1
          fi
        done
        if echo "${DATABRICKS_ENDPOINT}" | grep -qE '^https?://'; then
          echo "DATABRICKS_ENDPOINT should be a hostname only (no http/https scheme)"
          exit 1
        fi
        if echo "${DATABRICKS_HTTP_PATH}" | grep -qE '^/'; then
          echo "DATABRICKS_HTTP_PATH should not start with '/'"
          exit 1
        fi
        if [ ! -x "${HOME}/.spice/bin/databricks-system-adapter" ]; then
          echo "Local databricks adapter binary is missing or not executable at ${HOME}/.spice/bin/databricks-system-adapter"
          exit 1
        fi
        # Smoke test: under `set -e` a binary that cannot start fails the step.
        "${HOME}/.spice/bin/databricks-system-adapter" --help >/dev/null
        ;;
      *)
        echo "Unsupported system_under_test value: ${SYSTEM_UNDER_TEST}"
        exit 1
        ;;
    esac
# Install a Go toolchain for databricks-* modes; the Go ADBC driver build
# later in this job invokes `go build`.
# The version is quoted so it stays the string '1.23' rather than a float.
- name: Setup Go
  if: startsWith(github.event.inputs.system_under_test || 'spice_cloud', 'databricks-')
  uses: actions/setup-go@v5
  with:
    go-version: '1.23'
# Fetch the `spicebench` ref of spiceai/adbc-databricks into ./adbc-databricks
# (databricks-* modes only); the driver build step compiles it from there.
- name: Checkout adbc-databricks Go driver
  if: startsWith(github.event.inputs.system_under_test || 'spice_cloud', 'databricks-')
  uses: actions/checkout@v6
  with:
    repository: spiceai/adbc-databricks
    path: adbc-databricks
    ref: spicebench
# Compile the checked-out Go driver as a C shared library, install it as
# /usr/local/lib/libdatabricks.so, then refresh the dynamic linker cache.
- name: Build databricks Go ADBC driver
  if: startsWith(github.event.inputs.system_under_test || 'spice_cloud', 'databricks-')
  run: |
    driver_so=build/libadbc_driver_databricks.so
    cd adbc-databricks/go
    go build -tags driverlib -buildmode=c-shared -o "${driver_so}" ./pkg/
    sudo install -m 755 "${driver_so}" /usr/local/lib/libdatabricks.so
    sudo ldconfig
# databricks-* modes: install the `postgresql` ADBC driver via setup-dbc.
# NOTE(review): presumably used for the databricks-lakebase path (the run step
# passes LAKEBASE_PG_* settings) - confirm.
- name: Install ADBC Postgres driver
  if: startsWith(github.event.inputs.system_under_test || 'spice_cloud', 'databricks-')
  uses: columnar-tech/setup-dbc@v1
  with:
    drivers: postgresql
# Every non-databricks mode (i.e. spice_cloud): install the `flightsql` ADBC
# driver via setup-dbc.
# The ${{ }} wrapper is required here: a plain YAML scalar cannot begin with
# '!', which YAML reads as a tag indicator.
- name: Install ADBC FlightSQL driver
  if: ${{ !startsWith(github.event.inputs.system_under_test || 'spice_cloud', 'databricks-') }}
  uses: columnar-tech/setup-dbc@v1
  with:
    drivers: flightsql
- name: Run spicebench
env:
SPICEAI_API_KEY: ${{ env.SPICEAI_API_KEY }}
SPICE_CLOUD_API_URL: https://dev-api.spice.ai
DATABRICKS_ENDPOINT: ${{ secrets.DATABRICKS_ENDPOINT }}
DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
DATABRICKS_HTTP_PATH: ${{ secrets.DATABRICKS_HTTP_PATH }}
DATABRICKS_SQL_WAREHOUSE_ID: ${{ secrets.DATABRICKS_SQL_WAREHOUSE_ID }}
DATABRICKS_CATALOG: ${{ secrets.DATABRICKS_CATALOG }}
DATABRICKS_SCHEMA: ${{ secrets.DATABRICKS_SCHEMA }}
DATABRICKS_STAGING_VOLUME_PATH: ${{ secrets.DATABRICKS_STAGING_VOLUME_PATH }}
SCENARIO: ${{ github.event.inputs.scenario || 'tpch' }}
SYSTEM_UNDER_TEST: ${{ github.event.inputs.system_under_test || 'spice_cloud' }}
SYSTEM_ADAPTER: ${{ github.event.inputs.system_under_test || 'spice_cloud' }}
NUM_QUERY_CLIENTS: '8'
ETL_BUCKET: 'spicebench'
ETL_PREFIX: ${{ github.event.inputs.etl_type == 'changes' && 'data-gen-mutable' || 'data-gen' }}
SCALE_FACTOR: ${{ github.event.inputs.scale_factor || '1' }}
ETL_REGION: 'us-east-1'
ETL_SINK: 'adbc'
SCHEDULER_STATE_LOCATION: 's3://spiceai-testing-cluster-state/spicebench-scheduler-state-${{ github.run_id }}/'
VALIDATE_CHECKPOINT_RESULTS: 'true'
ENABLE_MODULE_DEBUG_LOGGING: ${{ github.event.inputs.enable_module_debug_logging || 'false' }}
SCRAPE_SUT_METRICS: 'true'
SPICEAI_BENCHMARK_METRICS_KEY: ${{ secrets.SPICEAI_BENCHMARK_METRICS_KEY }}
MINIO_ENDPOINT: ${{ secrets.MINIO_ENDPOINT }}
AWS_ACCESS_KEY_ID: ${{ secrets.MINIO_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.MINIO_SECRET_ACCESS_KEY }}
S3_AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
S3_AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
LAKEBASE_PG_HOST: ${{ secrets.LAKEBASE_PG_HOST }}
LAKEBASE_PG_USER: ${{ secrets.LAKEBASE_PG_USER }}
LAKEBASE_PG_DB_NAME: ${{ secrets.LAKEBASE_PG_DB_NAME }}
LAKEBASE_PG_SCHEMA: ${{ secrets.LAKEBASE_PG_SCHEMA }}
LAKEBASE_DATABASE_INSTANCE: ${{ secrets.LAKEBASE_DATABASE_INSTANCE }}
SPIDAPTER_ICEBERG_REGION: us-west-1
SPIDAPTER_ICEBERG_CATALOG_FROM: iceberg:https://glue.us-west-1.amazonaws.com/iceberg/v1/catalogs/211125479522/namespaces
run: |
set -euo pipefail
if [ "${ENABLE_MODULE_DEBUG_LOGGING}" = "true" ]; then
export RUST_LOG='info,etl=debug,spicebench=debug,data_generation=debug'
else
export RUST_LOG='info'
fi
TABLE_FORMAT="parquet"
EXECUTOR_INSTANCE_TYPE="github-hosted-ubuntu-latest"
ETL_ENDPOINT="${MINIO_ENDPOINT}"
DATABRICKS_TABLE_FORMAT="${TABLE_FORMAT}"
SYSTEM_UNDER_TEST_PREFIX="${SYSTEM_UNDER_TEST%%-*}"
ETL_ARGS="--etl-bucket ${ETL_BUCKET} --scale-factor ${SCALE_FACTOR}"
if [ -n "${ETL_PREFIX}" ]; then
ETL_ARGS="${ETL_ARGS} --etl-prefix ${ETL_PREFIX}"
fi
if [ -n "${ETL_REGION}" ]; then
ETL_ARGS="${ETL_ARGS} --etl-region ${ETL_REGION}"
fi
if [ -n "${ETL_ENDPOINT:-}" ]; then
ETL_ARGS="${ETL_ARGS} --etl-endpoint ${ETL_ENDPOINT}"
fi
ETL_SINK_ARGS="--etl-sink ${ETL_SINK} --table-format ${TABLE_FORMAT}"
if [ "${ETL_SINK}" = "adbc" ]; then
:
fi
VALIDATION_ARGS=""
if [ "${VALIDATE_CHECKPOINT_RESULTS}" = "true" ]; then
VALIDATION_ARGS="--validate-results"
fi
SCHEDULER_STATE_ADAPTER_ENV="--system-adapter-env SCHEDULER_STATE_LOCATION=${SCHEDULER_STATE_LOCATION}"
SUT_METRICS_ARGS=""
if [ "${SCRAPE_SUT_METRICS}" = "true" ]; then
SUT_METRICS_ARGS="--scrape-sut-metrics"
fi
if [ "${SYSTEM_UNDER_TEST_PREFIX}" = "databricks" ]; then
export SPICEBENCH_ADBC_DELETE_BATCH_SIZE=50000
export SPICEBENCH_ADBC_UPDATE_STRATEGY=staging_table
export SPICEBENCH_TARGET_BATCH_ROWS=500000
export SPICEBENCH_ADBC_MAX_INGEST_BATCH_BYTES=1268435456
export SPICEBENCH_ADBC_REUSE_BULK_INGEST_STREAMS=false
ADAPTER_CMD="${HOME}/.spice/bin/databricks-system-adapter"
ADAPTER_ARGS="stdio"
ADAPTER_ENVS="--system-adapter-env DATABRICKS_ENDPOINT=${DATABRICKS_ENDPOINT} --system-adapter-env DATABRICKS_TOKEN=${DATABRICKS_TOKEN} --system-adapter-env DATABRICKS_HTTP_PATH=${DATABRICKS_HTTP_PATH} --system-adapter-env DATABRICKS_SQL_WAREHOUSE_ID=${DATABRICKS_SQL_WAREHOUSE_ID} --system-adapter-env DATABRICKS_TABLE_FORMAT=${DATABRICKS_TABLE_FORMAT}"
if [ -n "${DATABRICKS_CATALOG:-}" ]; then
ADAPTER_ENVS="${ADAPTER_ENVS} --system-adapter-env DATABRICKS_CATALOG=${DATABRICKS_CATALOG}"
fi
if [ -n "${DATABRICKS_SCHEMA:-}" ]; then
ADAPTER_ENVS="${ADAPTER_ENVS} --system-adapter-env DATABRICKS_SCHEMA=${DATABRICKS_SCHEMA}"
fi
if [ -n "${DATABRICKS_STAGING_VOLUME_PATH:-}" ]; then
ADAPTER_ENVS="${ADAPTER_ENVS} --system-adapter-env DATABRICKS_STAGING_VOLUME_PATH=${DATABRICKS_STAGING_VOLUME_PATH}"
fi
else
export SPICEBENCH_ADBC_UPDATE_STRATEGY=bulk_ingest_upsert
export SPICEBENCH_ADBC_DELETE_BATCH_SIZE=50000
ADAPTER_CMD="docker"
ADAPTER_ARGS="run -i -e SPICEAI_API_KEY -e SPICE_CLOUD_API_URL -e AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID} -e AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY} -e SPIDAPTER_ICEBERG_REGION -e SPIDAPTER_ICEBERG_CATALOG_FROM -e SCHEDULER_STATE_LOCATION ghcr.io/spiceai/spidapter:${{ github.event.inputs.spidapter_version || 'latest' }} stdio --verbose --channel nightly"
ADAPTER_ENVS=""
fi
if [ "${SYSTEM_UNDER_TEST}" = "databricks-lakebase" ]; then
ADAPTER_ENVS="${ADAPTER_ENVS} --system-adapter-env DATABRICKS_COMPUTE_MODE=lakebase"
ADAPTER_ENVS="${ADAPTER_ENVS} --system-adapter-env LAKEBASE_PG_HOST=${LAKEBASE_PG_HOST}"
ADAPTER_ENVS="${ADAPTER_ENVS} --system-adapter-env LAKEBASE_PG_USER=${LAKEBASE_PG_USER}"
ADAPTER_ENVS="${ADAPTER_ENVS} --system-adapter-env LAKEBASE_PG_DB_NAME=${LAKEBASE_PG_DB_NAME}"
ADAPTER_ENVS="${ADAPTER_ENVS} --system-adapter-env LAKEBASE_PG_SCHEMA=${LAKEBASE_PG_SCHEMA}"
ADAPTER_ENVS="${ADAPTER_ENVS} --system-adapter-env LAKEBASE_DATABASE_INSTANCE=${LAKEBASE_DATABASE_INSTANCE}"
fi
~/.spice/bin/spicebench run \
--concurrency "${NUM_QUERY_CLIENTS}" \
--scenario "${SCENARIO}" \
--executor-instance-type "${EXECUTOR_INSTANCE_TYPE}" \
${ETL_ARGS} \
${ETL_SINK_ARGS} \
${VALIDATION_ARGS} \
${SUT_METRICS_ARGS} \
--system-adapter-stdio-cmd "${ADAPTER_CMD}" \
--system-adapter-stdio-args "${ADAPTER_ARGS}" \
${ADAPTER_ENVS} \
${SCHEDULER_STATE_ADAPTER_ENV} \