# Run spicebench - tpch #104
# Workflow file for this run
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Manually dispatched workflow that prepares one of two system adapters
# (the spidapter docker image or a locally built databricks adapter),
# validates the resulting configuration, and then invokes
# ~/.spice/bin/spicebench for the selected scenario.
name: Run spicebench
run-name: Run spicebench - ${{ github.event.inputs.scenario || 'tpch' }}

on:
  workflow_dispatch:
    inputs:
      scenario:
        description: 'Scenario/query set to run (e.g. tpch)'
        required: true
        default: 'tpch'
        type: string
      system_adapter:
        description: 'System adapter to run (docker spidapter or local databricks adapter)'
        required: true
        default: 'spidapter'
        type: choice
        options:
          - spidapter
          - databricks
      databricks_variant:
        description: 'Databricks adapter variant (databricks or lakebase)'
        required: false
        default: 'databricks'
        type: choice
        options:
          - databricks
          - lakebase
      etl_bucket:
        description: 'S3 bucket for ETL source and target data'
        required: true
        default: 'spiceai-public-datasets'
        type: string
      etl_prefix:
        description: 'S3 key prefix (the {prefix} portion of {prefix}/{scenario}/{version}/)'
        required: false
        default: 'data-gen'
        type: string
      etl_version:
        description: 'Version identifier for the data generation to read from'
        required: true
        type: string
        # Quoted so the default stays the string "1" rather than an integer.
        default: '1'
      etl_region:
        description: 'AWS region for the ETL S3 bucket'
        required: false
        default: 'us-east-1'
        type: string
      etl_endpoint:
        description: 'S3 endpoint URL for ETL bucket (for MinIO/LocalStack)'
        required: false
        type: string
      table_format:
        description: 'Table format across generation and adapter setup (iceberg, parquet, delta)'
        required: false
        default: 'parquet'
        type: string
      executor_instance_type:
        description: 'Executor instance type label for benchmark comparison and dashboarding'
        required: false
        default: 'github-hosted-ubuntu-latest'
        type: string
      with_created_at:
        description: 'Append a __created_at timestamp column to every batch written to the sink'
        required: false
        default: false
        type: boolean

jobs:
  run-spicebench:
    name: Run spicebench
    runs-on: ubuntu-latest
    # NOTE(review): GitHub-hosted runners are documented to cap a job at
    # 6 hours (360 minutes), so 600 may never be reached on ubuntu-latest.
    # Confirm the intended limit.
    timeout-minutes: 600
    steps:
      - uses: actions/checkout@v6

      # --- spidapter-only preparation -------------------------------------
      - uses: ./.github/actions/management-login
        if: ${{ github.event.inputs.system_adapter == 'spidapter' }}
        with:
          client-id: ${{ secrets.SPICE_MANAGEMENT_CLIENT_ID }}
          client-secret: ${{ secrets.SPICE_MANAGEMENT_CLIENT_SECRET }}

      - name: Log in to GHCR
        if: ${{ github.event.inputs.system_adapter == 'spidapter' }}
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: pull spidapter image
        if: ${{ github.event.inputs.system_adapter == 'spidapter' }}
        run: docker pull ghcr.io/spiceai/spidapter:latest

      - uses: ./.github/actions/build-spicebench

      # --- databricks-only preparation ------------------------------------
      # Only the installed adapter binary is cached. The key hashes the
      # adapter and protocol crate manifests and sources.
      - name: Restore databricks adapter cache
        if: ${{ github.event.inputs.system_adapter == 'databricks' }}
        id: cache-databricks-adapter
        uses: actions/cache/restore@v4
        with:
          path: ~/.spice/bin/databricks-system-adapter
          key: databricks-system-adapter-${{ runner.os }}-${{ hashFiles('system-adapters/databricks/Cargo.toml', 'system-adapters/databricks/Cargo.lock', 'system-adapters/databricks/src/**/*.rs', 'crates/system-adapter-protocol/Cargo.toml', 'crates/system-adapter-protocol/src/**/*.rs') }}
          restore-keys: |
            databricks-system-adapter-${{ runner.os }}-

      # Runs whenever the restore step did not report cache-hit == 'true';
      # the freshly built binary then overwrites whatever is at the path.
      - name: Build databricks adapter
        if: ${{ github.event.inputs.system_adapter == 'databricks' && steps.cache-databricks-adapter.outputs.cache-hit != 'true' }}
        id: build-databricks-adapter
        run: |
          mkdir -p ~/.spice/bin
          cargo build --manifest-path system-adapters/databricks/Cargo.toml
          install -m 755 system-adapters/databricks/target/debug/databricks-system-adapter ~/.spice/bin/databricks-system-adapter

      # Saved only after a successful build in this run, under the same key
      # expression used by the restore step.
      - name: Save databricks adapter cache
        if: ${{ github.event.inputs.system_adapter == 'databricks' && steps.build-databricks-adapter.outcome == 'success' }}
        uses: actions/cache/save@v4
        with:
          path: ~/.spice/bin/databricks-system-adapter
          key: databricks-system-adapter-${{ runner.os }}-${{ hashFiles('system-adapters/databricks/Cargo.toml', 'system-adapters/databricks/Cargo.lock', 'system-adapters/databricks/src/**/*.rs', 'crates/system-adapter-protocol/Cargo.toml', 'crates/system-adapter-protocol/src/**/*.rs') }}

      # Fails fast, before the benchmark starts, when the selected adapter
      # is missing credentials, tooling, or its binary/image.
      - name: Validate adapter configuration
        env:
          SYSTEM_ADAPTER: ${{ github.event.inputs.system_adapter || 'spidapter' }}
          SCENARIO: ${{ github.event.inputs.scenario || 'tpch' }}
          # Read from the job's env context, not from secrets.
          # NOTE(review): presumably exported by the management-login
          # action; verify.
          SPICEAI_API_KEY: ${{ env.SPICEAI_API_KEY }}
          SPICE_CLOUD_API_URL: https://dev-api.spice.ai
          DATABRICKS_ENDPOINT: ${{ secrets.DATABRICKS_ENDPOINT }}
          DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
          DATABRICKS_HTTP_PATH: ${{ secrets.DATABRICKS_HTTP_PATH }}
          DATABRICKS_SQL_WAREHOUSE_ID: ${{ secrets.DATABRICKS_SQL_WAREHOUSE_ID }}
          DATABRICKS_CATALOG: ${{ secrets.DATABRICKS_CATALOG }}
          DATABRICKS_SCHEMA: ${{ secrets.DATABRICKS_SCHEMA }}
          DATABRICKS_VARIANT: ${{ github.event.inputs.databricks_variant || 'databricks' }}
          DATABRICKS_TABLE_FORMAT: ${{ github.event.inputs.table_format || 'parquet' }}
        run: |
          set -euo pipefail

          if [ -z "${SCENARIO}" ]; then
            echo "SCENARIO must not be empty"
            exit 1
          fi

          case "${SYSTEM_ADAPTER}" in
            spidapter)
              if [ -z "${SPICEAI_API_KEY:-}" ]; then
                echo "SPICEAI_API_KEY must be set for spidapter"
                exit 1
              fi
              if ! command -v docker >/dev/null 2>&1; then
                echo "docker is required for spidapter mode"
                exit 1
              fi
              docker image inspect ghcr.io/spiceai/spidapter:latest >/dev/null 2>&1 || {
                echo "spidapter docker image not found locally; pull step may have failed"
                exit 1
              }
              ;;
            databricks)
              if [ "${DATABRICKS_VARIANT}" != "databricks" ] && [ "${DATABRICKS_VARIANT}" != "lakebase" ]; then
                echo "DATABRICKS_VARIANT must be either 'databricks' or 'lakebase'"
                exit 1
              fi
              # ${!required_var} is bash indirect expansion: the value of
              # the variable whose name is stored in required_var.
              for required_var in DATABRICKS_ENDPOINT DATABRICKS_TOKEN DATABRICKS_HTTP_PATH DATABRICKS_SQL_WAREHOUSE_ID; do
                if [ -z "${!required_var:-}" ]; then
                  echo "${required_var} must be set for databricks adapter mode"
                  exit 1
                fi
              done
              if echo "${DATABRICKS_ENDPOINT}" | grep -qE '^https?://'; then
                echo "DATABRICKS_ENDPOINT should be a hostname only (no http/https scheme)"
                exit 1
              fi
              if echo "${DATABRICKS_HTTP_PATH}" | grep -qE '^/'; then
                echo "DATABRICKS_HTTP_PATH should not start with '/'"
                exit 1
              fi
              if [ ! -x "${HOME}/.spice/bin/databricks-system-adapter" ]; then
                echo "Local databricks adapter binary is missing or not executable at ${HOME}/.spice/bin/databricks-system-adapter"
                exit 1
              fi
              # Smoke test: with `set -e`, a non-zero exit fails the step.
              "${HOME}/.spice/bin/databricks-system-adapter" --help >/dev/null
              ;;
            *)
              echo "Unsupported system_adapter value: ${SYSTEM_ADAPTER}"
              exit 1
              ;;
          esac

      # Installs the dbc tool, then the driver matching the adapter:
      # `databricks` for the databricks adapter, `flightsql` otherwise.
      - name: Install ADBC driver
        env:
          SYSTEM_ADAPTER: ${{ github.event.inputs.system_adapter || 'spidapter' }}
        run: |
          set -euo pipefail
          # NOTE(review): pipes a remote, unpinned script into sh.
          curl -LsSf https://dbc.columnar.tech/install.sh | sh
          if [ "${SYSTEM_ADAPTER}" = "databricks" ]; then
            dbc install databricks
          else
            dbc install flightsql
          fi

      - name: Run spicebench
        env:
          # NOTE(review): presumably exported by the management-login
          # action; verify.
          SPICEAI_API_KEY: ${{ env.SPICEAI_API_KEY }}
          SPICE_CLOUD_API_URL: https://dev-api.spice.ai
          DATABRICKS_ENDPOINT: ${{ secrets.DATABRICKS_ENDPOINT }}
          DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
          DATABRICKS_HTTP_PATH: ${{ secrets.DATABRICKS_HTTP_PATH }}
          DATABRICKS_SQL_WAREHOUSE_ID: ${{ secrets.DATABRICKS_SQL_WAREHOUSE_ID }}
          DATABRICKS_CATALOG: ${{ secrets.DATABRICKS_CATALOG }}
          DATABRICKS_SCHEMA: ${{ secrets.DATABRICKS_SCHEMA }}
          DATABRICKS_VARIANT: ${{ github.event.inputs.databricks_variant || 'databricks' }}
          DATABRICKS_TABLE_FORMAT: ${{ github.event.inputs.table_format || 'parquet' }}
          SPICEAI_BENCHMARK_METRICS_KEY: ${{ secrets.SPICEAI_BENCHMARK_METRICS_KEY }}
          SCENARIO: ${{ github.event.inputs.scenario || 'tpch' }}
          SYSTEM_ADAPTER: ${{ github.event.inputs.system_adapter || 'spidapter' }}
          EXECUTOR_INSTANCE_TYPE: ${{ github.event.inputs.executor_instance_type || 'github-hosted-ubuntu-latest' }}
          ETL_BUCKET: ${{ github.event.inputs.etl_bucket }}
          ETL_PREFIX: ${{ github.event.inputs.etl_prefix || 'data-gen' }}
          ETL_VERSION: ${{ github.event.inputs.etl_version }}
          ETL_REGION: ${{ github.event.inputs.etl_region || 'us-east-1' }}
          ETL_ENDPOINT: ${{ github.event.inputs.etl_endpoint }}
          # Passed through the environment so the expression is never
          # interpolated into the script text.
          WITH_CREATED_AT: ${{ github.event.inputs.with_created_at || 'false' }}
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          SPIDAPTER_ICEBERG_REGION: us-west-1
          SPIDAPTER_ICEBERG_CATALOG_FROM: 'iceberg:https://glue.us-west-1.amazonaws.com/iceberg/v1/catalogs/211125479522/namespaces'
          RUST_LOG: 'info'
        run: |
          set -euo pipefail

          # Argument lists are bash arrays expanded as "${arr[@]}", so a
          # value containing whitespace or glob characters reaches
          # spicebench as exactly one argument.
          etl_args=(--etl-bucket "${ETL_BUCKET}" --etl-version "${ETL_VERSION}")
          if [ -n "${ETL_PREFIX}" ]; then
            etl_args+=(--etl-prefix "${ETL_PREFIX}")
          fi
          if [ -n "${ETL_REGION}" ]; then
            etl_args+=(--etl-region "${ETL_REGION}")
          fi
          if [ -n "${ETL_ENDPOINT}" ]; then
            etl_args+=(--etl-endpoint "${ETL_ENDPOINT}")
          fi

          adapter_envs=()
          if [ "${SYSTEM_ADAPTER}" = "databricks" ]; then
            adapter_cmd="${HOME}/.spice/bin/databricks-system-adapter"
            adapter_args="stdio"
            # Always forwarded to the adapter, in this order.
            for var_name in DATABRICKS_ENDPOINT DATABRICKS_TOKEN DATABRICKS_HTTP_PATH DATABRICKS_SQL_WAREHOUSE_ID DATABRICKS_VARIANT DATABRICKS_TABLE_FORMAT; do
              adapter_envs+=(--system-adapter-env "${var_name}=${!var_name}")
            done
            # Forwarded only when non-empty.
            for var_name in DATABRICKS_CATALOG DATABRICKS_SCHEMA; do
              if [ -n "${!var_name}" ]; then
                adapter_envs+=(--system-adapter-env "${var_name}=${!var_name}")
              fi
            done
          else
            adapter_cmd="docker"
            # `-e NAME` without a value makes docker copy NAME from this
            # step's environment into the container.
            adapter_args="run -i -e SPICEAI_API_KEY -e SPICE_CLOUD_API_URL -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e SPIDAPTER_ICEBERG_REGION -e SPIDAPTER_ICEBERG_CATALOG_FROM ghcr.io/spiceai/spidapter:latest stdio --verbose --channel nightly"
          fi

          extra_flags=()
          if [ "${WITH_CREATED_AT}" = "true" ]; then
            extra_flags+=(--with-created-at)
          fi

          # adapter_args is deliberately passed as ONE string, matching the
          # original invocation of --system-adapter-stdio-args.
          "${HOME}/.spice/bin/spicebench" \
            --concurrency 2 \
            --scenario "${SCENARIO}" \
            --executor-instance-type "${EXECUTOR_INSTANCE_TYPE}" \
            "${etl_args[@]}" \
            --system-adapter-stdio-cmd "${adapter_cmd}" \
            --system-adapter-stdio-args "${adapter_args}" \
            "${adapter_envs[@]}" \
            "${extra_flags[@]}"