# Run Streaming - postgres-wal - cayenne - changes - SF10 #78
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow title as listed in the Actions UI.
name: 'Run - Streaming'

# Per-run title assembled from the dispatch inputs. The folded scalar joins the
# lines below with single spaces, producing one line of text.
run-name: >-
  Run Streaming -
  ${{ github.event.inputs.system_under_test }} -
  ${{ github.event.inputs.acceleration }} -
  ${{ github.event.inputs.etl_type }} -
  SF${{ github.event.inputs.scale_factor }}
# Manually dispatched only; every knob of the benchmark run is a dispatch input.
on:
  workflow_dispatch:
    inputs:
      # --- Target selection -------------------------------------------------
      environment:
        description: 'Target Spice Cloud Environment'
        required: true
        default: 'dev'
        type: choice
        options:
          - dev
          - production

      system_under_test:
        description: 'Streaming source'
        required: true
        default: 'postgres-wal'
        type: choice
        options:
          - postgres-wal
          - dynamodb-streams
          - mongodb-streams

      acceleration:
        description: 'Acceleration engine'
        required: true
        default: 'cayenne'
        type: choice
        options:
          - cayenne
          - duckdb

      # --- Workload shape ---------------------------------------------------
      scenario:
        description: 'Scenario/query set to run (e.g. tpch)'
        required: true
        default: 'tpch'
        type: string

      etl_type:
        description: 'ETL type'
        required: true
        default: 'changes'
        type: choice
        options:
          - events
          - changes

      # Quoted so the options stay strings rather than YAML numbers.
      scale_factor:
        description: 'Scale Factor'
        required: true
        default: '1'
        type: choice
        options:
          - '0.1'
          - '1'
          - '10'

      # --- Images -----------------------------------------------------------
      spidapter_image_tag:
        description: 'Spidapter Docker image tag (e.g. latest, viktor)'
        required: false
        default: 'latest'
        type: string

      custom_image_tag:
        description: 'Custom runtime container image tag from ghcr.io/spiceai/spiceai-dev (uses internal channel)'
        required: false
        default: ''
        type: string

      # --- Sizing and timeouts ----------------------------------------------
      num_query_clients:
        description: 'Number of concurrent query clients'
        required: false
        default: '2'
        type: string

      app_memory_limit:
        description: 'Memory limit for the SCP pod (e.g. 64Gi)'
        required: false
        default: '64Gi'
        type: string

      ec2_instance_type:
        description: 'EC2 instance type override (e.g. m5.large, m5.2xlarge, r5.2xlarge)'
        required: false
        default: ''
        type: string

      checkpoint_validation_timeout:
        description: 'Max seconds to wait for checkpoint convergence (default: 3600)'
        required: false
        default: '3600'
        type: string

      # --- Toggles ----------------------------------------------------------
      use_private_cluster:
        description: 'Pin SCP app to the private cluster (spicehq org tag)'
        required: false
        default: true
        type: boolean

      enable_module_debug_logging:
        description: 'Enable debug logs'
        required: false
        default: false
        type: boolean

      enable_spice_debug:
        description: 'Enable debug mode on the Spice Cloud deployment'
        required: false
        default: false
        type: boolean

      disable_teardown:
        description: 'Skip teardown — keeps EC2 instances and SCP app alive for inspection'
        required: false
        default: false
        type: boolean
jobs:
  # Single job: logs in, pulls the spidapter image, builds spicebench, installs
  # the ADBC drivers and then runs `spicebench run` with spidapter launched via
  # `docker run` as the stdio system adapter.
  run-spicebench:
    name: Run spicebench (streaming - ${{ github.event.inputs.system_under_test }})
    runs-on: spiceai-dev-runners
    timeout-minutes: 600
    steps:
      - uses: actions/checkout@v6

      - uses: ./.github/actions/setup-cc

      # Picks the prod or dev OAuth endpoint and credentials from `environment`.
      - uses: ./.github/actions/management-login
        with:
          token-url: ${{ github.event.inputs.environment == 'production' && 'https://spice.ai/api/oauth/token' || 'https://dev.spice.ai/api/oauth/token' }}
          client-id: ${{ github.event.inputs.environment == 'production' && secrets.SPICE_MANAGEMENT_CLIENT_ID_PROD || secrets.SPICE_MANAGEMENT_CLIENT_ID }}
          client-secret: ${{ github.event.inputs.environment == 'production' && secrets.SPICE_MANAGEMENT_CLIENT_SECRET_PROD || secrets.SPICE_MANAGEMENT_CLIENT_SECRET }}

      - name: Log in to GHCR
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Pull spidapter image
        env:
          # The free-form tag input goes through the environment instead of
          # being expanded into the script text, so it cannot inject shell.
          ADAPTER_IMAGE_TAG: ${{ github.event.inputs.spidapter_image_tag || 'latest' }}
        run: docker pull "ghcr.io/spiceai/spidapter:${ADAPTER_IMAGE_TAG}"

      - uses: ./.github/actions/build-spicebench

      # Only needed for the postgres-* sources.
      - name: Install ADBC Postgres driver
        if: ${{ startsWith(github.event.inputs.system_under_test, 'postgres-') }}
        uses: columnar-tech/setup-dbc@v1
        with:
          drivers: postgresql

      - name: Install ADBC FlightSQL driver
        uses: columnar-tech/setup-dbc@v1
        with:
          drivers: flightsql

      - name: Run spicebench
        env:
          # NOTE(review): presumably exported to the job environment by an
          # earlier step (management-login?) — confirm.
          SPICEAI_API_KEY: ${{ env.SPICEAI_API_KEY }}
          SPICE_CLOUD_API_URL: ${{ github.event.inputs.environment == 'production' && 'https://api.spice.ai' || 'https://dev-api.spice.ai' }}
          SYSTEM_UNDER_TEST: ${{ github.event.inputs.system_under_test }}
          SYSTEM_ADAPTER: ${{ github.event.inputs.system_under_test }}
          ACCELERATION: ${{ github.event.inputs.acceleration }}
          SCENARIO: ${{ github.event.inputs.scenario || 'tpch' }}
          SCALE_FACTOR: ${{ github.event.inputs.scale_factor || '1' }}
          NUM_QUERY_CLIENTS: ${{ github.event.inputs.num_query_clients || '2' }}
          ETL_BUCKET: spicebench
          ETL_PREFIX: ${{ github.event.inputs.etl_type == 'changes' && 'data-gen-mutable' || 'data-gen' }}
          ETL_REGION: us-east-1
          ETL_SINK: adbc
          SCHEDULER_STATE_LOCATION: s3://spiceai-testing-cluster-state/spicebench-scheduler-state-${{ github.run_id }}/
          VALIDATE_CHECKPOINT_RESULTS: 'true'
          CHECKPOINT_VALIDATION_TIMEOUT: ${{ github.event.inputs.checkpoint_validation_timeout || '3600' }}
          SCRAPE_SUT_METRICS: 'true'
          SPICEAI_BENCHMARK_METRICS_KEY: ${{ secrets.SPICEAI_BENCHMARK_METRICS_KEY }}
          MINIO_ENDPOINT: ${{ secrets.MINIO_ENDPOINT }}
          # The step's own AWS_* variables carry the MinIO credentials; the
          # S3_AWS_* pair is forwarded to the adapter container as AWS_*.
          AWS_ACCESS_KEY_ID: ${{ secrets.MINIO_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.MINIO_SECRET_ACCESS_KEY }}
          S3_AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          S3_AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          EC2_SUBNET_ID: ${{ vars.EC2_SUBNET_ID }}
          EC2_SECURITY_GROUP_ID: ${{ vars.EC2_SECURITY_GROUP_ID }}
          EC2_AMI_ID: ${{ vars.EC2_AMI_ID }}
          EC2_INSTANCE_TYPE: ${{ github.event.inputs.ec2_instance_type || vars.EC2_INSTANCE_TYPE }}
          EC2_IAM_INSTANCE_PROFILE: ${{ vars.EC2_IAM_INSTANCE_PROFILE }}
          SPIDAPTER_APP_MEMORY_LIMIT: ${{ github.event.inputs.app_memory_limit || '64Gi' }}
          ENABLE_MODULE_DEBUG_LOGGING: ${{ github.event.inputs.enable_module_debug_logging || 'false' }}
          DISABLE_TEARDOWN: ${{ github.event.inputs.disable_teardown || 'false' }}
          CUSTOM_IMAGE_TAG: ${{ github.event.inputs.custom_image_tag || '' }}
          USE_PRIVATE_CLUSTER: ${{ github.event.inputs.use_private_cluster || 'true' }}
          # Inputs that used to be expanded inside the script body; they are
          # now passed as data so their content is never parsed as shell.
          ENABLE_SPICE_DEBUG: ${{ github.event.inputs.enable_spice_debug || 'false' }}
          ADAPTER_IMAGE_TAG: ${{ github.event.inputs.spidapter_image_tag || 'latest' }}
        run: |
          set -euo pipefail

          # The sink modules always log at debug; the toggle adds the broader
          # etl / spicebench / data_generation targets.
          if [ "${ENABLE_MODULE_DEBUG_LOGGING}" = "true" ]; then
            export RUST_LOG='info,etl=debug,spicebench=debug,data_generation=debug,etl::sink::adbc=debug,etl::sink::dynamodb=debug,etl::sink::mongodb=debug'
          else
            export RUST_LOG='info,etl::sink::adbc=debug,etl::sink::dynamodb=debug,etl::sink::mongodb=debug'
          fi

          EXECUTOR_INSTANCE_TYPE="github-hosted-ubuntu-latest"
          ETL_ENDPOINT="${MINIO_ENDPOINT}"

          # Storage budget by scale factor; also reused as the EC2 disk size.
          case "${SCALE_FACTOR}" in
            0.1) STREAMING_EPHEMERAL_STORAGE_LIMIT_GB=8 ;;
            1) STREAMING_EPHEMERAL_STORAGE_LIMIT_GB=20 ;;
            10) STREAMING_EPHEMERAL_STORAGE_LIMIT_GB=400 ;;
            *) STREAMING_EPHEMERAL_STORAGE_LIMIT_GB=10 ;;
          esac
          export EC2_DISK_SIZE_GB="${STREAMING_EPHEMERAL_STORAGE_LIMIT_GB}"

          # These *_ARGS strings are expanded unquoted further down on purpose
          # so that each one splits into separate CLI words (or into nothing).
          ETL_ARGS="--etl-bucket ${ETL_BUCKET} --scale-factor ${SCALE_FACTOR} --etl-prefix ${ETL_PREFIX} --etl-region ${ETL_REGION}"
          [ -n "${ETL_ENDPOINT:-}" ] && ETL_ARGS="${ETL_ARGS} --etl-endpoint ${ETL_ENDPOINT}"

          VALIDATION_ARGS=""
          [ "${VALIDATE_CHECKPOINT_RESULTS}" = "true" ] && VALIDATION_ARGS="--validate-results --checkpoint-validation-timeout ${CHECKPOINT_VALIDATION_TIMEOUT}"

          SUT_METRICS_ARGS=""
          [ "${SCRAPE_SUT_METRICS}" = "true" ] && SUT_METRICS_ARGS="--scrape-sut-metrics"

          NO_TEARDOWN_ARG=""
          [ "${DISABLE_TEARDOWN}" = "true" ] && NO_TEARDOWN_ARG="--no-teardown"

          # Per-source tuning, and whether the EC2 env vars are forwarded.
          case "${SYSTEM_UNDER_TEST}" in
            postgres-wal | postgres-debezium)
              USE_EC2=true
              export SPICEBENCH_TARGET_BATCH_ROWS=50000
              export SPICEBENCH_ADBC_DELETE_BATCH_SIZE=1000
              export SPICEBENCH_ADBC_UPDATE_STRATEGY=staging_table
              export SPICEBENCH_ADBC_REUSE_BULK_INGEST_STREAMS=false
              export SPICEBENCH_ADBC_ANALYZE_STAGING_BEFORE_MERGE=true
              ;;
            dynamodb-streams)
              USE_EC2=false
              export SPICEBENCH_TARGET_BATCH_ROWS=50000
              ;;
            mongodb-streams)
              USE_EC2=true
              export SPICEBENCH_TARGET_BATCH_ROWS=50000
              ;;
            *)
              # Fail here with a clear message rather than later on an unset
              # USE_EC2 under `set -u`.
              echo "Unsupported system_under_test: ${SYSTEM_UNDER_TEST}" >&2
              exit 1
              ;;
          esac

          # A custom runtime image tag switches the channel to internal.
          SPIDAPTER_CHANNEL="nightly"
          [ -n "${CUSTOM_IMAGE_TAG}" ] && SPIDAPTER_CHANNEL="internal"

          # Common docker opts. A bare `-e NAME` forwards this step's value of
          # NAME; `-e NAME=value` sets it explicitly for the container.
          ADAPTER_DOCKER_OPTS="run -i"
          ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPIDAPTER_SCENARIO=${SYSTEM_UNDER_TEST}"
          ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPIDAPTER_CHANNEL=${SPIDAPTER_CHANNEL}"
          ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPICE_ACCELERATION=${ACCELERATION}"
          ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPICEAI_API_KEY -e SPICE_CLOUD_API_URL"
          ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID} -e AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY}"
          ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPIDAPTER_APP_MEMORY_LIMIT"
          ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPIDAPTER_EPHEMERAL_STORAGE_LIMIT_GB=${STREAMING_EPHEMERAL_STORAGE_LIMIT_GB}Gi"
          ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SCHEDULER_STATE_LOCATION"
          if [ "${USE_EC2}" = "true" ]; then
            ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e EC2_SUBNET_ID -e EC2_SECURITY_GROUP_ID -e EC2_AMI_ID -e EC2_INSTANCE_TYPE -e EC2_DISK_SIZE_GB -e EC2_IAM_INSTANCE_PROFILE -e AWS_REGION=us-west-2"
          fi
          [ "${USE_PRIVATE_CLUSTER}" = "true" ] && ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPIDAPTER_ORGANIZATION_TAG=spicehq"
          [ -n "${CUSTOM_IMAGE_TAG}" ] && ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPIDAPTER_IMAGE_TAG=${CUSTOM_IMAGE_TAG}"
          [ "${ENABLE_SPICE_DEBUG}" = "true" ] && ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPIDAPTER_SPICE_DEBUG=true"

          # The whole docker argument list is handed to spicebench as ONE string.
          ADAPTER_ARGS="${ADAPTER_DOCKER_OPTS} ghcr.io/spiceai/spidapter:${ADAPTER_IMAGE_TAG} stdio --verbose"

          set -x
          ~/.spice/bin/spicebench run \
            --concurrency "${NUM_QUERY_CLIENTS}" \
            --scenario "${SCENARIO}" \
            --executor-instance-type "${EXECUTOR_INSTANCE_TYPE}" \
            ${ETL_ARGS} \
            --etl-sink "${ETL_SINK}" \
            --table-format parquet \
            ${VALIDATION_ARGS} \
            ${SUT_METRICS_ARGS} \
            --system-adapter-stdio-cmd docker \
            --system-adapter-stdio-args "${ADAPTER_ARGS}" \
            --system-adapter-env "SCHEDULER_STATE_LOCATION=${SCHEDULER_STATE_LOCATION}" \
            ${NO_TEARDOWN_ARG}