spiceai · krinart · Jun 14, 2026 · Jun 15, 2026 · Jun 15, 2026 · Jun 15, 2026
diff --git a/.github/actions/build-spicebench/action.yml b/.github/actions/build-spicebench/action.yml
@@ -33,8 +33,8 @@ runs:
       shell: bash
       run: |
         mkdir -p ~/.spice/bin
-        cargo build -p spicebench
-        install -m 755 target/debug/spicebench ~/.spice/bin/spicebench
+        cargo build -p spicebench --release
+        install -m 755 target/release/spicebench ~/.spice/bin/spicebench
 
     - name: Save spicebench cache
       if: steps.build-spicebench.outcome == 'success'

diff --git a/.github/workflows/run_spicebench.yml b/.github/workflows/run_spicebench.yml
@@ -217,6 +217,12 @@ jobs:
             export SPICEBENCH_ADBC_DELETE_BATCH_SIZE=50000
             export SPICEBENCH_ADBC_UPDATE_STRATEGY=staging_table
             export SPICEBENCH_TARGET_BATCH_ROWS=500000
+            # ADBC writes are heavyweight (staging-parquet upload + MERGE), so
+            # splitting a segment into many small writes hurts: disable chunking
+            # (one write per segment) and let each table do 1 in-flight write with
+            # no global ceiling (tables ingest concurrently). Mirrors trunk.
+            export SPICEBENCH_SINK_CHUNK_ROWS=0
+            export SPICEBENCH_SINK_PARALLELISM_PER_TABLE=1
             export SPICEBENCH_ADBC_MAX_INGEST_BATCH_BYTES=1268435456
             export SPICEBENCH_ADBC_REUSE_BULK_INGEST_STREAMS=false
             export SPICEBENCH_ADBC_ANALYZE_STAGING_BEFORE_MERGE=true

diff --git a/.github/workflows/run_spicebench_debug_spice_cloud.yml b/.github/workflows/run_spicebench_debug_spice_cloud.yml
@@ -283,6 +283,12 @@ jobs:
 
           export SPICEBENCH_ADBC_UPDATE_STRATEGY=${{ inputs.adbc_update_strategy || 'bulk_ingest_upsert' }}
           export SPICEBENCH_ADBC_DELETE_BATCH_SIZE=50000
+          # ADBC sink (spidapter -> Cayenne): writes are heavyweight, so don't
+          # split a segment into many small writes — that floods the executor
+          # with tiny ingests (the prior crash). One write per segment, 1 in-flight
+          # write per table, no global ceiling (tables ingest concurrently).
+          export SPICEBENCH_SINK_CHUNK_ROWS=0
+          export SPICEBENCH_SINK_PARALLELISM_PER_TABLE=1
           if [ "${SPICEBENCH_ADBC_UPDATE_STRATEGY}" = "bulk_ingest_upsert" ]; then
             export SPICEBENCH_ADBC_FLUSH_STREAM_BEFORE_UPSERT=true
           else

diff --git a/.github/workflows/run_spicebench_debug_streaming.yml b/.github/workflows/run_spicebench_debug_streaming.yml
@@ -52,6 +52,12 @@ on:
       disable_teardown:
         type: boolean
         default: false
+      run_tag:
+        type: string
+        default: ''
+      validate_results:
+        type: boolean
+        default: true
   workflow_dispatch:
     inputs:
       environment:
@@ -69,8 +75,8 @@ on:
         type: choice
         options:
           - postgres-wal
-          - dynamodb-streams
           - mongodb-streams
+          - dynamodb-streams
       acceleration:
         description: 'Acceleration engine'
         required: true
@@ -151,6 +157,16 @@ on:
         required: false
         default: false
         type: boolean
+      run_tag:
+        description: 'Optional suffix appended to the adapter_name metric label (e.g. "analyze-fix"). Used to differentiate runs in Grafana.'
+        required: false
+        default: ''
+        type: string
+      validate_results:
+        description: 'Validate checkpoint results (disable to measure raw ETL throughput without waiting for spiced convergence)'
+        required: false
+        default: true
+        type: boolean
 
 jobs:
   run-spicebench:
@@ -197,6 +213,7 @@ jobs:
           SPICE_CLOUD_API_URL: ${{ inputs.environment == 'production' && 'https://api.spice.ai' || 'https://dev-api.spice.ai' }}
           SYSTEM_UNDER_TEST: ${{ inputs.system_under_test }}
           SYSTEM_ADAPTER: ${{ inputs.system_under_test }}-${{ inputs.acceleration }}
+          RUN_TAG: ${{ inputs.run_tag || '' }}
           ACCELERATION: ${{ inputs.acceleration }}
           SCENARIO: ${{ inputs.scenario || 'tpch' }}
           SCALE_FACTOR: ${{ inputs.scale_factor || '1' }}
@@ -206,7 +223,7 @@ jobs:
           ETL_REGION: us-east-1
           ETL_SINK: adbc
           SCHEDULER_STATE_LOCATION: s3://spiceai-testing-cluster-state/spicebench-scheduler-state-${{ github.run_id }}/
-          VALIDATE_CHECKPOINT_RESULTS: 'true'
+          VALIDATE_CHECKPOINT_RESULTS: ${{ inputs.validate_results != false && 'true' || 'false' }}
           CHECKPOINT_VALIDATION_TIMEOUT: ${{ inputs.checkpoint_validation_timeout || '3600' }}
           SCRAPE_SUT_METRICS: 'true'
           SPICEAI_BENCHMARK_METRICS_KEY: ${{ secrets.SPICEAI_BENCHMARK_METRICS_KEY }}
@@ -220,6 +237,7 @@ jobs:
           EC2_AMI_ID: ${{ vars.EC2_AMI_ID }}
           EC2_INSTANCE_TYPE: ${{ inputs.ec2_instance_type || vars.EC2_INSTANCE_TYPE }}
           EC2_IAM_INSTANCE_PROFILE: ${{ vars.EC2_IAM_INSTANCE_PROFILE }}
+          MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
           SPIDAPTER_APP_MEMORY_LIMIT: ${{ inputs.app_memory_limit || '64Gi' }}
           ENABLE_MODULE_DEBUG_LOGGING: ${{ inputs.enable_module_debug_logging || 'false' }}
           DISABLE_TEARDOWN: ${{ inputs.disable_teardown || 'false' }}
@@ -228,7 +246,7 @@ jobs:
         run: |
           set -euo pipefail
           if [ "${ENABLE_MODULE_DEBUG_LOGGING}" = "true" ]; then
-            export RUST_LOG='info,etl=debug,spicebench=debug,data_generation=debug,etl::sink::adbc=debug,etl::sink::dynamodb=debug,etl::sink::mongodb=debug'
+            export RUST_LOG='info,spicebench=debug,data_generation=debug,etl::sink::adbc=debug,etl::sink::dynamodb=debug,etl::sink::mongodb=debug'
           else
             export RUST_LOG='info'
           fi
@@ -260,27 +278,31 @@ jobs:
           case "${SYSTEM_UNDER_TEST}" in
             postgres-wal)
               USE_EC2=true
-              export SPICEBENCH_TARGET_BATCH_ROWS=50000
+              export SPICEBENCH_TARGET_BATCH_ROWS=100000
+              export SPICEBENCH_SINK_PARALLELISM_PER_TABLE=8
+              export SPICEBENCH_SINK_CHUNK_ROWS=20000
+              export SPICEBENCH_SINK_PARALLELISM=8
               export SPICEBENCH_ADBC_DELETE_BATCH_SIZE=5000
               export SPICEBENCH_ADBC_UPDATE_STRATEGY=staging_table
               export SPICEBENCH_ADBC_REUSE_BULK_INGEST_STREAMS=false
               export SPICEBENCH_ADBC_ANALYZE_STAGING_BEFORE_MERGE=true
               ;;
-            postgres-debezium)
-              USE_EC2=true
-              export SPICEBENCH_TARGET_BATCH_ROWS=50000
-              export SPICEBENCH_ADBC_DELETE_BATCH_SIZE=10000
-              export SPICEBENCH_ADBC_UPDATE_STRATEGY=staging_table
-              export SPICEBENCH_ADBC_REUSE_BULK_INGEST_STREAMS=false
-              export SPICEBENCH_ADBC_ANALYZE_STAGING_BEFORE_MERGE=true
-              ;;
             dynamodb-streams)
               USE_EC2=false
               export SPICEBENCH_TARGET_BATCH_ROWS=50000
               ;;
             mongodb-streams)
-              USE_EC2=true
-              export SPICEBENCH_TARGET_BATCH_ROWS=50000
+              # Connect to the existing MongoDB Atlas cluster (MONGODB_ATLAS_URI)
+              # instead of provisioning EC2 — no EC2 env vars needed.
+              USE_EC2=false
+              export SPICEBENCH_TARGET_BATCH_ROWS=320000
+              # Mongo writes are light and benefit from many concurrent small
+              # writes. Keep chunking; allow up to 64 in-flight writes per table
+              # (the new per-table default is 1, which would otherwise serialize
+              # a single hot table) and cap the aggregate at 64 globally.
+              export SPICEBENCH_SINK_CHUNK_ROWS=5000
+              export SPICEBENCH_SINK_PARALLELISM_PER_TABLE=64
+              export SPICEBENCH_SINK_PARALLELISM=64
               ;;
           esac
 
@@ -302,6 +324,12 @@ jobs:
             ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e EC2_SUBNET_ID -e EC2_SECURITY_GROUP_ID -e EC2_AMI_ID -e EC2_INSTANCE_TYPE -e EC2_DISK_SIZE_GB -e EC2_IAM_INSTANCE_PROFILE -e AWS_REGION=us-west-2"
           fi
 
+          # MongoDB connects to the existing Atlas cluster; pass the URI secret
+          # into the adapter container so the scenario can interpolate it.
+          if [ "${SYSTEM_UNDER_TEST}" = "mongodb-streams" ]; then
+            ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e MONGODB_ATLAS_URI"
+          fi
+
           [ "${USE_PRIVATE_CLUSTER}" = "true" ]                   && ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPIDAPTER_ORGANIZATION_TAG=spicehq"
           [ -n "${CUSTOM_IMAGE_TAG}" ]                            && ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPIDAPTER_IMAGE_TAG=${CUSTOM_IMAGE_TAG}"
           [ "${{ inputs.enable_spice_debug }}" = "true" ] && ADAPTER_DOCKER_OPTS="${ADAPTER_DOCKER_OPTS} -e SPIDAPTER_SPICE_DEBUG=true"