spiceai
diff --git a/‎.github/workflows/pr.yml‎
Lines changed: 76 additions & 0 deletions b/‎.github/workflows/pr.yml‎
Lines changed: 76 additions & 0 deletions
diff --git a/‎.github/workflows/run_spicebench.yml‎
Lines changed: 3 additions & 1 deletion b/‎.github/workflows/run_spicebench.yml‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎crates/adbc_client/src/lib.rs‎
Lines changed: 76 additions & 2 deletions b/‎crates/adbc_client/src/lib.rs‎
Lines changed: 76 additions & 2 deletions
@@ -84,3 +84,79 @@ jobs:
           git add -A
           git commit -m "chore: auto-fix cargo fmt + clippy"
           git push
+
+  # End-to-end smoke test for Rust changes: builds spicebench and the
+  # data-generation tool, generates a tiny TPCH dataset, then runs spicebench
+  # against it through the spidapter container.
+  validation-run:
+    name: Validation run (Spice Cloud TPCH sf0.0001)
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    needs: changes
+    if: needs.changes.outputs.rust == 'true'
+    # NOTE(review): this group name is also used by the long-running job in
+    # run_spicebench.yml. With cancel-in-progress enabled here, a PR push may
+    # cancel a benchmark run that is already in progress - confirm that this
+    # is intended.
+    concurrency:
+      group: spicebench-run
+      cancel-in-progress: true
+    steps:
+      - uses: actions/checkout@v6
+
+      - uses: ./.github/actions/management-login
+        with:
+          client-id: ${{ secrets.SPICE_MANAGEMENT_CLIENT_ID }}
+          client-secret: ${{ secrets.SPICE_MANAGEMENT_CLIENT_SECRET }}
+
+      - name: Log in to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Pull spidapter image
+        run: docker pull ghcr.io/spiceai/spidapter:latest
+
+      - uses: ./.github/actions/build-spicebench
+
+      - name: Build data-generation
+        run: |
+          mkdir -p ~/.spice/bin
+          cargo build -p data-generation
+          install -m 755 target/debug/data-generation ~/.spice/bin/data-generation
+
+      # Keep the scale factor in the job and step names in sync with the
+      # --scale-factor flag below.
+      - name: Generate test data (sf 0.0001)
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          RUST_LOG: info
+        run: |
+          ~/.spice/bin/data-generation run \
+            --dataset tpch \
+            --scenario tpch \
+            --version 0 \
+            --scale-factor 0.0001 \
+            --bucket spiceai-public-datasets \
+            --prefix pr-validation \
+            --max-concurrency 4 \
+            --region us-east-1
+
+      - name: Install ADBC driver
+        run: |
+          curl -LsSf https://dbc.columnar.tech/install.sh | sh
+          dbc install flightsql
+
+      # The --etl-* flags point at the bucket, prefix, version and region that
+      # the data-generation step above wrote to.
+      - name: Run spicebench
+        env:
+          # Presumably exported to the job environment by the management-login
+          # action above - TODO confirm.
+          SPICEAI_API_KEY: ${{ env.SPICEAI_API_KEY }}
+          SPICE_CLOUD_API_URL: https://dev-api.spice.ai
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          SPIDAPTER_ICEBERG_REGION: us-west-1
+          SPIDAPTER_ICEBERG_CATALOG_FROM: iceberg:https://glue.us-west-1.amazonaws.com/iceberg/v1/catalogs/211125479522/namespaces
+          RUST_LOG: info
+        run: |
+          ~/.spice/bin/spicebench \
+            --concurrency 1 \
+            --scenario tpch \
+            --etl-bucket spiceai-public-datasets \
+            --etl-prefix pr-validation \
+            --etl-version 0 \
+            --etl-region us-east-1 \
+            --system-adapter-stdio-cmd docker \
+            --system-adapter-stdio-args "run -i -e SPICEAI_API_KEY -e SPICE_CLOUD_API_URL -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e SPIDAPTER_ICEBERG_REGION -e SPIDAPTER_ICEBERG_CATALOG_FROM ghcr.io/spiceai/spidapter:latest stdio --verbose --channel nightly"
@@ -39,7 +39,7 @@ on:
         description: 'Version identifier for the data generation to read from'
         required: true
         type: string
-        default: "1"
+        default: '1'
       etl_region:
         description: 'AWS region for the ETL S3 bucket'
         required: false
@@ -70,6 +70,8 @@ jobs:
     name: Run spicebench
     runs-on: ubuntu-latest
     timeout-minutes: 600
+    concurrency:
+      group: spicebench-run
     steps:
       - uses: actions/checkout@v6
 
 
@@ -17,10 +17,12 @@ limitations under the License.
 pub mod databricks;
 pub mod spiceai;
 
+pub use adbc_core::options::IngestMode;
+
 use std::collections::HashMap;
 
-use adbc_core::options::{AdbcVersion, OptionDatabase, OptionValue};
-use adbc_core::{Connection, Database, Driver, LOAD_FLAG_DEFAULT, Statement};
+use adbc_core::options::{self, AdbcVersion, OptionDatabase, OptionValue};
+use adbc_core::{Connection, Database, Driver, LOAD_FLAG_DEFAULT, Optionable, Statement};
 use adbc_driver_manager::ManagedDriver;
 use arrow_array::RecordBatch;
 use snafu::prelude::*;
@@ -118,4 +120,76 @@ impl AdbcConnection {
             .collect::<std::result::Result<Vec<_>, _>>()
             .context(ReadBatchSnafu)
     }
+
+    /// Bulk-ingest a single [`RecordBatch`] into a target table using the
+    /// ADBC bulk ingest API.
+    ///
+    /// The batch is wrapped in a one-element reader and forwarded to
+    /// [`bulk_ingest_stream`](Self::bulk_ingest_stream), which binds it to a
+    /// statement configured with the target table and ingest mode. This
+    /// avoids the overhead of constructing individual SQL INSERT statements.
+    ///
+    /// # Errors
+    ///
+    /// Returns whatever error
+    /// [`bulk_ingest_stream`](Self::bulk_ingest_stream) reports.
+    pub fn bulk_ingest(
+        &mut self,
+        target_table: &str,
+        target_db_schema: Option<&str>,
+        mode: options::IngestMode,
+        batch: RecordBatch,
+    ) -> Result<Option<i64>> {
+        // Take the schema handle first so the owned batch can be moved into
+        // the iterator instead of being cloned.
+        let schema = batch.schema();
+        let reader = arrow_array::RecordBatchIterator::new(std::iter::once(Ok(batch)), schema);
+
+        self.bulk_ingest_stream(target_table, target_db_schema, mode, Box::new(reader))
+    }
+
+    /// Streams every [`RecordBatch`] yielded by `reader` into `target_table`
+    /// through one ADBC bulk-ingest statement.
+    ///
+    /// The statement is configured with the target table, the optional
+    /// `target_db_schema` and the ingest `mode`. The reader is then attached
+    /// with `bind_stream` and the statement is run with `execute_update`,
+    /// whose result is returned as-is.
+    ///
+    /// Prefer this over calling [`bulk_ingest`](Self::bulk_ingest) once per
+    /// batch: all batches go through a single statement.
+    ///
+    /// # Errors
+    ///
+    /// Any failure reported by the driver is returned as
+    /// `Error::ExecuteQuery`, carrying the driver's message in `reason`.
+    pub fn bulk_ingest_stream(
+        &mut self,
+        target_table: &str,
+        target_db_schema: Option<&str>,
+        mode: options::IngestMode,
+        reader: Box<dyn arrow_array::RecordBatchReader + Send>,
+    ) -> Result<Option<i64>> {
+        // Builds an `Error::ExecuteQuery` whose reason reads "<step>: <cause>".
+        fn step_failed(step: &str, cause: &dyn std::fmt::Display) -> Error {
+            Error::ExecuteQuery {
+                reason: format!("{step}: {cause}"),
+            }
+        }
+
+        // Statement creation reports the driver message without a prefix.
+        let mut statement = match self.conn.new_statement() {
+            Ok(statement) => statement,
+            Err(e) => {
+                return Err(Error::ExecuteQuery {
+                    reason: e.to_string(),
+                });
+            }
+        };
+
+        let table_name = OptionValue::from(target_table);
+        statement
+            .set_option(options::OptionStatement::TargetTable, table_name)
+            .map_err(|e| step_failed("Failed to set target table", &e))?;
+
+        // The db schema option is only set when the caller supplied one.
+        if let Some(db_schema) = target_db_schema {
+            let schema_name = OptionValue::from(db_schema);
+            statement
+                .set_option(options::OptionStatement::TargetDbSchema, schema_name)
+                .map_err(|e| step_failed("Failed to set target db schema", &e))?;
+        }
+
+        statement
+            .set_option(options::OptionStatement::IngestMode, mode.into())
+            .map_err(|e| step_failed("Failed to set ingest mode", &e))?;
+
+        statement
+            .bind_stream(reader)
+            .map_err(|e| step_failed("Failed to bind stream for bulk ingest", &e))?;
+
+        statement
+            .execute_update()
+            .map_err(|e| step_failed("Bulk ingest execution failed", &e))
+    }
 }