Skip to content

search benchmark tests #2

search benchmark tests

search benchmark tests #2

---
name: search benchmark tests
on:
  schedule:
    # GitHub evaluates cron expressions in UTC. This fires at 06:00 UTC on
    # Tuesday and Friday, i.e. 10pm PST on Monday and Thursday (11pm during PDT).
    - cron: '0 6 * * 2,5'
  workflow_dispatch:
    inputs:
      # Optional spiced build commit; forwarded to the setup-spiced action.
      spiced_commit:
        description: 'spiced build commit'
        required: false
        type: string
      # 'all' runs every configuration; any other value runs only the matrix
      # entry with that exact name. Keep this list in sync with the matrix
      # array defined in the setup-matrix job.
      configuration:
        description: 'Configuration to run'
        required: false
        default: 'all'
        type: choice
        options:
          - 'all'
          - 'openai[text-embedding-3-small]-arrow'
          - 'openai[text-embedding-3-small]-cayenne[file]'
          - 'openai[text-embedding-3-small]-duckdb[file]'
          - 'openai[text-embedding-3-small[chunking]]-duckdb[file]'
          - 'openai[text-embedding-3-small]-s3_vectors'
          - 'full_text_search-cayenne[file]'
          - 'full_text_search-duckdb[file]'
          - 'hybrid[model2vec[potion-multilingual-128M]]-duckdb[file]'
          - 'model2vec[potion-multilingual-128M]-duckdb[file]'
          - 'model2vec[potion-multilingual-128M]-cayenne[file]'
concurrency:
  # The group key is "<workflow>-<ref>-<suffix>". On trunk the suffix is the
  # commit SHA; on every other ref it is the constant 'any-sha', so all runs
  # on a given non-trunk ref share a single group.
  group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'trunk' && github.sha || 'any-sha' }}
  # A newly queued run cancels the in-progress run of the same group.
  cancel-in-progress: true
jobs:
  # Using a matrix here allows us to easily run all or a specific configuration from the dropdown.
  # This is simpler than using testoperator dispatch for the current small set of configurations.
  # We'll incrementally switch to testoperator dispatch as more configurations are added.
  setup-matrix:
    name: Setup matrix
    runs-on: ubuntu-24.04
    outputs:
      # Array of { name, spicepod } objects consumed by run-bench via fromJson.
      matrix: ${{ steps.setup-matrix.outputs.result }}
    steps:
      - name: Set up matrix
        uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
        id: setup-matrix
        with:
          script: |
            const matrix = [
              { name: "openai[text-embedding-3-small]-arrow" },
              { name: "openai[text-embedding-3-small]-cayenne[file]" },
              { name: "openai[text-embedding-3-small]-duckdb[file]" },
              { name: "openai[text-embedding-3-small[chunking]]-duckdb[file]" },
              { name: "openai[text-embedding-3-small]-s3_vectors" },
              { name: "full_text_search-duckdb[file]" },
              { name: "full_text_search-cayenne[file]" },
              { name: "hybrid[model2vec[potion-multilingual-128M]]-duckdb[file]" },
              { name: "model2vec[potion-multilingual-128M]-duckdb[file]" },
              { name: "model2vec[potion-multilingual-128M]-cayenne[file]" },
            ];
            // Scheduled runs carry no inputs, so they fall back to 'all'.
            const configuration = context.payload.inputs?.configuration || 'all';
            let filtered = matrix;
            if (configuration !== 'all') {
              filtered = filtered.filter(m => m.name === configuration);
            }
            // include the spicepod path for each matrix item
            filtered = filtered.map((m) => {
              m.spicepod = "./test/spicepods/search/mteb/quora/" + m.name + ".yaml";
              return m;
            });
            return filtered;
  run-bench:
    name: Run ${{ matrix.target.name }}
    timeout-minutes: 60
    runs-on: spiceai-dev-runners
    needs:
      - setup-matrix
    strategy:
      fail-fast: false
      # Limit to a single running instance to prevent embedding creation throttling when multiple configurations use the same model provider
      # https://github.com/spiceai/spiceai/issues/6653
      max-parallel: 1
      matrix:
        target: ${{ fromJson(needs.setup-matrix.outputs.matrix) }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
        with:
          persist-credentials: false
      - name: Install MinIO
        uses: ./.github/actions/setup-minio
        with:
          minio_endpoint: ${{ secrets.TEST_MINIO_ENDPOINT }}
          minio_access_key: ${{ secrets.TEST_MINIO_ACCESS_KEY }}
          minio_secret_key: ${{ secrets.TEST_MINIO_SECRET_KEY }}
      - name: Setup spiced
        uses: ./.github/actions/setup-spiced
        id: setup-spiced
        with:
          spiced_commit: ${{ github.event.inputs.spiced_commit }}
      - name: Display spiced commit
        # The value is handed to the shell through an environment variable
        # rather than expanded into the script text, so its content can never
        # be interpreted as shell syntax.
        run: echo "SPICED_COMMIT=${SPICED_COMMIT}"
        env:
          SPICED_COMMIT: ${{ steps.setup-spiced.outputs.SPICED_COMMIT }}
      - name: Build Testoperator
        uses: ./.github/actions/build-testoperator
        with:
          minio_endpoint: ${{ secrets.TEST_MINIO_ENDPOINT }}
          minio_access_key: ${{ secrets.TEST_MINIO_ACCESS_KEY }}
          minio_secret_key: ${{ secrets.TEST_MINIO_SECRET_KEY }}
      - name: Run ${{ matrix.target.name }}
        # Removes .spice/data before invoking testoperator with this matrix
        # entry's spicepod.
        run: |
          rm -rf .spice/data
          testoperator run search \
            --ready-wait 1800 \
            --concurrency 10 \
            --benchmark-dataset quora_retrieval \
            --metrics \
            -p "${{ matrix.target.spicepod }}"
        env:
          OPENAI_API_KEY: ${{ secrets.SPICE_SECRET_OPENAI_API_KEY }}
          AWS_S3_VECTORS_KEY: ${{ secrets.AWS_S3_VECTORS_KEY }}
          AWS_S3_VECTORS_SECRET: ${{ secrets.AWS_S3_VECTORS_SECRET }}
          SPICEAI_BENCHMARK_METRICS_KEY: ${{ secrets.SPICEAI_BENCHMARK_METRICS_KEY }}
          SPICED_COMMIT: ${{ steps.setup-spiced.outputs.SPICED_COMMIT }}