search benchmark tests #2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| --- | |
| name: search benchmark tests | |
| on: | |
| schedule: | |
| - cron: '0 6 * * 2,5' # 06:00 UTC every Tuesday and Friday, i.e. Monday and Thursday at 10pm PST | |
| workflow_dispatch: | |
| inputs: | |
| spiced_commit: | |
| description: 'spiced build commit' | |
| required: false | |
| type: string | |
| configuration: | |
| description: 'Configuration to run' | |
| required: false | |
| default: 'all' | |
| type: choice | |
| options: | |
| - 'all' | |
| - 'openai[text-embedding-3-small]-arrow' | |
| - 'openai[text-embedding-3-small]-cayenne[file]' | |
| - 'openai[text-embedding-3-small]-duckdb[file]' | |
| - 'openai[text-embedding-3-small[chunking]]-duckdb[file]' | |
| - 'openai[text-embedding-3-small]-s3_vectors' | |
| - 'full_text_search-cayenne[file]' | |
| - 'full_text_search-duckdb[file]' | |
| - 'hybrid[model2vec[potion-multilingual-128M]]-duckdb[file]' | |
| - 'model2vec[potion-multilingual-128M]-duckdb[file]' | |
| - 'model2vec[potion-multilingual-128M]-cayenne[file]' | |
| concurrency: | |
| # Allow only one workflow per any non-trunk branch. | |
| group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'trunk' && github.sha || 'any-sha' }} | |
| cancel-in-progress: true | |
| jobs: | |
| # Using a matrix here allows us to easily run all or a specific configuration from the dropdown. | |
| # This is simpler than using testoperator dispatch for the current small set of configurations. | |
| # We'll incrementally switch to testoperator dispatch as more configurations are added. | |
| setup-matrix: | |
| name: Setup matrix | |
| runs-on: ubuntu-24.04 | |
| outputs: | |
| matrix: ${{ steps.setup-matrix.outputs.result }} | |
| steps: | |
| - name: Set up matrix | |
| uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 | |
| id: setup-matrix | |
| with: | |
| script: | | |
| const matrix = [ | |
| { name: "openai[text-embedding-3-small]-arrow" }, | |
| { name: "openai[text-embedding-3-small]-cayenne[file]" }, | |
| { name: "openai[text-embedding-3-small]-duckdb[file]" }, | |
| { name: "openai[text-embedding-3-small[chunking]]-duckdb[file]" }, | |
| { name: "openai[text-embedding-3-small]-s3_vectors" }, | |
| { name: "full_text_search-duckdb[file]" }, | |
| { name: "full_text_search-cayenne[file]" }, | |
| { name: "hybrid[model2vec[potion-multilingual-128M]]-duckdb[file]" }, | |
| { name: "model2vec[potion-multilingual-128M]-duckdb[file]" }, | |
| { name: "model2vec[potion-multilingual-128M]-cayenne[file]" }, | |
| ]; | |
| const configuration = context.payload.inputs?.configuration || 'all'; | |
| let filtered = matrix; | |
| if (configuration !== 'all') { | |
| filtered = filtered.filter(m => m.name === configuration); | |
| } | |
| // include the spicepod path for each matrix item | |
| filtered = filtered.map((m) => { | |
| m.spicepod = "./test/spicepods/search/mteb/quora/" + m.name + ".yaml"; | |
| return m; | |
| }); | |
| return filtered; | |
| # Runs the search benchmark once for every entry of the matrix produced by the setup-matrix job. | |
| run-bench: | |
| name: Run ${{ matrix.target.name }} | |
| timeout-minutes: 60 | |
| runs-on: spiceai-dev-runners | |
| needs: | |
| - setup-matrix | |
| strategy: | |
| # Keep running the remaining configurations when one of them fails. | |
| fail-fast: false | |
| # Limit to a single running instance to prevent embedding creation throttling when multiple configurations use the same model provider | |
| # https://github.com/spiceai/spiceai/issues/6653 | |
| max-parallel: 1 | |
| matrix: | |
| target: ${{ fromJson(needs.setup-matrix.outputs.matrix) }} | |
| steps: | |
| - name: Checkout repository | |
| uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 | |
| with: | |
| # Do not leave the checkout token in the local git configuration. | |
| persist-credentials: false | |
| - name: Install MinIO | |
| uses: ./.github/actions/setup-minio | |
| with: | |
| minio_endpoint: ${{ secrets.TEST_MINIO_ENDPOINT }} | |
| minio_access_key: ${{ secrets.TEST_MINIO_ACCESS_KEY }} | |
| minio_secret_key: ${{ secrets.TEST_MINIO_SECRET_KEY }} | |
| - name: Setup spiced | |
| uses: ./.github/actions/setup-spiced | |
| id: setup-spiced | |
| with: | |
| spiced_commit: ${{ github.event.inputs.spiced_commit }} | |
| # Print the commit reported by the setup-spiced step to the job log. | |
| # NOTE(review): the step output is expanded into the shell command before it runs; | |
| # confirm it cannot contain shell metacharacters, or pass it through env instead. | |
| - name: Display spiced commit | |
| run: echo "SPICED_COMMIT=${{ steps.setup-spiced.outputs.SPICED_COMMIT }}" | |
| - name: Build Testoperator | |
| uses: ./.github/actions/build-testoperator | |
| with: | |
| minio_endpoint: ${{ secrets.TEST_MINIO_ENDPOINT }} | |
| minio_access_key: ${{ secrets.TEST_MINIO_ACCESS_KEY }} | |
| minio_secret_key: ${{ secrets.TEST_MINIO_SECRET_KEY }} | |
| # Clear .spice/data, then run the testoperator search benchmark against this configuration's spicepod. | |
| - name: Run ${{ matrix.target.name }} | |
| run: | | |
| rm -rf .spice/data | |
| testoperator run search \ | |
| --ready-wait 1800 \ | |
| --concurrency 10 \ | |
| --benchmark-dataset quora_retrieval \ | |
| --metrics \ | |
| -p "${{ matrix.target.spicepod }}" | |
| # Secrets and the resolved spiced commit exposed to the benchmark step as environment variables. | |
| env: | |
| OPENAI_API_KEY: ${{ secrets.SPICE_SECRET_OPENAI_API_KEY }} | |
| AWS_S3_VECTORS_KEY: ${{ secrets.AWS_S3_VECTORS_KEY }} | |
| AWS_S3_VECTORS_SECRET: ${{ secrets.AWS_S3_VECTORS_SECRET }} | |
| SPICEAI_BENCHMARK_METRICS_KEY: ${{ secrets.SPICEAI_BENCHMARK_METRICS_KEY }} | |
| SPICED_COMMIT: ${{ steps.setup-spiced.outputs.SPICED_COMMIT }} |