forked from spiceai/spiceai
-
Notifications
You must be signed in to change notification settings - Fork 0
136 lines (122 loc) · 5.18 KB
/
benchmarks_search.yml
File metadata and controls
136 lines (122 loc) · 5.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
---
name: search benchmark tests
on:
  schedule:
    # Cron schedules are evaluated in UTC: 06:00 UTC on Tuesday and Friday,
    # which is Monday and Thursday evening US Pacific time (10pm PST / 11pm PDT).
    - cron: "0 6 * * 2,5"
  workflow_dispatch:
    inputs:
      # Optional; forwarded unchanged to the setup-spiced action in the run-bench job.
      spiced_commit:
        description: "spiced build commit"
        required: false
        type: string
      configuration:
        description: "Configuration to run"
        required: false
        default: "all"
        type: choice
        # Every option other than "all" must equal a `name` in the matrix
        # built by the setup-matrix job, which filters on an exact match.
        options:
          - "all"
          - "openai[text-embedding-3-small]-arrow"
          - "openai[text-embedding-3-small]-cayenne[file]"
          - "openai[text-embedding-3-small]-duckdb[file]"
          - "openai[text-embedding-3-small[chunking]]-duckdb[file]"
          - "openai[text-embedding-3-small]-s3_vectors"
          - "full_text_search-cayenne[file]"
          - "full_text_search-duckdb[file]"
          - "hybrid[model2vec[potion-multilingual-128M]]-duckdb[file]"
          - "model2vec[potion-multilingual-128M]-duckdb[file]"
          - "model2vec[potion-multilingual-128M]-cayenne[file]"
concurrency:
  # On trunk the commit SHA is part of the group key, so runs for different commits never cancel each other.
  # On every other ref the key ends in the constant 'any-sha', so a newer run cancels the one still in progress.
  group: "${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'trunk' && github.sha || 'any-sha' }}"
  cancel-in-progress: true
jobs:
  # Produces the list of benchmark configurations as a job output, so a manual run can execute
  # either every configuration or only the one picked from the dropdown.
  # This is simpler than using testoperator dispatch for the current small set of configurations.
  # We'll incrementally switch to testoperator dispatch as more configurations are added.
  setup-matrix:
    name: Setup matrix
    runs-on: ubuntu-24.04
    outputs:
      # JSON-encoded return value of the github-script step below.
      matrix: ${{ steps.setup-matrix.outputs.result }}
    steps:
      - name: Set up matrix
        uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
        id: setup-matrix
        with:
          script: |
            // Keep these names in sync with the `configuration` options of the workflow_dispatch trigger.
            const matrix = [
              { name: "openai[text-embedding-3-small]-arrow" },
              { name: "openai[text-embedding-3-small]-cayenne[file]" },
              { name: "openai[text-embedding-3-small]-duckdb[file]" },
              { name: "openai[text-embedding-3-small[chunking]]-duckdb[file]" },
              { name: "openai[text-embedding-3-small]-s3_vectors" },
              { name: "full_text_search-duckdb[file]" },
              { name: "full_text_search-cayenne[file]" },
              { name: "hybrid[model2vec[potion-multilingual-128M]]-duckdb[file]" },
              { name: "model2vec[potion-multilingual-128M]-duckdb[file]" },
              { name: "model2vec[potion-multilingual-128M]-cayenne[file]" },
            ];

            // `inputs` is absent unless the run was dispatched manually; fall back to running everything.
            const configuration = context.payload.inputs?.configuration || 'all';
            const selected = configuration === 'all'
              ? matrix
              : matrix.filter((m) => m.name === configuration);

            // An empty matrix would only surface later as an opaque failure of the run-bench job,
            // so report the real cause (dropdown and matrix out of sync) right here.
            if (selected.length === 0) {
              core.setFailed("No benchmark configuration matches '" + configuration + "'");
              return [];
            }

            // Attach the spicepod path for each matrix item without mutating the source list.
            return selected.map((m) => ({
              ...m,
              spicepod: "./test/spicepods/search/mteb/quora/" + m.name + ".yaml",
            }));

  # Runs the search benchmark once per configuration emitted by setup-matrix.
  run-bench:
    name: Run ${{ matrix.target.name }}
    timeout-minutes: 60
    runs-on: spiceai-dev-runners
    needs:
      - setup-matrix
    strategy:
      # One failing configuration must not cancel the remaining ones.
      fail-fast: false
      # Limit to a single running instance to prevent embedding creation throttling when multiple configurations use the same model provider
      # https://github.com/spiceai/spiceai/issues/6653
      max-parallel: 1
      matrix:
        target: ${{ fromJson(needs.setup-matrix.outputs.matrix) }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
        with:
          persist-credentials: false

      - name: Install MinIO
        uses: ./.github/actions/setup-minio
        with:
          minio_endpoint: ${{ secrets.TEST_MINIO_ENDPOINT }}
          minio_access_key: ${{ secrets.TEST_MINIO_ACCESS_KEY }}
          minio_secret_key: ${{ secrets.TEST_MINIO_SECRET_KEY }}

      - name: Setup spiced
        uses: ./.github/actions/setup-spiced
        id: setup-spiced
        with:
          # Empty on scheduled runs; only set when supplied through workflow_dispatch.
          spiced_commit: ${{ github.event.inputs.spiced_commit }}

      - name: Display spiced commit
        # The value is passed through the environment rather than interpolated into the
        # script text, so its content can never be interpreted as shell syntax.
        run: echo "SPICED_COMMIT=${SPICED_COMMIT}"
        env:
          SPICED_COMMIT: ${{ steps.setup-spiced.outputs.SPICED_COMMIT }}

      - name: Build Testoperator
        uses: ./.github/actions/build-testoperator
        with:
          minio_endpoint: ${{ secrets.TEST_MINIO_ENDPOINT }}
          minio_access_key: ${{ secrets.TEST_MINIO_ACCESS_KEY }}
          minio_secret_key: ${{ secrets.TEST_MINIO_SECRET_KEY }}

      - name: Run ${{ matrix.target.name }}
        # Removes .spice/data before the benchmark starts.
        # NOTE(review): presumably to drop state left by a previous configuration on a reused runner - confirm.
        run: |
          rm -rf .spice/data
          testoperator run search \
            --ready-wait 1800 \
            --concurrency 10 \
            --benchmark-dataset quora_retrieval \
            --metrics \
            -p "${{ matrix.target.spicepod }}"
        env:
          OPENAI_API_KEY: ${{ secrets.SPICE_SECRET_OPENAI_API_KEY }}
          AWS_S3_VECTORS_KEY: ${{ secrets.AWS_S3_VECTORS_KEY }}
          AWS_S3_VECTORS_SECRET: ${{ secrets.AWS_S3_VECTORS_SECRET }}
          SPICEAI_BENCHMARK_METRICS_KEY: ${{ secrets.SPICEAI_BENCHMARK_METRICS_KEY }}
          SPICED_COMMIT: ${{ steps.setup-spiced.outputs.SPICED_COMMIT }}