26
26
spawn_vsb_pinecone ,
27
27
)
28
28
from test_pgvector import spawn_vsb_pgvector
29
+ from test_opensearch import spawn_vsb_opensearch
29
30
30
31
31
- @pytest .mark .parametrize ("spawn_vsb" , [spawn_vsb_pgvector , spawn_vsb_pinecone ])
32
+ @pytest .mark .parametrize (
33
+ "spawn_vsb" , [spawn_vsb_pgvector , spawn_vsb_pinecone , spawn_vsb_opensearch ]
34
+ )
32
35
class TestCommon :
33
36
34
37
# Unfortunately pytest won't let us selectively parametrize with fixtures, so
@@ -52,8 +55,8 @@ def test_mnist_single(
52
55
check_request_counts (
53
56
stdout ,
54
57
{
55
- # Populate num_requests counts batches, not individual records.
56
- "Populate" : {"num_requests" : lambda x : x <= 2 , "num_failures" : 0 },
58
+ # Populate num_requests counts batches, not individual records (600).
59
+ "Populate" : {"num_requests" : lambda x : x < 600 , "num_failures" : 0 },
57
60
"Search" : {
58
61
"num_requests" : 20 ,
59
62
"num_failures" : 0 ,
@@ -82,10 +85,10 @@ def test_mnist_concurrent(
82
85
stdout ,
83
86
{
84
87
# For multiple users the populate phase will chunk the records to be
85
- # loaded into num_users chunks - i.e. 4 here. Given the size of each
86
- # chunk will be less than the batch size (600 / 4 < 1000), then the
87
- # number of requests will be equal to the number of users - i.e. 4
88
- "Populate" : {"num_requests" : 4 , "num_failures" : 0 },
88
+ # loaded into num_users chunks - i.e. 4 here. Different DBs
89
+ # use different batch sizes, so just check we have fewer than
90
+ # number of records (600) / number of users (4).
91
+ "Populate" : {"num_requests" : lambda x : x < 600 / 4 , "num_failures" : 0 },
89
92
"Search" : {
90
93
"num_requests" : 20 ,
91
94
"num_failures" : 0 ,
@@ -114,10 +117,10 @@ def test_mnist_multiprocess(
114
117
stdout ,
115
118
{
116
119
# For multiple users the populate phase will chunk the records to be
117
- # loaded into num_users chunks - i.e. 4 here. Given the size of each
118
- # chunk will be less than the batch size (600 / 4 < 1000), then the
119
- # number of requests will be equal to the number of users - i.e. 4
120
- "Populate" : {"num_requests" : 4 , "num_failures" : 0 },
120
+ # loaded into num_users chunks - i.e. 4 here. Different DBs
121
+ # use different batch sizes, so just check we have fewer than
122
+ # number of records (600) / number of users (4).
123
+ "Populate" : {"num_requests" : lambda x : x < 600 / 4 , "num_failures" : 0 },
121
124
# The number of Search requests should equal the number in the dataset
122
125
# (20 for mnist-test).
123
126
"Search" : {
@@ -145,15 +148,22 @@ def test_mnist_double(
145
148
check_request_counts (
146
149
stdout ,
147
150
{
148
- "test1.Populate" : {"num_requests" : lambda x : x <= 2 , "num_failures" : 0 },
151
+ # Populate num_requests counts batches, not individual records (600).
152
+ "test1.Populate" : {
153
+ "num_requests" : lambda x : x < 600 ,
154
+ "num_failures" : 0 ,
155
+ },
149
156
# The number of Search requests should equal the number in the dataset
150
157
# (20 for mnist-test).
151
158
"test1.Search" : {
152
159
"num_requests" : 20 ,
153
160
"num_failures" : 0 ,
154
161
"Recall" : check_recall_stats ,
155
162
},
156
- "test2.Populate" : {"num_requests" : lambda x : x <= 2 , "num_failures" : 0 },
163
+ "test2.Populate" : {
164
+ "num_requests" : lambda x : x < 600 ,
165
+ "num_failures" : 0 ,
166
+ },
157
167
"test2.Search" : {
158
168
"num_requests" : 20 ,
159
169
"num_failures" : 0 ,
@@ -182,18 +192,24 @@ def test_mnist_double_concurrent(
182
192
stdout ,
183
193
{
184
194
# For multiple users the populate phase will chunk the records to be
185
- # loaded into num_users chunks - i.e. 4 here. Given the size of each
186
- # chunk will be less than the batch size (600 / 4 < 200), then the
187
- # number of requests will be equal to the number of users - i.e. 4
188
- "test1.Populate" : {"num_requests" : 4 , "num_failures" : 0 },
195
+ # loaded into num_users chunks - i.e. 4 here. Different DBs
196
+ # use different batch sizes, so just check we have fewer than
197
+ # number of records (600) / number of users (4).
198
+ "test1.Populate" : {
199
+ "num_requests" : lambda x : x < 600 / 4 ,
200
+ "num_failures" : 0 ,
201
+ },
189
202
# The number of Search requests should equal the number in the dataset
190
203
# (20 for mnist-test).
191
204
"test1.Search" : {
192
205
"num_requests" : 20 ,
193
206
"num_failures" : 0 ,
194
207
"Recall" : check_recall_stats ,
195
208
},
196
- "test2.Populate" : {"num_requests" : 4 , "num_failures" : 0 },
209
+ "test2.Populate" : {
210
+ "num_requests" : lambda x : x < 600 / 4 ,
211
+ "num_failures" : 0 ,
212
+ },
197
213
"test2.Search" : {
198
214
"num_requests" : 20 ,
199
215
"num_failures" : 0 ,
@@ -222,18 +238,28 @@ def test_mnist_double_multiprocess(
222
238
stdout ,
223
239
{
224
240
# For multiple users the populate phase will chunk the records to be
225
- # loaded into num_users chunks - i.e. 4 here. Given the size of each
226
- # chunk will be less than the batch size (600 / 4 < 200), then the
227
- # number of requests will be equal to the number of users - i.e. 4
228
- "test1.Populate" : {"num_requests" : 4 , "num_failures" : 0 },
241
+ # loaded into num_users chunks - i.e. 4 here. Different DBs
242
+ # use different batch sizes, so just check we have fewer than
243
+ # number of records (600) / number of users (4).
244
+ "test1.Populate" : {
245
+ "num_requests" : lambda x : x < 600 / 4 ,
246
+ "num_failures" : 0 ,
247
+ },
229
248
# The number of Search requests should equal the number in the dataset
230
249
# (20 for mnist-test).
231
250
"test1.Search" : {
232
251
"num_requests" : 20 ,
233
252
"num_failures" : 0 ,
234
253
"Recall" : check_recall_stats ,
235
254
},
236
- "test2.Populate" : {"num_requests" : 4 , "num_failures" : 0 },
255
+ # For multiple users the populate phase will chunk the records to be
256
+ # loaded into num_users chunks - i.e. 4 here. Different DBs
257
+ # use different batch sizes, so just check we have fewer than
258
+ # number of records (600) / number of users (4).
259
+ "test2.Populate" : {
260
+ "num_requests" : lambda x : x < 600 / 4 ,
261
+ "num_failures" : 0 ,
262
+ },
237
263
"test2.Search" : {
238
264
"num_requests" : 20 ,
239
265
"num_failures" : 0 ,
@@ -262,8 +288,8 @@ def test_mnist_skip_populate(
262
288
check_request_counts (
263
289
stdout ,
264
290
{
265
- # Populate num_requests counts batches, not individual records.
266
- "Populate" : {"num_requests" : lambda x : x <= 2 , "num_failures" : 0 },
291
+ # Populate num_requests counts batches, not individual records (600).
292
+ "Populate" : {"num_requests" : lambda x : x < 600 , "num_failures" : 0 },
267
293
"Search" : {"num_requests" : 20 , "num_failures" : 0 },
268
294
},
269
295
)
@@ -307,7 +333,7 @@ def test_filtered(
307
333
{
308
334
# Populate num_requests counts batches, not individual records.
309
335
"Populate" : {
310
- "num_requests" : lambda x : x == 10 or x == 210 ,
336
+ "num_requests" : lambda x : x > 1 and x < 10000 ,
311
337
"num_failures" : 0 ,
312
338
},
313
339
"Search" : {
@@ -335,7 +361,7 @@ def test_synthetic(
335
361
check_request_counts (
336
362
stdout ,
337
363
{
338
- "Populate" : {"num_requests" : 10 , " num_failures" : 0 },
364
+ "Populate" : {"num_failures" : 0 },
339
365
"Search" : {
340
366
"num_requests" : 100 ,
341
367
"num_failures" : 0 ,
@@ -367,7 +393,7 @@ def test_synthetic_runbook(
367
393
check_request_counts (
368
394
stdout ,
369
395
{
370
- "Populate" : {"num_requests" : lambda x : x <= 4 , " num_failures" : 0 },
396
+ "Populate" : {"num_failures" : 0 },
371
397
"Search" : {
372
398
"num_requests" : 500 ,
373
399
"num_failures" : 0 ,
@@ -382,6 +408,12 @@ def test_synthetic_proportional(
382
408
pinecone_api_key ,
383
409
pinecone_index_synthetic ,
384
410
):
411
+ if spawn_vsb == spawn_vsb_opensearch :
412
+ pytest .skip (
413
+ "Synthetic proportional test not supported on OpenSearch ("
414
+ "fetch_batch not yet implemented for OpenSearch)"
415
+ )
416
+
385
417
(proc , stdout , stderr ) = spawn_vsb (
386
418
pinecone_api_key = pinecone_api_key ,
387
419
pinecone_index = pinecone_index_synthetic ,
@@ -404,7 +436,7 @@ def test_synthetic_proportional(
404
436
check_request_counts (
405
437
stdout ,
406
438
{
407
- "Populate" : {"num_requests" : lambda x : x <= 4 , " num_failures" : 0 },
439
+ "Populate" : {"num_failures" : 0 },
408
440
"Search" : {
409
441
"num_requests" : lambda x : (x >= 150 and x <= 250 ),
410
442
"num_failures" : 0 ,
0 commit comments