@@ -5265,6 +5265,7 @@ typedef enum {
5265
5265
VEC0_METADATA_OPERATOR_LT = 'd' ,
5266
5266
VEC0_METADATA_OPERATOR_GE = 'e' ,
5267
5267
VEC0_METADATA_OPERATOR_NE = 'f' ,
5268
+ VEC0_METADATA_OPERATOR_IN = 'g' ,
5268
5269
} vec0_metadata_operator ;
5269
5270
5270
5271
static int vec0BestIndex (sqlite3_vtab * pVTab , sqlite3_index_info * pIdxInfo ) {
@@ -5498,7 +5499,33 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
5498
5499
5499
5500
switch (op ) {
5500
5501
case SQLITE_INDEX_CONSTRAINT_EQ : {
5501
- value = VEC0_METADATA_OPERATOR_EQ ;
5502
+ int vtabIn = 0 ;
5503
+ #if COMPILER_SUPPORTS_VTAB_IN
5504
+ if (sqlite3_libversion_number () >= 3038000 ) {
5505
+ vtabIn = sqlite3_vtab_in (pIdxInfo , i , -1 );
5506
+ }
5507
+ #endif
5508
+ if (vtabIn ) {
5509
+ switch (p -> metadata_columns [metadata_idx ].kind ) {
5510
+ case VEC0_METADATA_COLUMN_KIND_FLOAT :
5511
+ case VEC0_METADATA_COLUMN_KIND_BOOLEAN : {
5512
+ // IMP: TODO
5513
+ rc = SQLITE_ERROR ;
5514
+ vtab_set_error (pVTab , "'xxx in (...)' is only available on INTEGER or TEXT metadata columns." );
5515
+ goto done ;
5516
+ break ;
5517
+ }
5518
+ case VEC0_METADATA_COLUMN_KIND_INTEGER :
5519
+ case VEC0_METADATA_COLUMN_KIND_TEXT : {
5520
+ break ;
5521
+ }
5522
+ }
5523
+ value = VEC0_METADATA_OPERATOR_IN ;
5524
+ sqlite3_vtab_in (pIdxInfo , i , 1 );
5525
+ }
5526
+ else {
5527
+ value = VEC0_PARTITION_OPERATOR_EQ ;
5528
+ }
5502
5529
break ;
5503
5530
}
5504
5531
case SQLITE_INDEX_CONSTRAINT_GT : {
@@ -5852,7 +5879,24 @@ int vec0_chunks_iter(vec0_vtab * p, const char * idxStr, int argc, sqlite3_value
5852
5879
return rc ;
5853
5880
}
5854
5881
5855
- int vec0_metadata_filter_text (vec0_vtab * p , sqlite3_value * value , const void * buffer , int size , vec0_metadata_operator op , u8 * b , int metadata_idx , int chunk_rowid ) {
5882
+ // One `xxx in (...)` constraint on a metadata column; only INTEGER and TEXT columns are handled for now.
5883
+ struct Vec0MetadataIn {
5884
+ // index into argv[] of the value this constraint is on
5885
+ int argv_idx ;
5886
+ // metadata column index of the constraint, derived from idxStr + argv_idx
5887
+ int metadata_idx ;
5888
+ // copied `(...)` values from sqlite3_vtab_in_first()/sqlite3_vtab_in_next(): i64 elements for INTEGER columns, struct Vec0MetadataInTextEntry elements for TEXT columns
5889
+ struct Array array ;
5890
+ };
5891
+
5892
+ // Array element for one `xxx in (...)` value on a TEXT column: zString is a sqlite3_mprintf() copy of the value (released with sqlite3_free()), n is its length in bytes from sqlite3_value_bytes().
5893
+ struct Vec0MetadataInTextEntry {
5894
+ int n ;
5895
+ char * zString ;
5896
+ };
5897
+
5898
+
5899
+ int vec0_metadata_filter_text (vec0_vtab * p , sqlite3_value * value , const void * buffer , int size , vec0_metadata_operator op , u8 * b , int metadata_idx , int chunk_rowid , struct Array * aMetadataIn , int argv_idx ) {
5856
5900
int rc ;
5857
5901
sqlite3_stmt * stmt = NULL ;
5858
5902
i64 * rowids = NULL ;
@@ -6088,6 +6132,66 @@ int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void *
6088
6132
break ;
6089
6133
}
6090
6134
6135
+ case VEC0_METADATA_OPERATOR_IN : {
6136
+ size_t metadataInIdx = -1 ;
6137
+ for (size_t i = 0 ; i < aMetadataIn -> length ; i ++ ) {
6138
+ struct Vec0MetadataIn * metadataIn = & (((struct Vec0MetadataIn * ) aMetadataIn -> z )[i ]);
6139
+ if (metadataIn -> argv_idx == argv_idx ) {
6140
+ metadataInIdx = i ;
6141
+ break ;
6142
+ }
6143
+ }
6144
+ if (metadataInIdx < 0 ) {
6145
+ abort (); // TODO
6146
+ }
6147
+
6148
+ struct Vec0MetadataIn * metadataIn = & ((struct Vec0MetadataIn * ) aMetadataIn -> z )[metadataInIdx ];
6149
+ struct Array * aTarget = & (metadataIn -> array );
6150
+
6151
+
6152
+ int nPrefix ;
6153
+ char * sPrefix ;
6154
+ char * sFull ;
6155
+ int nFull ;
6156
+ u8 * view ;
6157
+ for (int i = 0 ; i < size ; i ++ ) {
6158
+ view = & ((u8 * ) buffer )[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH ];
6159
+ nPrefix = ((int * ) view )[0 ];
6160
+ sPrefix = (char * ) & view [4 ];
6161
+ for (size_t target_idx = 0 ; target_idx < aTarget -> length ; target_idx ++ ) {
6162
+ struct Vec0MetadataInTextEntry * entry = & (((struct Vec0MetadataInTextEntry * )aTarget -> z )[target_idx ]);
6163
+ if (entry -> n != nPrefix ) {
6164
+ continue ;
6165
+ }
6166
+ int cmpPrefix = strncmp (sPrefix , entry -> zString , min (nPrefix , VEC0_METADATA_TEXT_VIEW_DATA_LENGTH ));
6167
+ if (nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH ) {
6168
+ if (cmpPrefix == 0 ) {
6169
+ bitmap_set (b , i , 1 );
6170
+ break ;
6171
+ }
6172
+ continue ;
6173
+ }
6174
+ if (cmpPrefix ) {
6175
+ continue ;
6176
+ }
6177
+
6178
+ rc = vec0_get_metadata_text_long_value (p , & stmt , metadata_idx , rowids [i ], & nFull , & sFull );
6179
+ if (rc != SQLITE_OK ) {
6180
+ goto done ;
6181
+ }
6182
+ if (nPrefix != nFull ) {
6183
+ rc = SQLITE_ERROR ;
6184
+ goto done ;
6185
+ }
6186
+ if (strncmp (sFull , entry -> zString , nFull ) == 0 ) {
6187
+ bitmap_set (b , i , 1 );
6188
+ break ;
6189
+ }
6190
+ }
6191
+ }
6192
+ break ;
6193
+ }
6194
+
6091
6195
}
6092
6196
rc = SQLITE_OK ;
6093
6197
@@ -6118,7 +6222,8 @@ int vec0_set_metadata_filter_bitmap(
6118
6222
sqlite3_blob * blob ,
6119
6223
i64 chunk_rowid ,
6120
6224
u8 * b ,
6121
- int size ) {
6225
+ int size ,
6226
+ struct Array * aMetadataIn , int argv_idx ) {
6122
6227
// TODO: shouldn't this skip invalid entries from the chunk's validity bitmap?
6123
6228
6124
6229
int rc ;
@@ -6198,6 +6303,31 @@ int vec0_set_metadata_filter_bitmap(
6198
6303
for (int i = 0 ; i < size ; i ++ ) { bitmap_set (b , i , array [i ] != target ); }
6199
6304
break ;
6200
6305
}
6306
+ case VEC0_METADATA_OPERATOR_IN : {
6307
+ int metadataInIdx = -1 ;
6308
+ for (size_t i = 0 ; i < aMetadataIn -> length ; i ++ ) {
6309
+ struct Vec0MetadataIn * metadataIn = & ((struct Vec0MetadataIn * ) aMetadataIn -> z )[i ];
6310
+ if (metadataIn -> argv_idx == argv_idx ) {
6311
+ metadataInIdx = i ;
6312
+ break ;
6313
+ }
6314
+ }
6315
+ if (metadataInIdx < 0 ) {
6316
+ abort (); // TODO
6317
+ }
6318
+ struct Vec0MetadataIn * metadataIn = & ((struct Vec0MetadataIn * ) aMetadataIn -> z )[metadataInIdx ];
6319
+ struct Array * aTarget = & (metadataIn -> array );
6320
+
6321
+ for (int i = 0 ; i < size ; i ++ ) {
6322
+ for (size_t target_idx = 0 ; target_idx < aTarget -> length ; target_idx ++ ) {
6323
+ if ( ((i64 * )aTarget -> z )[target_idx ] == array [i ]) {
6324
+ bitmap_set (b , i , 1 );
6325
+ break ;
6326
+ }
6327
+ }
6328
+ }
6329
+ break ;
6330
+ }
6201
6331
}
6202
6332
break ;
6203
6333
}
@@ -6229,11 +6359,15 @@ int vec0_set_metadata_filter_bitmap(
6229
6359
for (int i = 0 ; i < size ; i ++ ) { bitmap_set (b , i , array [i ] != target ); }
6230
6360
break ;
6231
6361
}
6362
+ case VEC0_METADATA_OPERATOR_IN : {
6363
+ // should never be reached
6364
+ break ;
6365
+ }
6232
6366
}
6233
6367
break ;
6234
6368
}
6235
6369
case VEC0_METADATA_COLUMN_KIND_TEXT : {
6236
- rc = vec0_metadata_filter_text (p , value , buffer , size , op , b , metadata_idx , chunk_rowid );
6370
+ rc = vec0_metadata_filter_text (p , value , buffer , size , op , b , metadata_idx , chunk_rowid , aMetadataIn , argv_idx );
6237
6371
if (rc != SQLITE_OK ) {
6238
6372
goto done ;
6239
6373
}
@@ -6248,6 +6382,7 @@ int vec0_set_metadata_filter_bitmap(
6248
6382
int vec0Filter_knn_chunks_iter (vec0_vtab * p , sqlite3_stmt * stmtChunks ,
6249
6383
struct VectorColumnDefinition * vector_column ,
6250
6384
int vectorColumnIdx , struct Array * arrayRowidsIn ,
6385
+ struct Array * aMetadataIn ,
6251
6386
const char * idxStr , int argc , sqlite3_value * * argv ,
6252
6387
void * queryVector , i64 k , i64 * * out_topk_rowids ,
6253
6388
f32 * * out_topk_distances , i64 * out_used ) {
@@ -6472,7 +6607,7 @@ int vec0Filter_knn_chunks_iter(vec0_vtab *p, sqlite3_stmt *stmtChunks,
6472
6607
}
6473
6608
6474
6609
bitmap_clear (bmMetadata , p -> chunk_size );
6475
- rc = vec0_set_metadata_filter_bitmap (p , metadata_idx , operator , argv [i ], metadataBlobs [metadata_idx ], chunk_id , bmMetadata , p -> chunk_size );
6610
+ rc = vec0_set_metadata_filter_bitmap (p , metadata_idx , operator , argv [i ], metadataBlobs [metadata_idx ], chunk_id , bmMetadata , p -> chunk_size , aMetadataIn , i );
6476
6611
if (rc != SQLITE_OK ) {
6477
6612
vtab_set_error (& p -> base , "Could not filter metadata fields" );
6478
6613
if (rc != SQLITE_OK ) {
@@ -6619,6 +6754,9 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum,
6619
6754
return SQLITE_NOMEM ;
6620
6755
}
6621
6756
memset (knn_data , 0 , sizeof (* knn_data ));
6757
+ // array of `struct Vec0MetadataIn`, IF there are any `xxx in (...)` metadata constraints
6758
+ struct Array * aMetadataIn = NULL ;
6759
+
6622
6760
6623
6761
int query_idx = -1 ;
6624
6762
int k_idx = -1 ;
@@ -6738,6 +6876,95 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum,
6738
6876
}
6739
6877
#endif
6740
6878
6879
+ #if COMPILER_SUPPORTS_VTAB_IN
6880
+ for (int i = 0 ; i < argc ; i ++ ) {
6881
+ if (!(idxStr [1 + (i * 4 )] == VEC0_IDXSTR_KIND_METADATA_CONSTRAINT && idxStr [1 + (i * 4 ) + 2 ] == VEC0_METADATA_OPERATOR_IN )) {
6882
+ continue ;
6883
+ }
6884
+ int metadata_idx = idxStr [1 + (i * 4 ) + 1 ] - 'A' ;
6885
+ if (!aMetadataIn ) {
6886
+ aMetadataIn = sqlite3_malloc (sizeof (* aMetadataIn ));
6887
+ if (!aMetadataIn ) {
6888
+ rc = SQLITE_NOMEM ;
6889
+ goto cleanup ;
6890
+ }
6891
+ memset (aMetadataIn , 0 , sizeof (* aMetadataIn ));
6892
+ rc = array_init (aMetadataIn , sizeof (struct Vec0MetadataIn ), 8 );
6893
+ if (rc != SQLITE_OK ) {
6894
+ goto cleanup ;
6895
+ }
6896
+ }
6897
+
6898
+ struct Vec0MetadataIn item ;
6899
+ memset (& item , 0 , sizeof (item ));
6900
+ item .metadata_idx = metadata_idx ;
6901
+ item .argv_idx = i ;
6902
+
6903
+ switch (p -> metadata_columns [metadata_idx ].kind ) {
6904
+ case VEC0_METADATA_COLUMN_KIND_INTEGER : {
6905
+ rc = array_init (& item .array , sizeof (i64 ), 16 );
6906
+ if (rc != SQLITE_OK ) {
6907
+ goto cleanup ;
6908
+ }
6909
+ sqlite3_value * entry ;
6910
+ for (rc = sqlite3_vtab_in_first (argv [i ], & entry ); rc == SQLITE_OK && entry ; rc = sqlite3_vtab_in_next (argv [i ], & entry )) {
6911
+ i64 v = sqlite3_value_int64 (entry );
6912
+ rc = array_append (& item .array , & v );
6913
+ if (rc != SQLITE_OK ) {
6914
+ goto cleanup ;
6915
+ }
6916
+ }
6917
+
6918
+ if (rc != SQLITE_DONE ) {
6919
+ vtab_set_error (& p -> base , "fuck" ); // TODO
6920
+ goto cleanup ;
6921
+ }
6922
+
6923
+ break ;
6924
+ }
6925
+ case VEC0_METADATA_COLUMN_KIND_TEXT : {
6926
+ rc = array_init (& item .array , sizeof (struct Vec0MetadataInTextEntry ), 16 );
6927
+ if (rc != SQLITE_OK ) {
6928
+ goto cleanup ;
6929
+ }
6930
+ sqlite3_value * entry ;
6931
+ for (rc = sqlite3_vtab_in_first (argv [i ], & entry ); rc == SQLITE_OK && entry ; rc = sqlite3_vtab_in_next (argv [i ], & entry )) {
6932
+ const char * s = (const char * ) sqlite3_value_text (entry );
6933
+ int n = sqlite3_value_bytes (entry );
6934
+
6935
+ struct Vec0MetadataInTextEntry entry ;
6936
+ // TODO if this exits early, does it get properly cleaned up
6937
+ entry .zString = sqlite3_mprintf ("%.*s" , n , s );
6938
+ if (!entry .zString ) {
6939
+ rc = SQLITE_NOMEM ;
6940
+ goto cleanup ;
6941
+ }
6942
+ entry .n = n ;
6943
+ rc = array_append (& item .array , & entry );
6944
+ if (rc != SQLITE_OK ) {
6945
+ goto cleanup ;
6946
+ }
6947
+ }
6948
+
6949
+ if (rc != SQLITE_DONE ) {
6950
+ vtab_set_error (& p -> base , "fuck" ); // TODO
6951
+ goto cleanup ;
6952
+ }
6953
+
6954
+ break ;
6955
+ }
6956
+ default : {
6957
+ abort ();
6958
+ }
6959
+ }
6960
+
6961
+ rc = array_append (aMetadataIn , & item );
6962
+ if (rc != SQLITE_OK ) {
6963
+ abort (); // TODO
6964
+ }
6965
+ }
6966
+ #endif
6967
+
6741
6968
rc = vec0_chunks_iter (p , idxStr , argc , argv , & stmtChunks );
6742
6969
if (rc != SQLITE_OK ) {
6743
6970
// IMP: V06942_23781
@@ -6750,7 +6977,7 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum,
6750
6977
f32 * topk_distances = NULL ;
6751
6978
i64 k_used = 0 ;
6752
6979
rc = vec0Filter_knn_chunks_iter (p , stmtChunks , vector_column , vectorColumnIdx ,
6753
- arrayRowidsIn , idxStr , argc , argv , queryVector , k , & topk_rowids ,
6980
+ arrayRowidsIn , aMetadataIn , idxStr , argc , argv , queryVector , k , & topk_rowids ,
6754
6981
& topk_distances , & k_used );
6755
6982
if (rc != SQLITE_OK ) {
6756
6983
goto cleanup ;
@@ -6771,6 +6998,21 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum,
6771
6998
array_cleanup (arrayRowidsIn );
6772
6999
sqlite3_free (arrayRowidsIn );
6773
7000
queryVectorCleanup (queryVector );
7001
+ if (aMetadataIn ) {
7002
+ for (size_t i = 0 ; i < aMetadataIn -> length ; i ++ ) {
7003
+ struct Vec0MetadataIn * item = & ((struct Vec0MetadataIn * ) aMetadataIn -> z )[i ];
7004
+ for (size_t j = 0 ; j < item -> array .length ; j ++ ) {
7005
+ if (p -> metadata_columns [item -> metadata_idx ].kind == VEC0_METADATA_COLUMN_KIND_TEXT ) {
7006
+ struct Vec0MetadataInTextEntry entry = ((struct Vec0MetadataInTextEntry * )item -> array .z )[j ];
7007
+ sqlite3_free (entry .zString );
7008
+ }
7009
+ }
7010
+ array_cleanup (& item -> array );
7011
+ }
7012
+ array_cleanup (aMetadataIn );
7013
+ }
7014
+
7015
+ sqlite3_free (aMetadataIn );
6774
7016
6775
7017
return rc ;
6776
7018
}
@@ -7049,7 +7291,8 @@ static int vec0Column_fullscan(vec0_vtab *pVtab, vec0_cursor *pCur,
7049
7291
int metadata_idx = vec0_column_idx_to_metadata_idx (pVtab , i );
7050
7292
int rc = vec0_result_metadata_value_for_rowid (pVtab , rowid , metadata_idx , context );
7051
7293
if (rc != SQLITE_OK ) {
7052
- sqlite3_result_error (context , "fuck todo" , -1 );
7294
+ // TODO handle
7295
+ sqlite3_result_error (context , "fuck" , -1 );
7053
7296
}
7054
7297
}
7055
7298
return SQLITE_OK ;
@@ -7121,7 +7364,8 @@ static int vec0Column_point(vec0_vtab *pVtab, vec0_cursor *pCur,
7121
7364
int metadata_idx = vec0_column_idx_to_metadata_idx (pVtab , i );
7122
7365
int rc = vec0_result_metadata_value_for_rowid (pVtab , rowid , metadata_idx , context );
7123
7366
if (rc != SQLITE_OK ) {
7124
- sqlite3_result_error (context , "fuck todo" , -1 );
7367
+ // TODO handle
7368
+ sqlite3_result_error (context , "fuck" , -1 );
7125
7369
}
7126
7370
}
7127
7371
@@ -7188,7 +7432,8 @@ static int vec0Column_knn(vec0_vtab *pVtab, vec0_cursor *pCur,
7188
7432
i64 rowid = pCur -> knn_data -> rowids [pCur -> knn_data -> current_idx ];
7189
7433
int rc = vec0_result_metadata_value_for_rowid (pVtab , rowid , metadata_idx , context );
7190
7434
if (rc != SQLITE_OK ) {
7191
- sqlite3_result_error (context , "fuck todo" , -1 );
7435
+ // TODO: handle
7436
+ sqlite3_result_error (context , "fuck" , -1 );
7192
7437
}
7193
7438
}
7194
7439
0 commit comments