Skip to content

Commit 7b67c78

Browse files
committed
vtab_in handling
1 parent 0db2e52 commit 7b67c78

File tree

6 files changed

+646
-9
lines changed

6 files changed

+646
-9
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,6 @@ tmp/
2828
poetry.lock
2929

3030
*.jsonl
31+
32+
memstat.c
33+
memstat.*

TODO

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,4 @@
2222
- remaining TODO items
2323
- skip invalid validity entries in knn filter?
2424
- dictionary encoding?
25+
- partition `x in (...)` handling

sqlite-vec.c

Lines changed: 254 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5265,6 +5265,7 @@ typedef enum {
52655265
VEC0_METADATA_OPERATOR_LT = 'd',
52665266
VEC0_METADATA_OPERATOR_GE = 'e',
52675267
VEC0_METADATA_OPERATOR_NE = 'f',
5268+
VEC0_METADATA_OPERATOR_IN = 'g',
52685269
} vec0_metadata_operator;
52695270

52705271
static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
@@ -5498,7 +5499,33 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
54985499

54995500
switch(op) {
55005501
case SQLITE_INDEX_CONSTRAINT_EQ: {
5501-
value = VEC0_METADATA_OPERATOR_EQ;
5502+
int vtabIn = 0;
5503+
#if COMPILER_SUPPORTS_VTAB_IN
5504+
if (sqlite3_libversion_number() >= 3038000) {
5505+
vtabIn = sqlite3_vtab_in(pIdxInfo, i, -1);
5506+
}
5507+
#endif
5508+
if(vtabIn) {
5509+
switch(p->metadata_columns[metadata_idx].kind) {
5510+
case VEC0_METADATA_COLUMN_KIND_FLOAT:
5511+
case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
5512+
// IMP: TODO
5513+
rc = SQLITE_ERROR;
5514+
vtab_set_error(pVTab, "'xxx in (...)' is only available on INTEGER or TEXT metadata columns.");
5515+
goto done;
5516+
break;
5517+
}
5518+
case VEC0_METADATA_COLUMN_KIND_INTEGER:
5519+
case VEC0_METADATA_COLUMN_KIND_TEXT: {
5520+
break;
5521+
}
5522+
}
5523+
value = VEC0_METADATA_OPERATOR_IN;
5524+
sqlite3_vtab_in(pIdxInfo, i, 1);
5525+
}
5526+
else {
5527+
value = VEC0_PARTITION_OPERATOR_EQ;
5528+
}
55025529
break;
55035530
}
55045531
case SQLITE_INDEX_CONSTRAINT_GT: {
@@ -5852,7 +5879,24 @@ int vec0_chunks_iter(vec0_vtab * p, const char * idxStr, int argc, sqlite3_value
58525879
return rc;
58535880
}
58545881

5855-
int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void * buffer, int size, vec0_metadata_operator op, u8* b, int metadata_idx, int chunk_rowid) {
5882+
// A single `xxx in (...)` constraint on a metadata column. TEXT or INTEGER columns only for now.
5883+
struct Vec0MetadataIn{
5884+
// index into argv[] of the argument this constraint is on, i.e. argv[argv_idx]
5885+
int argv_idx;
5886+
// metadata column index of the constraint, derived from idxStr + argv_idx
5887+
int metadata_idx;
5888+
// copied `(...)` values from sqlite3_vtab_in_first()/sqlite3_vtab_in_next(): i64 elements for INTEGER columns, struct Vec0MetadataInTextEntry elements for TEXT columns
5889+
struct Array array;
5890+
};
5891+
5892+
// Array element for the `xxx in (...)` values of a TEXT column: one copied string plus its length.
5893+
struct Vec0MetadataInTextEntry {
5894+
int n; // byte length of the value, as returned by sqlite3_value_bytes()
5895+
char * zString; // copy of the value made with sqlite3_mprintf(); released with sqlite3_free()
5896+
};
5897+
5898+
5899+
int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void * buffer, int size, vec0_metadata_operator op, u8* b, int metadata_idx, int chunk_rowid, struct Array * aMetadataIn, int argv_idx) {
58565900
int rc;
58575901
sqlite3_stmt * stmt = NULL;
58585902
i64 * rowids = NULL;
@@ -6088,6 +6132,66 @@ int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void *
60886132
break;
60896133
}
60906134

6135+
case VEC0_METADATA_OPERATOR_IN: {
6136+
size_t metadataInIdx = -1;
6137+
for(size_t i = 0; i < aMetadataIn->length; i++) {
6138+
struct Vec0MetadataIn * metadataIn = &(((struct Vec0MetadataIn *) aMetadataIn->z)[i]);
6139+
if(metadataIn->argv_idx == argv_idx) {
6140+
metadataInIdx = i;
6141+
break;
6142+
}
6143+
}
6144+
if(metadataInIdx < 0) {
6145+
abort(); // TODO
6146+
}
6147+
6148+
struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[metadataInIdx];
6149+
struct Array * aTarget = &(metadataIn->array);
6150+
6151+
6152+
int nPrefix;
6153+
char * sPrefix;
6154+
char *sFull;
6155+
int nFull;
6156+
u8 * view;
6157+
for(int i = 0; i < size; i++) {
6158+
view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
6159+
nPrefix = ((int*) view)[0];
6160+
sPrefix = (char *) &view[4];
6161+
for(size_t target_idx = 0; target_idx < aTarget->length; target_idx++) {
6162+
struct Vec0MetadataInTextEntry * entry = &(((struct Vec0MetadataInTextEntry*)aTarget->z)[target_idx]);
6163+
if(entry->n != nPrefix) {
6164+
continue;
6165+
}
6166+
int cmpPrefix = strncmp(sPrefix, entry->zString, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH));
6167+
if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
6168+
if(cmpPrefix == 0) {
6169+
bitmap_set(b, i, 1);
6170+
break;
6171+
}
6172+
continue;
6173+
}
6174+
if(cmpPrefix) {
6175+
continue;
6176+
}
6177+
6178+
rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
6179+
if(rc != SQLITE_OK) {
6180+
goto done;
6181+
}
6182+
if(nPrefix != nFull) {
6183+
rc = SQLITE_ERROR;
6184+
goto done;
6185+
}
6186+
if(strncmp(sFull, entry->zString, nFull) == 0) {
6187+
bitmap_set(b, i, 1);
6188+
break;
6189+
}
6190+
}
6191+
}
6192+
break;
6193+
}
6194+
60916195
}
60926196
rc = SQLITE_OK;
60936197

@@ -6118,7 +6222,8 @@ int vec0_set_metadata_filter_bitmap(
61186222
sqlite3_blob * blob,
61196223
i64 chunk_rowid,
61206224
u8* b,
6121-
int size) {
6225+
int size,
6226+
struct Array * aMetadataIn, int argv_idx) {
61226227
// TODO: shouldn't this skip invalid entries from the chunk's validity bitmap?
61236228

61246229
int rc;
@@ -6198,6 +6303,31 @@ int vec0_set_metadata_filter_bitmap(
61986303
for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] != target); }
61996304
break;
62006305
}
6306+
case VEC0_METADATA_OPERATOR_IN: {
6307+
int metadataInIdx = -1;
6308+
for(size_t i = 0; i < aMetadataIn->length; i++) {
6309+
struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[i];
6310+
if(metadataIn->argv_idx == argv_idx) {
6311+
metadataInIdx = i;
6312+
break;
6313+
}
6314+
}
6315+
if(metadataInIdx < 0) {
6316+
abort(); // TODO
6317+
}
6318+
struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[metadataInIdx];
6319+
struct Array * aTarget = &(metadataIn->array);
6320+
6321+
for(int i = 0; i < size; i++) {
6322+
for(size_t target_idx = 0; target_idx < aTarget->length; target_idx++) {
6323+
if( ((i64*)aTarget->z)[target_idx] == array[i]) {
6324+
bitmap_set(b, i, 1);
6325+
break;
6326+
}
6327+
}
6328+
}
6329+
break;
6330+
}
62016331
}
62026332
break;
62036333
}
@@ -6229,11 +6359,15 @@ int vec0_set_metadata_filter_bitmap(
62296359
for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] != target); }
62306360
break;
62316361
}
6362+
case VEC0_METADATA_OPERATOR_IN: {
6363+
// should never be reached
6364+
break;
6365+
}
62326366
}
62336367
break;
62346368
}
62356369
case VEC0_METADATA_COLUMN_KIND_TEXT: {
6236-
rc = vec0_metadata_filter_text(p, value, buffer, size, op, b, metadata_idx, chunk_rowid);
6370+
rc = vec0_metadata_filter_text(p, value, buffer, size, op, b, metadata_idx, chunk_rowid, aMetadataIn, argv_idx);
62376371
if(rc != SQLITE_OK) {
62386372
goto done;
62396373
}
@@ -6248,6 +6382,7 @@ int vec0_set_metadata_filter_bitmap(
62486382
int vec0Filter_knn_chunks_iter(vec0_vtab *p, sqlite3_stmt *stmtChunks,
62496383
struct VectorColumnDefinition *vector_column,
62506384
int vectorColumnIdx, struct Array *arrayRowidsIn,
6385+
struct Array * aMetadataIn,
62516386
const char * idxStr, int argc, sqlite3_value ** argv,
62526387
void *queryVector, i64 k, i64 **out_topk_rowids,
62536388
f32 **out_topk_distances, i64 *out_used) {
@@ -6472,7 +6607,7 @@ int vec0Filter_knn_chunks_iter(vec0_vtab *p, sqlite3_stmt *stmtChunks,
64726607
}
64736608

64746609
bitmap_clear(bmMetadata, p->chunk_size);
6475-
rc = vec0_set_metadata_filter_bitmap(p, metadata_idx, operator, argv[i], metadataBlobs[metadata_idx], chunk_id, bmMetadata, p->chunk_size);
6610+
rc = vec0_set_metadata_filter_bitmap(p, metadata_idx, operator, argv[i], metadataBlobs[metadata_idx], chunk_id, bmMetadata, p->chunk_size, aMetadataIn, i);
64766611
if(rc != SQLITE_OK) {
64776612
vtab_set_error(&p->base, "Could not filter metadata fields");
64786613
if(rc != SQLITE_OK) {
@@ -6619,6 +6754,9 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum,
66196754
return SQLITE_NOMEM;
66206755
}
66216756
memset(knn_data, 0, sizeof(*knn_data));
6757+
// array of `struct Vec0MetadataIn`, IF there are any `xxx in (...)` metadata constraints
6758+
struct Array * aMetadataIn = NULL;
6759+
66226760

66236761
int query_idx =-1;
66246762
int k_idx = -1;
@@ -6738,6 +6876,95 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum,
67386876
}
67396877
#endif
67406878

6879+
#if COMPILER_SUPPORTS_VTAB_IN
6880+
for(int i = 0; i < argc; i++) {
6881+
if(!(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_METADATA_CONSTRAINT && idxStr[1 + (i*4) + 2] == VEC0_METADATA_OPERATOR_IN)) {
6882+
continue;
6883+
}
6884+
int metadata_idx = idxStr[1 + (i*4) + 1] - 'A';
6885+
if(!aMetadataIn) {
6886+
aMetadataIn = sqlite3_malloc(sizeof(*aMetadataIn));
6887+
if(!aMetadataIn) {
6888+
rc = SQLITE_NOMEM;
6889+
goto cleanup;
6890+
}
6891+
memset(aMetadataIn, 0, sizeof(*aMetadataIn));
6892+
rc = array_init(aMetadataIn, sizeof(struct Vec0MetadataIn), 8);
6893+
if(rc != SQLITE_OK) {
6894+
goto cleanup;
6895+
}
6896+
}
6897+
6898+
struct Vec0MetadataIn item;
6899+
memset(&item, 0, sizeof(item));
6900+
item.metadata_idx=metadata_idx;
6901+
item.argv_idx = i;
6902+
6903+
switch(p->metadata_columns[metadata_idx].kind) {
6904+
case VEC0_METADATA_COLUMN_KIND_INTEGER: {
6905+
rc = array_init(&item.array, sizeof(i64), 16);
6906+
if(rc != SQLITE_OK) {
6907+
goto cleanup;
6908+
}
6909+
sqlite3_value *entry;
6910+
for (rc = sqlite3_vtab_in_first(argv[i], &entry); rc == SQLITE_OK && entry; rc = sqlite3_vtab_in_next(argv[i], &entry)) {
6911+
i64 v = sqlite3_value_int64(entry);
6912+
rc = array_append(&item.array, &v);
6913+
if (rc != SQLITE_OK) {
6914+
goto cleanup;
6915+
}
6916+
}
6917+
6918+
if (rc != SQLITE_DONE) {
6919+
vtab_set_error(&p->base, "fuck"); // TODO
6920+
goto cleanup;
6921+
}
6922+
6923+
break;
6924+
}
6925+
case VEC0_METADATA_COLUMN_KIND_TEXT: {
6926+
rc = array_init(&item.array, sizeof(struct Vec0MetadataInTextEntry), 16);
6927+
if(rc != SQLITE_OK) {
6928+
goto cleanup;
6929+
}
6930+
sqlite3_value *entry;
6931+
for (rc = sqlite3_vtab_in_first(argv[i], &entry); rc == SQLITE_OK && entry; rc = sqlite3_vtab_in_next(argv[i], &entry)) {
6932+
const char * s = (const char *) sqlite3_value_text(entry);
6933+
int n = sqlite3_value_bytes(entry);
6934+
6935+
struct Vec0MetadataInTextEntry entry;
6936+
// TODO: if this exits early, does it get properly cleaned up? (`item` is only appended to aMetadataIn after this loop, and cleanup only walks aMetadataIn)
6937+
entry.zString = sqlite3_mprintf("%.*s", n, s);
6938+
if(!entry.zString) {
6939+
rc = SQLITE_NOMEM;
6940+
goto cleanup;
6941+
}
6942+
entry.n = n;
6943+
rc = array_append(&item.array, &entry);
6944+
if (rc != SQLITE_OK) {
6945+
goto cleanup;
6946+
}
6947+
}
6948+
6949+
if (rc != SQLITE_DONE) {
6950+
vtab_set_error(&p->base, "fuck"); // TODO
6951+
goto cleanup;
6952+
}
6953+
6954+
break;
6955+
}
6956+
default: {
6957+
abort();
6958+
}
6959+
}
6960+
6961+
rc = array_append(aMetadataIn, &item);
6962+
if(rc != SQLITE_OK) {
6963+
abort(); // TODO
6964+
}
6965+
}
6966+
#endif
6967+
67416968
rc = vec0_chunks_iter(p, idxStr, argc, argv, &stmtChunks);
67426969
if (rc != SQLITE_OK) {
67436970
// IMP: V06942_23781
@@ -6750,7 +6977,7 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum,
67506977
f32 *topk_distances = NULL;
67516978
i64 k_used = 0;
67526979
rc = vec0Filter_knn_chunks_iter(p, stmtChunks, vector_column, vectorColumnIdx,
6753-
arrayRowidsIn, idxStr, argc, argv, queryVector, k, &topk_rowids,
6980+
arrayRowidsIn, aMetadataIn, idxStr, argc, argv, queryVector, k, &topk_rowids,
67546981
&topk_distances, &k_used);
67556982
if (rc != SQLITE_OK) {
67566983
goto cleanup;
@@ -6771,6 +6998,21 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum,
67716998
array_cleanup(arrayRowidsIn);
67726999
sqlite3_free(arrayRowidsIn);
67737000
queryVectorCleanup(queryVector);
7001+
if(aMetadataIn) {
7002+
for(size_t i = 0; i < aMetadataIn->length; i++) {
7003+
struct Vec0MetadataIn* item = &((struct Vec0MetadataIn *) aMetadataIn->z)[i];
7004+
for(size_t j = 0; j < item->array.length; j++) {
7005+
if(p->metadata_columns[item->metadata_idx].kind == VEC0_METADATA_COLUMN_KIND_TEXT) {
7006+
struct Vec0MetadataInTextEntry entry = ((struct Vec0MetadataInTextEntry*)item->array.z)[j];
7007+
sqlite3_free(entry.zString);
7008+
}
7009+
}
7010+
array_cleanup(&item->array);
7011+
}
7012+
array_cleanup(aMetadataIn);
7013+
}
7014+
7015+
sqlite3_free(aMetadataIn);
67747016

67757017
return rc;
67767018
}
@@ -7049,7 +7291,8 @@ static int vec0Column_fullscan(vec0_vtab *pVtab, vec0_cursor *pCur,
70497291
int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i);
70507292
int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context);
70517293
if(rc != SQLITE_OK) {
7052-
sqlite3_result_error(context, "fuck todo", -1);
7294+
// TODO handle
7295+
sqlite3_result_error(context, "fuck", -1);
70537296
}
70547297
}
70557298
return SQLITE_OK;
@@ -7121,7 +7364,8 @@ static int vec0Column_point(vec0_vtab *pVtab, vec0_cursor *pCur,
71217364
int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i);
71227365
int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context);
71237366
if(rc != SQLITE_OK) {
7124-
sqlite3_result_error(context, "fuck todo", -1);
7367+
// TODO handle
7368+
sqlite3_result_error(context, "fuck", -1);
71257369
}
71267370
}
71277371

@@ -7188,7 +7432,8 @@ static int vec0Column_knn(vec0_vtab *pVtab, vec0_cursor *pCur,
71887432
i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx];
71897433
int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context);
71907434
if(rc != SQLITE_OK) {
7191-
sqlite3_result_error(context, "fuck todo", -1);
7435+
// TODO: handle
7436+
sqlite3_result_error(context, "fuck", -1);
71927437
}
71937438
}
71947439

0 commit comments

Comments
 (0)