Skip to content

Commit 9480627

Browse files
authored
[ENH] Brute force metadata filtering operator (#2264)
## Description of changes - Improvements & Bug fixes: Introduces an operator that takes the where and where_document clauses and returns a list of offset ids of the log that match the conditions. Note: wiring it into the orchestrator is a bit more work and will be done in a separate PR. ## Test plan - [x] Tests pass locally with `pytest` for python, `yarn test` for js, `cargo test` for rust ## Documentation Changes None
1 parent 3ec627d commit 9480627

File tree

7 files changed

+1286
-392
lines changed

7 files changed

+1286
-392
lines changed

rust/worker/src/execution/operators/brute_force_metadata_filtering.rs

Lines changed: 717 additions & 0 deletions
Large diffs are not rendered by default.

rust/worker/src/execution/operators/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
pub(super) mod brute_force_knn;
2+
pub(super) mod brute_force_metadata_filtering;
23
pub(super) mod count_records;
34
pub(super) mod flush_s3;
45
pub(super) mod hnsw_knn;

rust/worker/src/index/fulltext/types.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@ use crate::blockstore::positional_posting_list_value::PositionalPostingListBuild
22
use crate::blockstore::{BlockfileFlusher, BlockfileReader, BlockfileWriter};
33
use crate::errors::{ChromaError, ErrorCodes};
44
use crate::index::fulltext::tokenizer::ChromaTokenizer;
5+
use crate::index::metadata::types::MetadataIndexError;
6+
use crate::types::{BooleanOperator, WhereDocument, WhereDocumentOperator};
7+
use crate::utils::{merge_sorted_vecs_conjunction, merge_sorted_vecs_disjunction};
58

69
use arrow::array::Int32Array;
710
use parking_lot::Mutex;
11+
use roaring::RoaringBitmap;
812
use std::collections::HashMap;
913
use std::sync::Arc;
1014
use thiserror::Error;
@@ -302,6 +306,56 @@ impl<'me> FullTextIndexReader<'me> {
302306
}
303307
}
304308

309+
pub(crate) fn process_where_document_clause_with_callback<
310+
F: Fn(&str, WhereDocumentOperator) -> Vec<i32>,
311+
>(
312+
where_document_clause: &WhereDocument,
313+
callback: &F,
314+
) -> Result<Vec<usize>, MetadataIndexError> {
315+
let mut results = vec![];
316+
match where_document_clause {
317+
WhereDocument::DirectWhereDocumentComparison(direct_document_comparison) => {
318+
match &direct_document_comparison.operator {
319+
WhereDocumentOperator::Contains => {
320+
let result = callback(
321+
&direct_document_comparison.document,
322+
WhereDocumentOperator::Contains,
323+
);
324+
results = result.iter().map(|x| *x as usize).collect();
325+
}
326+
WhereDocumentOperator::NotContains => {
327+
todo!();
328+
}
329+
}
330+
}
331+
WhereDocument::WhereDocumentChildren(where_document_children) => {
332+
let mut first_iteration = true;
333+
for child in where_document_children.children.iter() {
334+
let child_results: Vec<usize> =
335+
match process_where_document_clause_with_callback(&child, callback) {
336+
Ok(result) => result,
337+
Err(_) => vec![],
338+
};
339+
if first_iteration {
340+
results = child_results;
341+
first_iteration = false;
342+
} else {
343+
match where_document_children.operator {
344+
BooleanOperator::And => {
345+
results = merge_sorted_vecs_conjunction(results, child_results);
346+
}
347+
BooleanOperator::Or => {
348+
results = merge_sorted_vecs_disjunction(results, child_results);
349+
}
350+
}
351+
}
352+
}
353+
}
354+
}
355+
results.sort();
356+
return Ok(results);
357+
}
358+
305359
#[cfg(test)]
306360
mod tests {
307361
use super::*;

rust/worker/src/index/metadata/types.rs

Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
use crate::blockstore::{key::KeyWrapper, BlockfileFlusher, BlockfileReader, BlockfileWriter};
22
use crate::errors::{ChromaError, ErrorCodes};
3+
use crate::types::{BooleanOperator, MetadataType, Where, WhereClauseComparator, WhereComparison};
4+
use crate::utils::{merge_sorted_vecs_conjunction, merge_sorted_vecs_disjunction};
35
use thiserror::Error;
46
use uuid::Uuid;
57

@@ -57,6 +59,273 @@ pub(crate) enum MetadataIndexWriter {
5759
),
5860
}
5961

62+
pub(crate) fn process_where_clause_with_callback<
63+
F: Fn(&str, &KeyWrapper, MetadataType, WhereClauseComparator) -> RoaringBitmap,
64+
>(
65+
where_clause: &Where,
66+
callback: &F,
67+
) -> Result<Vec<usize>, MetadataIndexError> {
68+
let mut results = vec![];
69+
match where_clause {
70+
Where::DirectWhereComparison(direct_where_comparison) => {
71+
match &direct_where_comparison.comparison {
72+
WhereComparison::SingleStringComparison(operand, comparator) => {
73+
match comparator {
74+
WhereClauseComparator::Equal => {
75+
let metadata_value_keywrapper = operand.as_str().try_into();
76+
match metadata_value_keywrapper {
77+
Ok(keywrapper) => {
78+
let result = callback(
79+
&direct_where_comparison.key,
80+
&keywrapper,
81+
MetadataType::StringType,
82+
WhereClauseComparator::Equal,
83+
);
84+
results = result.iter().map(|x| x as usize).collect();
85+
}
86+
Err(_) => {
87+
panic!("Error converting string to keywrapper")
88+
}
89+
}
90+
}
91+
WhereClauseComparator::NotEqual => {
92+
todo!();
93+
}
94+
// We don't allow these comparators for strings.
95+
WhereClauseComparator::LessThan => {
96+
unimplemented!();
97+
}
98+
WhereClauseComparator::LessThanOrEqual => {
99+
unimplemented!();
100+
}
101+
WhereClauseComparator::GreaterThan => {
102+
unimplemented!();
103+
}
104+
WhereClauseComparator::GreaterThanOrEqual => {
105+
unimplemented!();
106+
}
107+
}
108+
}
109+
WhereComparison::SingleIntComparison(operand, comparator) => match comparator {
110+
WhereClauseComparator::Equal => {
111+
let metadata_value_keywrapper = (*operand).try_into();
112+
match metadata_value_keywrapper {
113+
Ok(keywrapper) => {
114+
let result = callback(
115+
&direct_where_comparison.key,
116+
&keywrapper,
117+
MetadataType::IntType,
118+
WhereClauseComparator::Equal,
119+
);
120+
results = result.iter().map(|x| x as usize).collect();
121+
}
122+
Err(_) => {
123+
panic!("Error converting int to keywrapper")
124+
}
125+
}
126+
}
127+
WhereClauseComparator::NotEqual => {
128+
todo!();
129+
}
130+
WhereClauseComparator::LessThan => {
131+
let metadata_value_keywrapper = (*operand).try_into();
132+
match metadata_value_keywrapper {
133+
Ok(keywrapper) => {
134+
let result = callback(
135+
&direct_where_comparison.key,
136+
&keywrapper,
137+
MetadataType::IntType,
138+
WhereClauseComparator::LessThan,
139+
);
140+
results = result.iter().map(|x| x as usize).collect();
141+
}
142+
Err(_) => {
143+
panic!("Error converting int to keywrapper")
144+
}
145+
}
146+
}
147+
WhereClauseComparator::LessThanOrEqual => {
148+
let metadata_value_keywrapper = (*operand).try_into();
149+
match metadata_value_keywrapper {
150+
Ok(keywrapper) => {
151+
let result = callback(
152+
&direct_where_comparison.key,
153+
&keywrapper,
154+
MetadataType::IntType,
155+
WhereClauseComparator::LessThanOrEqual,
156+
);
157+
results = result.iter().map(|x| x as usize).collect();
158+
}
159+
Err(_) => {
160+
panic!("Error converting int to keywrapper")
161+
}
162+
}
163+
}
164+
WhereClauseComparator::GreaterThan => {
165+
let metadata_value_keywrapper = (*operand).try_into();
166+
match metadata_value_keywrapper {
167+
Ok(keywrapper) => {
168+
let result = callback(
169+
&direct_where_comparison.key,
170+
&keywrapper,
171+
MetadataType::IntType,
172+
WhereClauseComparator::GreaterThan,
173+
);
174+
results = result.iter().map(|x| x as usize).collect();
175+
}
176+
Err(_) => {
177+
panic!("Error converting int to keywrapper")
178+
}
179+
}
180+
}
181+
WhereClauseComparator::GreaterThanOrEqual => {
182+
let metadata_value_keywrapper = (*operand).try_into();
183+
match metadata_value_keywrapper {
184+
Ok(keywrapper) => {
185+
let result = callback(
186+
&direct_where_comparison.key,
187+
&keywrapper,
188+
MetadataType::IntType,
189+
WhereClauseComparator::GreaterThanOrEqual,
190+
);
191+
results = result.iter().map(|x| x as usize).collect();
192+
}
193+
Err(_) => {
194+
panic!("Error converting int to keywrapper")
195+
}
196+
}
197+
}
198+
},
199+
WhereComparison::SingleDoubleComparison(operand, comparator) => match comparator {
200+
WhereClauseComparator::Equal => {
201+
let metadata_value_keywrapper = (*operand as f32).try_into();
202+
match metadata_value_keywrapper {
203+
Ok(keywrapper) => {
204+
let result = callback(
205+
&direct_where_comparison.key,
206+
&keywrapper,
207+
MetadataType::DoubleType,
208+
WhereClauseComparator::Equal,
209+
);
210+
results = result.iter().map(|x| x as usize).collect();
211+
}
212+
Err(_) => {
213+
panic!("Error converting double to keywrapper")
214+
}
215+
}
216+
}
217+
WhereClauseComparator::NotEqual => {
218+
todo!();
219+
}
220+
WhereClauseComparator::LessThan => {
221+
let metadata_value_keywrapper = (*operand as f32).try_into();
222+
match metadata_value_keywrapper {
223+
Ok(keywrapper) => {
224+
let result = callback(
225+
&direct_where_comparison.key,
226+
&keywrapper,
227+
MetadataType::DoubleType,
228+
WhereClauseComparator::LessThan,
229+
);
230+
results = result.iter().map(|x| x as usize).collect();
231+
}
232+
Err(_) => {
233+
panic!("Error converting double to keywrapper")
234+
}
235+
}
236+
}
237+
WhereClauseComparator::LessThanOrEqual => {
238+
let metadata_value_keywrapper = (*operand as f32).try_into();
239+
match metadata_value_keywrapper {
240+
Ok(keywrapper) => {
241+
let result = callback(
242+
&direct_where_comparison.key,
243+
&keywrapper,
244+
MetadataType::DoubleType,
245+
WhereClauseComparator::LessThanOrEqual,
246+
);
247+
results = result.iter().map(|x| x as usize).collect();
248+
}
249+
Err(_) => {
250+
panic!("Error converting double to keywrapper")
251+
}
252+
}
253+
}
254+
WhereClauseComparator::GreaterThan => {
255+
let metadata_value_keywrapper = (*operand as f32).try_into();
256+
match metadata_value_keywrapper {
257+
Ok(keywrapper) => {
258+
let result = callback(
259+
&direct_where_comparison.key,
260+
&keywrapper,
261+
MetadataType::DoubleType,
262+
WhereClauseComparator::GreaterThan,
263+
);
264+
results = result.iter().map(|x| x as usize).collect();
265+
}
266+
Err(_) => {
267+
panic!("Error converting double to keywrapper")
268+
}
269+
}
270+
}
271+
WhereClauseComparator::GreaterThanOrEqual => {
272+
let metadata_value_keywrapper = (*operand as f32).try_into();
273+
match metadata_value_keywrapper {
274+
Ok(keywrapper) => {
275+
let result = callback(
276+
&direct_where_comparison.key,
277+
&keywrapper,
278+
MetadataType::DoubleType,
279+
WhereClauseComparator::GreaterThanOrEqual,
280+
);
281+
results = result.iter().map(|x| x as usize).collect();
282+
}
283+
Err(_) => {
284+
panic!("Error converting double to keywrapper")
285+
}
286+
}
287+
}
288+
},
289+
WhereComparison::StringListComparison(operand, list_operator) => {
290+
todo!();
291+
}
292+
WhereComparison::IntListComparison(..) => {
293+
todo!();
294+
}
295+
WhereComparison::DoubleListComparison(..) => {
296+
todo!();
297+
}
298+
}
299+
}
300+
Where::WhereChildren(where_children) => {
301+
// This feels like a crime.
302+
let mut first_iteration = true;
303+
for child in where_children.children.iter() {
304+
let child_results: Vec<usize> =
305+
match process_where_clause_with_callback(&child, callback) {
306+
Ok(result) => result,
307+
Err(_) => vec![],
308+
};
309+
if first_iteration {
310+
results = child_results;
311+
first_iteration = false;
312+
} else {
313+
match where_children.operator {
314+
BooleanOperator::And => {
315+
results = merge_sorted_vecs_conjunction(results, child_results);
316+
}
317+
BooleanOperator::Or => {
318+
results = merge_sorted_vecs_disjunction(results, child_results);
319+
}
320+
}
321+
}
322+
}
323+
}
324+
}
325+
results.sort();
326+
return Ok(results);
327+
}
328+
60329
impl MetadataIndexWriter {
61330
pub fn new_string(init_blockfile_writer: BlockfileWriter) -> Self {
62331
MetadataIndexWriter::StringMetadataIndexWriter(

0 commit comments

Comments
 (0)