Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.

Commit 1c93186

Browse files
committed
resolve phrase
1 parent 43c5894 commit 1c93186

File tree

1 file changed

+16
-42
lines changed
  • milli/src/search/criteria

1 file changed

+16
-42
lines changed

milli/src/search/criteria/mod.rs

Lines changed: 16 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use self::words::Words;
1515
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
1616
use crate::search::criteria::geo::Geo;
1717
use crate::search::{word_derivations, WordDerivationsCache};
18-
use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result};
18+
use crate::{AscDesc as AscDescName, DocumentId, Error, FieldId, Index, Member, Result};
1919

2020
mod asc_desc;
2121
mod attribute;
@@ -307,14 +307,7 @@ pub fn resolve_query_tree(
307307
use Operation::{And, Or, Phrase, Query};
308308

309309
match query_tree {
310-
And(ops) => {
311-
let candidates = ops
312-
.iter()
313-
.map(|op| resolve_operation(ctx, op, wdcache))
314-
.collect::<Result<Vec<_>>>()?;
315-
316-
Ok(candidates.and())
317-
}
310+
And(ops) => ops.into_iter().map(|op| resolve_operation(ctx, op, wdcache)).and(),
318311
Or(_, ops) => ops.into_iter().map(|op| resolve_operation(ctx, op, wdcache)).or(),
319312
Phrase(words) => resolve_phrase(ctx, &words),
320313
Query(q) => Ok(query_docids(ctx, q, wdcache)?),
@@ -325,41 +318,22 @@ pub fn resolve_query_tree(
325318
}
326319

327320
pub fn resolve_phrase(ctx: &dyn Context, phrase: &[String]) -> Result<RoaringBitmap> {
328-
let mut candidates = RoaringBitmap::new();
329-
let mut first_iter = true;
330321
let winsize = phrase.len().min(7);
331322

332-
for win in phrase.windows(winsize) {
333-
// Get all the documents with the matching distance for each word pair.
334-
let mut bitmaps = Vec::with_capacity(winsize.pow(2));
335-
for (offset, s1) in win.iter().enumerate() {
336-
for (dist, s2) in win.iter().skip(offset + 1).enumerate() {
337-
match ctx.word_pair_proximity_docids(s1, s2, dist as u8 + 1)? {
338-
Some(m) => bitmaps.push(m),
339-
// If there are no documents for this distance, there will be no
340-
// results for the phrase query.
341-
None => return Ok(RoaringBitmap::new()),
342-
}
343-
}
344-
}
345-
346-
// We sort the bitmaps so that we perform the small intersections first, which is faster.
347-
bitmaps.sort_unstable_by(|a, b| a.len().cmp(&b.len()));
348-
349-
for bitmap in bitmaps {
350-
if first_iter {
351-
candidates = bitmap;
352-
first_iter = false;
353-
} else {
354-
candidates &= bitmap;
355-
}
356-
// There will be no match, return early
357-
if candidates.is_empty() {
358-
break;
359-
}
360-
}
361-
}
362-
Ok(candidates)
323+
phrase
324+
.windows(winsize)
325+
.flat_map(|win| {
326+
win.iter().enumerate().flat_map(move |(offset, s1)| {
327+
win.iter().skip(offset + 1).enumerate().map(move |(dist, s2)| {
328+
ctx.word_pair_proximity_docids(s1, s2, dist as u8 + 1)
329+
// If there are no documents for this distance, there will be no
330+
// results for the phrase query.
331+
.map(|m| m.unwrap_or_default())
332+
})
333+
})
334+
})
335+
.and()
336+
.map_err(Error::from)
363337
}
364338

365339
fn all_word_pair_proximity_docids<T: AsRef<str>, U: AsRef<str>>(

0 commit comments

Comments
 (0)