Skip to content

Commit 5666dfe

Browse files
committed
Possible optimization for mmap
1 parent 5f738a8 commit 5666dfe

3 files changed

Lines changed: 203 additions & 5 deletions

File tree

sa-index/src/array/mmap.rs

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,89 @@ pub(super) fn read_u64_le(mmap: &Mmap, byte_offset: usize) -> u64 {
66
u64::from_le_bytes(bytes)
77
}
88

9+
/// Streaming sequential iterator over a contiguous range of a compressed or uncompressed
10+
/// mmap-backed suffix array.
11+
///
12+
/// Keeps `current_word` and `next_word` in local variables (register-allocated by the
13+
/// compiler) so that a new mmap read only occurs when crossing a 64-bit block boundary —
14+
/// roughly once per 1.6 entries for a 40-bit SA, vs 1–2 reads per entry with `get_mmap`.
15+
pub(crate) struct MmapSaRangeIter<'a> {
16+
mmap: &'a Mmap,
17+
data_offset: usize,
18+
bits_per_value: usize,
19+
mask: u64,
20+
current_word: u64, // u64 block containing the next value to yield
21+
next_word: u64, // u64 block after current_word (pre-loaded)
22+
block_idx: usize, // index of current_word within the data section
23+
bit_off: usize, // bit offset of next value within current_word (0..64)
24+
remaining: usize, // entries left to yield
25+
}
26+
27+
impl<'a> MmapSaRangeIter<'a> {
28+
pub(crate) fn new(
29+
mmap: &'a Mmap,
30+
data_offset: usize,
31+
bits_per_value: usize,
32+
start: usize,
33+
end: usize,
34+
) -> Self {
35+
let remaining = end.saturating_sub(start);
36+
if remaining == 0 {
37+
return Self {
38+
mmap, data_offset, bits_per_value,
39+
mask: 0, current_word: 0, next_word: 0,
40+
block_idx: 0, bit_off: 0, remaining: 0,
41+
};
42+
}
43+
44+
// (1u64 << 64) overflows; use u64::MAX for the 64-bit uncompressed case
45+
let mask = if bits_per_value == 64 { u64::MAX } else { (1u64 << bits_per_value) - 1 };
46+
47+
let bit_pos = start * bits_per_value;
48+
let block_idx = bit_pos / 64;
49+
let bit_off = bit_pos % 64;
50+
51+
let current_word = read_u64_le(mmap, data_offset + block_idx * 8);
52+
let next_off = data_offset + (block_idx + 1) * 8;
53+
let next_word = if next_off + 8 <= mmap.len() { read_u64_le(mmap, next_off) } else { 0 };
54+
55+
Self { mmap, data_offset, bits_per_value, mask, current_word, next_word, block_idx, bit_off, remaining }
56+
}
57+
}
58+
59+
impl Iterator for MmapSaRangeIter<'_> {
60+
type Item = i64;
61+
62+
#[inline]
63+
fn next(&mut self) -> Option<i64> {
64+
if self.remaining == 0 { return None; }
65+
self.remaining -= 1;
66+
67+
let val = if self.bit_off + self.bits_per_value <= 64 {
68+
// Value fits entirely within current_word
69+
(self.current_word >> (64 - self.bit_off - self.bits_per_value)) & self.mask
70+
} else {
71+
// Value spans current_word and next_word
72+
let end_off = (self.bit_off + self.bits_per_value) % 64;
73+
((self.current_word << end_off) | (self.next_word >> (64 - end_off))) & self.mask
74+
};
75+
76+
// Advance bit cursor; load next word from mmap only on block-boundary crossing
77+
self.bit_off += self.bits_per_value;
78+
if self.bit_off >= 64 {
79+
self.bit_off -= 64;
80+
self.block_idx += 1;
81+
self.current_word = self.next_word;
82+
let next_off = self.data_offset + (self.block_idx + 1) * 8;
83+
self.next_word = if next_off + 8 <= self.mmap.len() {
84+
read_u64_le(self.mmap, next_off)
85+
} else { 0 };
86+
}
87+
88+
Some(val as i64)
89+
}
90+
}
91+
992
/// Returns the suffix array value at the given index from a memory-mapped file.
1093
pub(super) fn get_mmap(mmap: &Mmap, data_offset: usize, bits_per_value: usize, index: usize) -> i64 {
1194
if bits_per_value == 64 {

sa-index/src/array/mod.rs

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,22 @@ impl SuffixArray {
105105
}
106106
}
107107

108+
/// Returns a streaming iterator over SA entries in `[start, end)`.
109+
/// For `MmapBacked`, uses a streaming bit reader that keeps u64 words in CPU registers
110+
/// and only accesses the mmap when crossing a 64-bit block boundary.
111+
pub fn iter_range(&self, start: usize, end: usize) -> SuffixArrayRangeIter<'_> {
112+
match self {
113+
SuffixArray::Original(sa, _) =>
114+
SuffixArrayRangeIter::Original(sa.get(start..end).unwrap_or(&[]).iter()),
115+
SuffixArray::Compressed(ba, _) =>
116+
SuffixArrayRangeIter::Compressed { ba, idx: start, end },
117+
SuffixArray::MmapBacked { mmap, data_offset, bits_per_value, .. } =>
118+
SuffixArrayRangeIter::Mmap(mmap::MmapSaRangeIter::new(
119+
mmap, *data_offset, *bits_per_value, start, end,
120+
)),
121+
}
122+
}
123+
108124
/// Issues an OS prefetch hint (`MADV_WILLNEED`) for the mmap pages covering SA indices
109125
/// `lo..hi_exclusive`. No-op for in-memory variants and on non-Unix platforms.
110126
#[inline]
@@ -122,6 +138,35 @@ impl SuffixArray {
122138
}
123139
}
124140

141+
/// Iterator over a contiguous range of SA entries.
142+
/// - `Original`: wraps a slice iterator — zero overhead.
143+
/// - `Compressed`: calls `BitArray::get()` per entry (heap-hot data).
144+
/// - `Mmap`: uses `MmapSaRangeIter` — keeps u64 words in registers, mmap only touched at block boundaries.
145+
pub enum SuffixArrayRangeIter<'a> {
146+
Original(std::slice::Iter<'a, i64>),
147+
Compressed { ba: &'a BitArray, idx: usize, end: usize },
148+
#[doc(hidden)]
149+
Mmap(mmap::MmapSaRangeIter<'a>),
150+
}
151+
152+
impl Iterator for SuffixArrayRangeIter<'_> {
153+
type Item = i64;
154+
155+
#[inline]
156+
fn next(&mut self) -> Option<i64> {
157+
match self {
158+
Self::Original(iter) => iter.next().copied(),
159+
Self::Compressed { ba, idx, end } => {
160+
if *idx >= *end { return None; }
161+
let val = ba.get(*idx) as i64;
162+
*idx += 1;
163+
Some(val)
164+
}
165+
Self::Mmap(iter) => iter.next(),
166+
}
167+
}
168+
}
169+
125170
impl WriteBinary for SuffixArray {
126171
fn write_binary<W: Write>(self, writer: &mut W) -> Result<(), Box<dyn std::error::Error>> {
127172
match self {
@@ -411,4 +456,77 @@ mod tests {
411456
assert_eq!(loaded.get(i), i as i64 + 1);
412457
}
413458
}
459+
460+
/// Checks that `iter_range(start, end)` produces exactly the values returned by calling
/// `get(i)` for every index of the range. Covered: the in-memory, compressed and
/// mmap-backed (40-bit and 64-bit) variants with a non-zero start and a range spanning
/// several 64-bit words, plus empty and inverted ranges on the in-memory variant.
#[test]
fn test_iter_range_matches_get() {
    use tempdir::TempDir;

    /// Compares `iter_range(3, 17)` with element-wise `get` on `sa`; `failure_msg` is
    /// reported when the two disagree.
    fn assert_range_matches_get(sa: &SuffixArray, failure_msg: &str) {
        let via_iter: Vec<i64> = sa.iter_range(3, 17).collect();
        let via_get: Vec<i64> = (3..17).map(|i| sa.get(i)).collect();
        assert_eq!(via_iter, via_get, "{}", failure_msg);
    }

    // 20 entries: at 40 bits each they span several 64-bit words (8 entries per 5 words).
    let values: Vec<i64> = (0..20).map(|i| i * 12345 + 7).collect();

    // In-memory Vec<i64>.
    {
        let sa = SuffixArray::Original(values.clone(), 1);
        assert_range_matches_get(&sa, "Original iter_range mismatch");
    }

    // In-memory BitArray with 40-bit entries.
    {
        let mut packed = BitArray::with_capacity(20, 40);
        for (pos, &value) in values.iter().enumerate() {
            packed.set(pos, value as u64);
        }
        let sa = SuffixArray::Compressed(packed, 1);
        assert_range_matches_get(&sa, "Compressed iter_range mismatch");
    }

    // Memory-mapped, 40-bit compressed: written to a temporary file and mapped back.
    {
        let tmp = TempDir::new("iter_range_mmap").unwrap();
        let path = tmp.path().join("sa.bin");
        let mut file = std::fs::File::create(&path).unwrap();
        dump_compressed_suffix_array(values.clone(), 1, 40, &mut file).unwrap();
        drop(file);

        let sa = SuffixArray::read_binary_mmap(&path).unwrap();
        assert_range_matches_get(&sa, "MmapBacked iter_range mismatch");
    }

    // Memory-mapped, 64-bit uncompressed.
    {
        let tmp = TempDir::new("iter_range_mmap64").unwrap();
        let path = tmp.path().join("sa64.bin");
        let mut file = std::fs::File::create(&path).unwrap();
        dump_suffix_array(values.clone(), 1, &mut file).unwrap();
        drop(file);

        let sa = SuffixArray::read_binary_mmap(&path).unwrap();
        assert_range_matches_get(&sa, "MmapBacked-64 iter_range mismatch");
    }

    // Empty range (start == end) yields nothing.
    {
        let sa = SuffixArray::Original(values.clone(), 1);
        let yielded = sa.iter_range(5, 5).count();
        assert!(yielded == 0, "Empty range should yield nothing");
    }

    // Inverted range (start > end) must not panic and yields nothing.
    {
        let sa = SuffixArray::Original(values.clone(), 1);
        let yielded = sa.iter_range(10, 3).count();
        assert!(yielded == 0, "Inverted range should yield nothing");
    }
}
414532
}

sa-index/src/sa_searcher.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -426,10 +426,9 @@ impl Searcher {
426426
if let BoundSearchResult::SearchResult((min_bound, max_bound)) = search_bound_result {
427427
// try all the partially matched suffixes and store the matching suffixes in an
428428
// array (stop when our max number of matches is reached)
429-
let mut sa_index = min_bound;
430429
let t_iter = Instant::now();
431-
while sa_index < max_bound {
432-
let suffix = self.sa.get(sa_index) as usize;
430+
for suffix in self.sa.iter_range(min_bound, max_bound) {
431+
let suffix = suffix as usize;
433432

434433
if suffix >= skip {
435434
let match_start = suffix - skip;
@@ -465,8 +464,6 @@ impl Searcher {
465464
}
466465
}
467466
}
468-
469-
sa_index += 1;
470467
}
471468
self.match_iter_ns.fetch_add(t_iter.elapsed().as_nanos() as u64, Ordering::Relaxed);
472469
}

0 commit comments

Comments
 (0)