@@ -105,6 +105,22 @@ impl SuffixArray {
105105 }
106106 }
107107
108+ /// Returns a streaming iterator over SA entries in `[start, end)`.
109+ /// For `MmapBacked`, uses a streaming bit reader that keeps u64 words in CPU registers
110+ /// and only accesses the mmap when crossing a 64-bit block boundary.
111+ pub fn iter_range ( & self , start : usize , end : usize ) -> SuffixArrayRangeIter < ' _ > {
112+ match self {
113+ SuffixArray :: Original ( sa, _) =>
114+ SuffixArrayRangeIter :: Original ( sa. get ( start..end) . unwrap_or ( & [ ] ) . iter ( ) ) ,
115+ SuffixArray :: Compressed ( ba, _) =>
116+ SuffixArrayRangeIter :: Compressed { ba, idx : start, end } ,
117+ SuffixArray :: MmapBacked { mmap, data_offset, bits_per_value, .. } =>
118+ SuffixArrayRangeIter :: Mmap ( mmap:: MmapSaRangeIter :: new (
119+ mmap, * data_offset, * bits_per_value, start, end,
120+ ) ) ,
121+ }
122+ }
123+
108124 /// Issues an OS prefetch hint (`MADV_WILLNEED`) for the mmap pages covering SA indices
109125 /// `lo..hi_exclusive`. No-op for in-memory variants and on non-Unix platforms.
110126 #[ inline]
@@ -122,6 +138,35 @@ impl SuffixArray {
122138 }
123139}
124140
141+ /// Iterator over a contiguous range of SA entries.
142+ /// - `Original`: wraps a slice iterator — zero overhead.
143+ /// - `Compressed`: calls `BitArray::get()` per entry (heap-hot data).
144+ /// - `Mmap`: uses `MmapSaRangeIter` — keeps u64 words in registers, mmap only touched at block boundaries.
145+ pub enum SuffixArrayRangeIter < ' a > {
146+ Original ( std:: slice:: Iter < ' a , i64 > ) ,
147+ Compressed { ba : & ' a BitArray , idx : usize , end : usize } ,
148+ #[ doc( hidden) ]
149+ Mmap ( mmap:: MmapSaRangeIter < ' a > ) ,
150+ }
151+
152+ impl Iterator for SuffixArrayRangeIter < ' _ > {
153+ type Item = i64 ;
154+
155+ #[ inline]
156+ fn next ( & mut self ) -> Option < i64 > {
157+ match self {
158+ Self :: Original ( iter) => iter. next ( ) . copied ( ) ,
159+ Self :: Compressed { ba, idx, end } => {
160+ if * idx >= * end { return None ; }
161+ let val = ba. get ( * idx) as i64 ;
162+ * idx += 1 ;
163+ Some ( val)
164+ }
165+ Self :: Mmap ( iter) => iter. next ( ) ,
166+ }
167+ }
168+ }
169+
125170impl WriteBinary for SuffixArray {
126171 fn write_binary < W : Write > ( self , writer : & mut W ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
127172 match self {
@@ -411,4 +456,77 @@ mod tests {
411456 assert_eq ! ( loaded. get( i) , i as i64 + 1 ) ;
412457 }
413458 }
459+
/// Verifies that `iter_range(start, end)` yields the same values as repeated `get(i)`
/// calls for all three SA variants, using a range with a non-zero start offset, and that
/// degenerate ranges (empty, inverted) yield nothing for the in-memory variants.
#[test]
fn test_iter_range_matches_get() {
    use tempdir::TempDir;

    /// Streams `[3, 17)` from `sa` and checks the result against indexed `get` calls.
    fn assert_matches_get(sa: &SuffixArray, label: &str) {
        let collected: Vec<i64> = sa.iter_range(3, 17).collect();
        let expected: Vec<i64> = (3..17).map(|i| sa.get(i)).collect();
        assert_eq!(collected, expected, "{} iter_range mismatch", label);
    }

    /// Checks that empty (`start == end`) and inverted (`start > end`) ranges yield
    /// nothing and do not panic.
    fn assert_degenerate_ranges_are_empty(sa: &SuffixArray, label: &str) {
        assert_eq!(
            sa.iter_range(5, 5).count(),
            0,
            "{}: empty range should yield nothing",
            label
        );
        assert_eq!(
            sa.iter_range(10, 3).count(),
            0,
            "{}: inverted range should yield nothing",
            label
        );
    }

    // 20 values — enough to cross multiple 64-bit blocks for a 40-bit SA (8 entries/cycle).
    let values: Vec<i64> = (0..20).map(|i| i * 12345 + 7).collect();

    // --- Original (Vec<i64>) ---
    {
        let sa = SuffixArray::Original(values.clone(), 1);
        assert_matches_get(&sa, "Original");
        assert_degenerate_ranges_are_empty(&sa, "Original");
    }

    // --- Compressed (BitArray, 40-bit) ---
    {
        let mut ba = BitArray::with_capacity(20, 40);
        for (i, &v) in values.iter().enumerate() {
            ba.set(i, v as u64);
        }
        let sa = SuffixArray::Compressed(ba, 1);
        assert_matches_get(&sa, "Compressed");
        // The Compressed iterator has its own hand-written bounds guard; exercise it too.
        assert_degenerate_ranges_are_empty(&sa, "Compressed");
    }

    // --- MmapBacked (40-bit compressed, via round-trip through file) ---
    {
        // `tmp` is declared before `sa` so the mapping is dropped before the directory.
        let tmp = TempDir::new("iter_range_mmap").unwrap();
        let path = tmp.path().join("sa.bin");
        let mut file = std::fs::File::create(&path).unwrap();
        dump_compressed_suffix_array(values.clone(), 1, 40, &mut file).unwrap();
        drop(file);

        let sa = SuffixArray::read_binary_mmap(&path).unwrap();
        assert_matches_get(&sa, "MmapBacked");
    }

    // --- MmapBacked (64-bit uncompressed) ---
    {
        let tmp = TempDir::new("iter_range_mmap64").unwrap();
        let path = tmp.path().join("sa64.bin");
        let mut file = std::fs::File::create(&path).unwrap();
        dump_suffix_array(values.clone(), 1, &mut file).unwrap();
        drop(file);

        let sa = SuffixArray::read_binary_mmap(&path).unwrap();
        assert_matches_get(&sa, "MmapBacked-64");
    }

    // NOTE(review): degenerate ranges are not asserted for MmapBacked because that
    // behavior is decided inside `MmapSaRangeIter::new`; add the check once confirmed.
}
414532}
0 commit comments