Skip to content

Commit f9fe204

Browse files
committed
prefetch the text as well
1 parent 6279059 commit f9fe204

2 files changed

Lines changed: 40 additions & 0 deletions

File tree

sa-index/src/sa_searcher.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,9 +426,23 @@ impl Searcher {
426426
if let BoundSearchResult::SearchResult((min_bound, max_bound)) = search_bound_result {
427427
// try all the partially matched suffixes and store the matching suffixes in an
428428
// array (stop when our max number of matches is reached)
429+
const ITER_PREFETCH_DISTANCE: usize = 16;
429430
let mut sa_index = min_bound;
430431
let t_iter = Instant::now();
431432
while sa_index < max_bound {
433+
// Look ITER_PREFETCH_DISTANCE steps ahead in the SA to find the future
434+
// suffix position, then prefetch the text at (future_suffix - skip).
435+
// The SA read is sequential (hardware prefetcher already handles it),
436+
// so this is cheap. The text prefetch hides the DRAM latency for the
437+
// random text access that check_prefix/check_suffix will make ITER_PREFETCH_DISTANCE iterations later.
438+
let future_sa_index = sa_index + ITER_PREFETCH_DISTANCE;
439+
if future_sa_index < max_bound {
440+
let future_suffix = self.sa.get(future_sa_index) as usize;
441+
if future_suffix >= skip {
442+
self.proteins.text().prefetch(future_suffix - skip);
443+
}
444+
}
445+
432446
let suffix = self.sa.get(sa_index) as usize;
433447

434448
if suffix >= skip {

text-compression/src/lib.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,18 @@ pub use traits::{WriteBinary, ReadBinary, ReadBinaryMmap};
1616
/// The 5-bit-to-char lookup table for mmap-backed ProteinText.
1717
const BIT5_TO_CHAR: &[u8; 27] = b"ABCDEFGHIKLMNOPQRSTUVWXYZ-$";
1818

19+
/// Issues a non-blocking hardware prefetch hint for the cache line containing `ptr`.
///
/// * x86_64: emits a `prefetcht0`-class hint via `_mm_prefetch` with `_MM_HINT_T0`.
/// * aarch64: emits `prfm pldl1keep`.
/// * any other architecture: compiles to nothing.
///
/// The pointer is only used as an address operand for the hint; this function never
/// reads or writes through it.
#[inline(always)]
fn prefetch_read(ptr: *const u8) {
    #[cfg(target_arch = "x86_64")]
    {
        // SAFETY: a prefetch is only a hint to the cache hierarchy. It performs no
        // architecturally visible load, so it cannot fault or observe memory even if
        // `ptr` is dangling or unaligned.
        unsafe { std::arch::x86_64::_mm_prefetch(ptr.cast::<i8>(), std::arch::x86_64::_MM_HINT_T0) };
    }
    #[cfg(target_arch = "aarch64")]
    {
        // SAFETY: `prfm` is a hint instruction; it does not fault on invalid addresses,
        // writes no registers or memory, and leaves the stack and flags untouched,
        // which matches the `nostack, preserves_flags, readonly` options below.
        unsafe {
            std::arch::asm!(
                "prfm pldl1keep, [{p}]",
                p = in(reg) ptr,
                options(nostack, preserves_flags, readonly)
            );
        }
    }
    // Keep `ptr` "used" on targets where neither branch above is compiled in.
    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    let _ = ptr;
}
30+
1931
/// Returns the number of bytes the BitArray data occupies for a given text length at 5 bits/value.
2032
pub fn bit_array_byte_size(text_length: usize) -> usize {
2133
let extra = if (text_length * 5).is_multiple_of(64) { 0 } else { 1 };
@@ -188,6 +200,20 @@ impl ProteinText {
188200
ProteinTextSlice::new(self, start, end)
189201
}
190202

203+
/// Non-blocking hardware prefetch hint for the cache line covering character `index`.
204+
/// At 5 bits/char a 64-byte cache line holds 102 characters, so a single hint covers
205+
/// the entire peptide window starting at `index`.
206+
/// No-op for in-memory text and on unsupported platforms.
207+
#[inline]
208+
pub fn prefetch(&self, index: usize) {
209+
if let ProteinText::MmapBacked { mmap, data_offset, .. } = self {
210+
let byte_off = data_offset + (index * 5 / 64) * 8;
211+
if byte_off + 8 <= mmap.len() {
212+
prefetch_read(&mmap[byte_off] as *const u8);
213+
}
214+
}
215+
}
216+
191217
}
192218

193219
impl WriteBinary for ProteinText {

0 commit comments

Comments
 (0)