Skip to content

Commit fe73c27

Browse files
committed
tidyup: no semantically meaningful changes, subtle refactor
1 parent b878f56 commit fe73c27

9 files changed

Lines changed: 130 additions & 236 deletions

File tree

README.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ To avoid issues, use --same-file-system when traversing symlinks. Both fd and fi
132132

133133
The following function provides an elegant solution to avoid branch mispredictions/SIMD instructions during directory entry parsing (a performance-critical loop):
134134

135+
Check source code for further explanation [in src/utils.rs](./src/utils.rs#L195)**
136+
135137
```rust
136138
// Computational complexity: O(1) - truly constant time
137139
// Used on Linux/Solaris/Illumos/Android systems; BSD systems/macOS store name length trivially
@@ -149,12 +151,13 @@ pub const unsafe fn dirent_const_time_strlen(drnt: *const dirent64) -> usize {
149151
const DIRENT_HEADER_START: usize = std::mem::offset_of!(dirent64, d_name);
150152
let reclen = unsafe { (*drnt).d_reclen as usize };
151153
// Access the last 8 bytes of the word (this is an aligned read due to kernel providing 8 byte aligned dirent structs!)
152-
let last_word: u64 = unsafe { *(drnt.cast::<u8>()).add(reclen - 8).cast::<u64>() };
154+
let last_word: u64 = unsafe { *(drnt.byte_add(reclen - 8).cast::<u64>()) };
153155
// reclen is always multiple of 8 so alignment is guaranteed
154156
let mask = MASK * ((reclen == 24) as u64); // branchless mask
155157
let candidate_pos = last_word | mask; //Mask out the false nulls when d_name is short
156158
//The idea is to convert each 0-byte to 0x80, and each nonzero byte to 0x00
157-
let zero_bit = unsafe { // Use specialised instructions (ctl_nonzero) to avoid 0 check for bitscan forward so it compiles to tzcnt on most CPU's
159+
let zero_bit = unsafe { // Use specialised instructions (ctl_nonzero)
160+
//to avoid 0 check for bitscan forward so it compiles to tzcnt on most CPU's
158161
NonZeroU64::new_unchecked(candidate_pos.wrapping_sub(LO_U64) & !candidate_pos & HI_U64)
159162
};
160163

benches/dirent_bench.rs

Lines changed: 6 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,8 @@
11
use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
22

3+
use core::num::NonZeroU64;
34
use std::hint::black_box;
45

5-
#[inline]
6-
pub(crate) const fn repeat_u64(byte: u8) -> u64 {
7-
u64::from_ne_bytes([byte; size_of::<u64>()])
8-
}
9-
10-
const LO_U64: u64 = repeat_u64(0x01);
11-
12-
const HI_U64: u64 = repeat_u64(0x80);
13-
146
#[inline]
157
//modified version to work for this test function(copy pasted really)
168
pub const unsafe fn dirent_const_time_strlen(dirent: *const LibcDirent64) -> usize {
@@ -19,7 +11,8 @@ pub const unsafe fn dirent_const_time_strlen(dirent: *const LibcDirent64) -> usi
1911
// Access the last field and then round up to find the minimum struct size
2012
const MINIMUM_DIRENT_SIZE: usize = DIRENT_HEADER_START.next_multiple_of(8);
2113

22-
use core::num::NonZeroU64;
14+
const LO_U64: u64 = u64::from_ne_bytes([0x01; size_of::<u64>()]);
15+
const HI_U64: u64 = u64::from_ne_bytes([0x80; size_of::<u64>()]);
2316

2417
/* Accessing `d_reclen` is safe because the struct is kernel-provided.
2518
/ SAFETY: `dirent` is valid by precondition */
@@ -29,20 +22,13 @@ pub const unsafe fn dirent_const_time_strlen(dirent: *const LibcDirent64) -> usi
2922
Read the last 8 bytes of the struct as a u64.
3023
This works because dirents are always 8-byte aligned. */
3124
// SAFETY: We're indexing in bounds within the pointer (it is guaranteed aligned by the kernel)
32-
let last_word: u64 = unsafe { *(dirent.cast::<u8>()).add(reclen - 8).cast::<u64>() };
25+
let last_word: u64 = unsafe { *(dirent.byte_add(reclen - 8).cast::<u64>()) };
3326
/* Note, I don't index as a u64 with eg (reclen-8)/8 or (reclen-8)>>3 because that adds a division which is a costly operation, relatively speaking
3427
let last_word: u64 = unsafe { *(dirent.cast::<u64>()).add((reclen - 8)/8 (or >>3))}; //this will also work but it's less performant (MINUTELY)
3528
*/
3629

37-
#[cfg(target_endian = "little")]
38-
const MASK: u64 = 0x00FF_FFFFu64;
39-
#[cfg(target_endian = "big")]
40-
const MASK: u64 = 0xFFFF_FF00_0000_0000u64; // byte order is shifted unintuitively on big endian!
30+
const MASK: u64 = u64::from_ne_bytes([0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00]);
4131

42-
/* When the record length is 24/`MINIMUM_DIRENT_SIZE`, the kernel may insert nulls before d_name.
43-
Which will exist on index's 17/18 (or opposite, for big endian...sigh.,)
44-
Mask them out to avoid false detection of a terminator.
45-
Multiplying by 0 or 1 applies the mask conditionally without branching. */
4632
let mask: u64 = MASK * ((reclen == MINIMUM_DIRENT_SIZE) as u64);
4733
/*
4834
Apply the mask to ignore non-name bytes while preserving name bytes.
@@ -53,21 +39,12 @@ pub const unsafe fn dirent_const_time_strlen(dirent: *const LibcDirent64) -> usi
5339
*/
5440
let candidate_pos: u64 = last_word | mask;
5541

56-
/*
57-
Locate the first null byte in constant time using SWAR.
58-
Subtract the position of the index of the 0 then add 1 to compute its position relative to the start of d_name.
59-
60-
SAFETY: The u64 can never be all 0's post-SWAR, therefore we can make a niche optimisation
61-
https://doc.rust-lang.org/std/num/struct.NonZero.html#tymethod.trailing_zeros
62-
https://doc.rust-lang.org/std/num/struct.NonZero.html#tymethod.leading_zeros
63-
(using ctlz_nonzero instruction which is superior to ctlz but can't handle all 0 numbers)
64-
*/
6542
let zero_bit = unsafe {
6643
NonZeroU64::new_unchecked(candidate_pos.wrapping_sub(LO_U64) & !candidate_pos & HI_U64)
6744
};
6845
#[cfg(target_endian = "little")]
6946
let byte_pos = 8 - (zero_bit.trailing_zeros() >> 3) as usize;
70-
#[cfg(not(target_endian = "little"))]
47+
#[cfg(target_endian = "big")]
7148
let byte_pos = 8 - (zero_bit.leading_zeros() >> 3) as usize;
7249

7350
/* Final length:

src/config.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use crate::BytePath as _;
22
use crate::cli_helpers::SizeFilter;
33
use crate::glob_to_regex;
44
use crate::{DirEntry, FileType, SearchConfigError};
5-
use core::num::NonZeroUsize;
5+
use core::num::NonZeroU32;
66
use regex::bytes::{Regex, RegexBuilder};
77
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
88
/// File type filter for directory traversal
@@ -152,7 +152,7 @@ pub struct SearchConfig {
152152
///if this is Some, then we match against the extension of the file otherwise accept (if none)
153153
pub(crate) file_name_only: bool,
154154
///if true, then we only match against the file name, otherwise we match against the full path when regexing
155-
pub(crate) depth: Option<NonZeroUsize>,
155+
pub(crate) depth: Option<NonZeroU32>,
156156
///the maximum depth to search, if None then no limit
157157
pub(crate) follow_symlinks: bool, //if true, then we follow symlinks, otherwise we do not follow them
158158
/// a size filter
@@ -176,7 +176,7 @@ impl SearchConfig {
176176
keep_dirs: bool,
177177
filenameonly: bool,
178178
extension_match: Option<Box<[u8]>>,
179-
depth: Option<NonZeroUsize>,
179+
depth: Option<NonZeroU32>,
180180
follow_symlinks: bool,
181181
size_filter: Option<SizeFilter>,
182182
type_filter: Option<FileTypeFilter>,

src/finderbuilder.rs

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use core::num::NonZeroUsize;
1+
use core::num::NonZeroU32;
22

33
use crate::{
44
DirEntry, DirEntryFilter, FileTypeFilter, FilterType, Finder, SearchConfig, SearchConfigError,
@@ -10,8 +10,7 @@ use std::{
1010
ffi::{OsStr, OsString},
1111
fs::metadata,
1212
os::unix::fs::MetadataExt as _,
13-
path::Path,
14-
path::PathBuf,
13+
path::{Path, PathBuf},
1514
};
1615

1716
//Set the threadcount at compile time (backing to a minimum of 1, **this should never happen**)
@@ -34,7 +33,7 @@ pub struct FinderBuilder {
3433
pub(crate) keep_dirs: bool,
3534
pub(crate) file_name_only: bool,
3635
pub(crate) extension_match: Option<Box<[u8]>>,
37-
pub(crate) max_depth: Option<NonZeroUsize>,
36+
pub(crate) max_depth: Option<NonZeroU32>,
3837
pub(crate) follow_symlinks: bool,
3938
pub(crate) filter: Option<DirEntryFilter>,
4039
pub(crate) size_filter: Option<SizeFilter>,
@@ -120,11 +119,11 @@ impl FinderBuilder {
120119
}
121120
#[must_use]
122121
/// Set maximum search depth
123-
pub const fn max_depth(mut self, max_depth: Option<usize>) -> Self {
122+
pub const fn max_depth(mut self, max_depth: Option<u32>) -> Self {
124123
match max_depth {
125124
None => self,
126125
Some(num) => {
127-
self.max_depth = core::num::NonZeroUsize::new(num);
126+
self.max_depth = core::num::NonZeroU32::new(num);
128127
self
129128
}
130129
}
@@ -264,9 +263,7 @@ impl FinderBuilder {
264263
}
265264
};
266265

267-
let inode_cache: Option<DashSet<(u64, u64)>> =
268-
(self.same_filesystem || self.follow_symlinks).then(DashSet::new);
269-
//Enable the cache if same file system too, this helps de-duplicate for free (since it's 1 stat call regardless)
266+
let inode_cache: Option<DashSet<(u64, u64)>> = self.follow_symlinks.then(DashSet::new);
270267

271268
Ok(Finder {
272269
root: resolved_root,

src/iter.rs

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
#![allow(clippy::must_use_candidate)]
22
use crate::FileType;
3-
#[cfg(any(target_os = "linux", target_os = "macos", target_os = "android"))]
4-
use crate::types::SyscallBuffer;
53
use crate::{DirEntry, FileDes, Result};
64
use crate::{dirent64, readdir64};
75
use core::cell::Cell;
@@ -120,7 +118,7 @@ pub struct GetDents {
120118
pub(crate) fd: FileDes,
121119
/// Kernel buffer for batch reading directory entries via system call I/O
122120
/// Approximately 4.1KB in size, optimised for typical directory traversal
123-
pub(crate) syscall_buffer: SyscallBuffer,
121+
pub(crate) syscall_buffer: crate::types::SyscallBuffer,
124122
/// buffer for constructing full entry paths
125123
/// Reused for each entry to avoid repeated memory allocation (only constructed once per dir)
126124
pub(crate) path_buffer: Vec<u8>,
@@ -291,7 +289,7 @@ impl GetDents {
291289

292290
#[inline]
293291
/// Provides read only access to the internal buffer that holds the bytes read from the syscall
294-
pub const fn borrow_syscall_buffer(&self) -> &SyscallBuffer {
292+
pub const fn borrow_syscall_buffer(&self) -> &crate::types::SyscallBuffer {
295293
&self.syscall_buffer
296294
}
297295

@@ -301,7 +299,7 @@ impl GetDents {
301299
debug_assert!(fd.is_open(), "We expect it to always be open");
302300

303301
let (path_buffer, path_len) = Self::init_from_direntry(dir);
304-
let buffer = SyscallBuffer::new();
302+
let buffer = crate::types::SyscallBuffer::new();
305303
Ok(Self {
306304
fd,
307305
syscall_buffer: buffer,
@@ -602,12 +600,6 @@ macro_rules! impl_iter {
602600
};
603601
}
604602

605-
impl_iter!(ReadDir);
606-
#[cfg(any(target_os = "linux", target_os = "android"))]
607-
impl_iter!(GetDents);
608-
#[cfg(target_os = "macos")]
609-
impl_iter!(GetDirEntries);
610-
611603
// simple repetition avoider
612604
macro_rules! impl_dirent_constructor {
613605
($type:ty) => {
@@ -652,19 +644,24 @@ macro_rules! impl_iterator_for_dirent {
652644
};
653645
}
654646

647+
// Common to all platforms
648+
impl_iter!(ReadDir);
655649
impl_iterator_for_dirent!(ReadDir);
650+
impl_dirent_constructor!(ReadDir);
656651

652+
// Linux/Android specific
653+
#[cfg(any(target_os = "linux", target_os = "android"))]
654+
impl_iter!(GetDents);
657655
#[cfg(any(target_os = "linux", target_os = "android"))]
658656
impl_iterator_for_dirent!(GetDents);
659-
660-
#[cfg(target_os = "macos")]
661-
impl_iterator_for_dirent!(GetDirEntries);
662-
663-
impl_dirent_constructor!(ReadDir);
664-
665657
#[cfg(any(target_os = "linux", target_os = "android"))]
666658
impl_dirent_constructor!(GetDents);
667659

660+
// macOS specific
661+
#[cfg(target_os = "macos")]
662+
impl_iter!(GetDirEntries);
663+
#[cfg(target_os = "macos")]
664+
impl_iterator_for_dirent!(GetDirEntries);
668665
#[cfg(target_os = "macos")]
669666
impl_dirent_constructor!(GetDirEntries);
670667

@@ -684,7 +681,7 @@ pub struct GetDirEntries {
684681
pub(crate) fd: FileDes,
685682
/// Kernel buffer for batch reading directory entries via system call I/O
686683
/// ~32kb in size (8*4096, matching macos readdir semantics) in size, optimised for typical directory traversal
687-
pub(crate) syscall_buffer: SyscallBuffer,
684+
pub(crate) syscall_buffer: crate::types::SyscallBuffer,
688685
/// buffer for constructing full entry paths
689686
/// Reused for each entry to avoid repeated memory allocation (only constructed once per dir)
690687
pub(crate) path_buffer: Vec<u8>,
@@ -782,7 +779,7 @@ impl GetDirEntries {
782779
debug_assert!(fd.is_open(), "We expect it to always be open");
783780

784781
let (path_buffer, path_len) = Self::init_from_direntry(dir);
785-
let buffer = SyscallBuffer::new();
782+
let buffer = crate::types::SyscallBuffer::new();
786783
Ok(Self {
787784
fd,
788785
syscall_buffer: buffer,

0 commit comments

Comments
 (0)