Skip to content

Commit 44362f5

Browse files
committed
tidyup:minor api cleanup and explanation
1 parent c8daaee commit 44362f5

8 files changed

Lines changed: 249 additions & 240 deletions

File tree

README.md

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ To avoid issues, use --same-file-system when traversing symlinks. Both fd and fi
122122

123123
- **getdirentries64: Optimised approach following a very similar approach to the above method**
124124

125-
- **find_char_in_word**: Locates the first occurrence of a byte in a 64-bit word using SWAR (SIMD within a register), implemented as a const function
125+
- **find_char_in_word/find_last_char_in_word**: Locates the first/last occurrence of a byte in a 64-bit word using SWAR (SIMD within a register), implemented as a const function
126126

127127
- **Compile-time colour mapping**: A compile-time perfect hashmap for colouring file paths, defined in a [separate repository](https://github.com/alexcu2718/compile_time_ls_colours)
128128

@@ -141,29 +141,33 @@ Check source code for further explanation [in src/utils.rs](./src/utils.rs#L195)
141141
pub const unsafe fn dirent_const_time_strlen(drnt: *const dirent64) -> usize {
142142
use core::num::NonZeroU64;
143143
/*The only unsafe action is dereferencing the pointer
144-
This MUST be validated beforehand */
145-
const LO_U64:u64=u64::from_ne_bytes([0x01; size_of::<u64>()]);
144+
This MUST be validated beforehand */
145+
const LO_U64: u64 = u64::from_ne_bytes([0x01; size_of::<u64>()]);
146146
const HI_U64: u64 = u64::from_ne_bytes([0x80; size_of::<u64>()]);
147-
// Create a mask for the first 3 bytes in the case where reclen==24
147+
// Create a mask for the first 3 bytes in the case where reclen==24
148148
const MASK: u64 = u64::from_ne_bytes([0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00]);
149-
const DIRENT_HEADER_START: usize = std::mem::offset_of!(dirent64, d_name);
150-
let reclen = unsafe { (*drnt).d_reclen as usize };
149+
const DIRENT_HEADER_START: usize = core::mem::offset_of!(dirent64, d_name);
150+
let reclen = unsafe { (*drnt).d_reclen as usize };
151151
// Access the last 8 bytes of the word (this is an aligned read due to kernel providing 8 byte aligned dirent structs!)
152152
let last_word: u64 = unsafe { *(drnt.byte_add(reclen - 8).cast::<u64>()) };
153153
// reclen is always multiple of 8 so alignment is guaranteed
154154
let mask = MASK * ((reclen == 24) as u64); // branchless mask
155155
let candidate_pos = last_word | mask; //Mask out the false nulls when d_name is short
156156
//The idea is to convert each 0-byte to 0x80, and each nonzero byte to 0x00
157-
let zero_bit = unsafe { // Use specialised instructions (ctl_nonzero)
157+
let zero_bit = unsafe {
158+
// Use specialised instructions (ctlz_nonzero)
158159
//to avoid 0 check for bitscan forward so it compiles to tzcnt on most CPU's
159-
NonZeroU64::new_unchecked(candidate_pos.wrapping_sub(LO_U64) & !candidate_pos & HI_U64)
160-
};
160+
NonZeroU64::new_unchecked(candidate_pos.wrapping_sub(LO_U64) & !candidate_pos & HI_U64)
161+
};
161162

163+
// Find the position of the null terminator
162164
#[cfg(target_endian = "little")]
163-
let byte_pos = 8 - (zero_bit.trailing_zeros() >> 3) as usize;
165+
let byte_pos = (zero_bit.trailing_zeros() >> 3) as usize;
164166
#[cfg(target_endian = "big")]
165-
let byte_pos = 8 - (zero_bit.leading_zeros() >> 3) as usize;
166-
reclen - DIRENT_HEADER_START - byte_pos
167+
let byte_pos = (zero_bit.leading_zeros() >> 3) as usize;
168+
// reclen-DIRENT_HEADER start is the maximum size of the string
169+
// we then use the position of the `true` null terminator and subtract the 8, it's junk.
170+
reclen - DIRENT_HEADER_START + byte_pos - 8
167171
}
168172

169173

benches/dirent_bench.rs

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,41 +6,33 @@ use std::hint::black_box;
66
#[inline]
77
//modified version to work for this test function(copy pasted really)
88
pub const unsafe fn dirent_const_time_strlen(dirent: *const LibcDirent64) -> usize {
9-
// Offset from the start of the struct to the beginning of d_name.
109
const DIRENT_HEADER_START: usize = core::mem::offset_of!(LibcDirent64, d_name);
11-
// Access the last field and then round up to find the minimum struct size
1210
const MINIMUM_DIRENT_SIZE: usize = DIRENT_HEADER_START.next_multiple_of(8);
13-
1411
const LO_U64: u64 = u64::from_ne_bytes([0x01; size_of::<u64>()]);
1512
const HI_U64: u64 = u64::from_ne_bytes([0x80; size_of::<u64>()]);
16-
1713
let reclen = unsafe { (*dirent).d_reclen } as usize;
18-
1914
/*
2015
Read the last 8 bytes of the struct as a u64.
2116
This works because dirents are always 8-byte aligned. */
2217
// SAFETY: We're indexing in bounds within the pointer (it is guaranteed aligned by the kernel)
2318
let last_word: u64 = unsafe { *(dirent.byte_add(reclen - 8).cast::<u64>()) };
24-
2519

2620
const MASK: u64 = u64::from_ne_bytes([0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00]);
2721

2822
let mask: u64 = MASK * ((reclen == MINIMUM_DIRENT_SIZE) as u64);
29-
23+
3024
let candidate_pos: u64 = last_word | mask;
3125

3226
let zero_bit = unsafe {
3327
NonZeroU64::new_unchecked(candidate_pos.wrapping_sub(LO_U64) & !candidate_pos & HI_U64)
3428
};
29+
// Find the position of the null terminator
3530
#[cfg(target_endian = "little")]
36-
let byte_pos = 8 - (zero_bit.trailing_zeros() >> 3) as usize;
31+
let byte_pos = (zero_bit.trailing_zeros() >> 3) as usize;
3732
#[cfg(target_endian = "big")]
38-
let byte_pos = 8 - (zero_bit.leading_zeros() >> 3) as usize;
33+
let byte_pos = (zero_bit.leading_zeros() >> 3) as usize;
3934

40-
/* Final length:
41-
total record length - header size - null byte position
42-
*/
43-
reclen - DIRENT_HEADER_START - byte_pos
35+
reclen - DIRENT_HEADER_START + byte_pos - 8
4436
}
4537

4638
#[repr(C)]

src/config.rs

Lines changed: 53 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use crate::cli_helpers::{SizeFilter, TimeFilter};
33
use crate::glob_to_regex;
44
use crate::{DirEntry, FileType, SearchConfigError};
55
use core::num::NonZeroU32;
6+
use core::ops::Deref;
67
use core::time::Duration;
78
use regex::bytes::{Regex, RegexBuilder};
89
use std::time::UNIX_EPOCH;
@@ -76,45 +77,47 @@ impl FileTypeFilter {
7677
}
7778
}
7879

79-
/// Parses a character into a `FileTypeFilter`
80-
///
81-
/// This method converts a single character into the corresponding file type filter,
82-
/// which is useful for parsing command-line arguments or configuration files.
83-
///
84-
/// # Parameters
85-
/// - `c`: The character to parse into a file type filter
86-
///
87-
/// # Returns
88-
/// - `Ok(FileTypeFilter)` if the character represents a valid file type
89-
/// - `Err(String)` with an error message if the character is invalid
90-
///
91-
/// # Supported Characters
92-
/// - `'d'` - Directory
93-
/// - `'u'` - Unknown file type
94-
/// - `'l'` - Symbolic link
95-
/// - `'f'` - Regular file
96-
/// - `'p'` - Named pipe (FIFO)
97-
/// - `'c'` - Character device
98-
/// - `'b'` - Block device
99-
/// - `'s'` - Socket
100-
/// - `'e'` - Empty file
101-
/// - `'x'` - Executable file
102-
///
103-
/// # Examples
104-
/// ```
105-
/// # use fdf::FileTypeFilter;
106-
/// assert!(FileTypeFilter::from_char('d').is_ok());
107-
/// assert!(FileTypeFilter::from_char('f').is_ok());
108-
/// assert!(FileTypeFilter::from_char('z').is_err()); // Invalid character
109-
///
110-
/// let filter = FileTypeFilter::from_char('l').unwrap();
111-
/// assert!(matches!(filter, FileTypeFilter::Symlink));
112-
/// ```
113-
///
114-
/// # Errors
115-
/// Returns an error if the character does not correspond to any known file type.
116-
/// The error message includes the invalid character and suggests using `--help`
117-
/// to see valid types.
80+
/**
81+
Parses a character into a `FileTypeFilter`
82+
83+
This method converts a single character into the corresponding file type filter,
84+
which is useful for parsing command-line arguments or configuration files.
85+
86+
# Parameters
87+
- `c`: The character to parse into a file type filter
88+
89+
# Returns
90+
- `Ok(FileTypeFilter)` if the character represents a valid file type
91+
- `Err(String)` with an error message if the character is invalid
92+
93+
# Supported Characters
94+
- `'d'` - Directory
95+
- `'u'` - Unknown file type
96+
- `'l'` - Symbolic link
97+
- `'f'` - Regular file
98+
- `'p'` - Named pipe (FIFO)
99+
- `'c'` - Character device
100+
- `'b'` - Block device
101+
- `'s'` - Socket
102+
- `'e'` - Empty file
103+
- `'x'` - Executable file
104+
105+
# Examples
106+
```
107+
# use fdf::FileTypeFilter;
108+
assert!(FileTypeFilter::from_char('d').is_ok());
109+
assert!(FileTypeFilter::from_char('f').is_ok());
110+
assert!(FileTypeFilter::from_char('z').is_err()); // Invalid character
111+
112+
let filter = FileTypeFilter::from_char('l').unwrap();
113+
assert!(matches!(filter, FileTypeFilter::Symlink));
114+
```
115+
116+
# Errors
117+
Returns an error if the character does not correspond to any known file type.
118+
The error message includes the invalid character and suggests using `--help`
119+
to see valid types.
120+
*/
118121
pub fn from_char(c: char) -> core::result::Result<Self, String> {
119122
match c {
120123
'd' => Ok(Self::Directory),
@@ -249,7 +252,7 @@ impl SearchConfig {
249252
/// Checks for extension match via memchr
250253
pub fn matches_extension<S>(&self, entry: &S) -> bool
251254
where
252-
S: core::ops::Deref<Target = [u8]>,
255+
S: Deref<Target = [u8]>,
253256
{
254257
debug_assert!(
255258
!entry.contains(&b'/'),
@@ -267,10 +270,12 @@ impl SearchConfig {
267270
reason = "Only checking regular files"
268271
)]
269272
#[allow(clippy::cast_sign_loss)] //signloss dont matter here
270-
/// Applies the configured size filter to a directory entry, if any.
271-
/// For regular files the size is checked directly.
272-
/// For symlinks, the target is resolved first and then checked if it is a regular file.
273-
/// Other file types are ignored.
273+
/**
274+
Applies the configured size filter to a directory entry, if any.
275+
For regular files the size is checked directly.
276+
For symlinks, the target is resolved first and then checked if it is a regular file.
277+
Other file types are ignored.
278+
*/
274279
pub fn matches_size(&self, entry: &DirEntry) -> bool {
275280
let Some(filter_size) = self.size_filter else {
276281
return true; // No filter means always match
@@ -281,17 +286,10 @@ impl SearchConfig {
281286
.file_size()
282287
.ok()
283288
.is_some_and(|sz| filter_size.is_within_size(sz as _)),
284-
285-
FileType::Symlink => {
286-
if let Ok(path) = entry.to_full_path() {
287-
if path.is_regular_file() {
288-
if let Ok(sz) = entry.file_size() {
289-
return filter_size.is_within_size(sz as _);
290-
}
291-
}
292-
}
293-
false
294-
}
289+
// Resolve to full path first, this basically avoids broken symlinks
290+
FileType::Symlink => entry.to_full_path_with_stat().is_ok_and(|(path, statted)| {
291+
path.is_regular_file() && filter_size.is_within_size(statted.st_size as _)
292+
}),
295293

296294
_ => false,
297295
}
@@ -344,8 +342,6 @@ impl SearchConfig {
344342
#[expect(clippy::indexing_slicing, reason = "used for debug assert")]
345343
/// Checks if the path or file name matches the regex filter
346344
/// If `full_path` is false, only checks the filename
347-
///
348-
/// Use a branchless trick to do indexing
349345
pub fn matches_path(&self, dir: &DirEntry, full_path: bool) -> bool {
350346
debug_assert!(
351347
!dir.file_name().contains(&b'/'),

src/direntry.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,35 @@ impl DirEntry {
781781
})
782782
})
783783
}
784+
#[inline]
785+
/// Similar to above function but modified for internal use within size filtering.
786+
/// TODO, make this public at some point.
787+
/// This just avoids calling stat twice basically.
788+
pub(crate) fn to_full_path_with_stat(&self) -> Result<(Self, stat)> {
789+
debug_assert!(
790+
self.is_symlink(),
791+
"in this private function we expect the file type to always be symlinks!"
792+
);
793+
self.get_realpath(|full_path| {
794+
let file_name_index = full_path.to_bytes().file_name_index();
795+
796+
let statted = self.get_stat()?;
797+
798+
let (file_type, ino) = (FileType::from_stat(&statted), access_stat!(statted, st_ino));
799+
800+
Ok((
801+
Self {
802+
path: full_path.into(),
803+
file_type,
804+
inode: ino,
805+
depth: self.depth,
806+
file_name_index,
807+
is_traversible_cache: Cell::new(Some(file_type == FileType::Directory)),
808+
},
809+
statted,
810+
))
811+
})
812+
}
784813

785814
#[inline]
786815
/**

src/lib.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,9 @@ mod test;
177177
pub use buffer::{AlignedBuffer, ValueType};
178178

179179
mod memchr_derivations;
180-
pub use memchr_derivations::{contains_zero_byte, find_char_in_word, find_zero_byte_u64, memrchr};
180+
pub use memchr_derivations::{
181+
contains_zero_byte, find_char_in_word, find_last_char_in_word, find_zero_byte_u64, memrchr,
182+
};
181183
mod direntry;
182184
pub use direntry::DirEntry;
183185

@@ -398,6 +400,8 @@ impl Finder {
398400
|| {
399401
// Fast path: only calls stat IFF self.starting_filesystem is Some
400402
debug_assert!(!self.search_config.follow_symlinks,"we expect follow symlinks to be disabled when following this path");
403+
404+
401405
self.starting_filesystem.is_none_or(|start_dev| {
402406
dir.get_stat()
403407
.is_ok_and(|statted| start_dev == access_stat!(statted, st_dev))
@@ -415,11 +419,12 @@ impl Finder {
415419
}
416420

417421
// Symlinks that may point to directories
418-
// This could be optimised a bit, symlinks are a beast due to their complexity.
419422
// self.search_config.follow_symlinks <=> inode_cache is some
420423
FileType::Symlink
421424
if self.inode_cache.as_ref().is_some_and(|cache| {
422425
debug_assert!(self.search_config.follow_symlinks,"we expect follow symlinks to be enabled when following this path");
426+
427+
423428
dir.get_stat().is_ok_and(|stat| {
424429
FileType::from_stat(&stat) == FileType::Directory &&
425430
// Check filesystem boundary

0 commit comments

Comments
 (0)