From d0b0380a54a855634e735bf718f373cf19dae949 Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Thu, 7 May 2026 17:34:21 +0000 Subject: [PATCH 01/11] perf-self-profile: expose public Unwinder API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `dial9_perf_self_profile::unwinder::Unwinder`, a zero-sized handle that proves the SIGSEGV fault handler is installed and exposes a non-allocating `capture()` for collecting the calling thread's stack. This is step 1 of the memory-profiling rollout (see dial9-tokio-telemetry/design/memory-profiling.md §2 and §14): the allocator hook and any future self-profiling caller needs a stable public entry point into the frame-pointer unwinder, which was previously reachable only through `pub(crate)` plumbing inside the CPU profiler. Implementation: - `Unwinder::install()` wires to the existing `fp_profiler::install_handler()` (idempotent, thread-safe). Returns an `io::Error` on non-Linux or non-x86_64/aarch64 targets. - `Unwinder::capture(&self, out)` reads the caller's return address and frame pointer via inline asm, then delegates to the existing frame- pointer walker. Frame 0 is the PC `capture` returns to — i.e. inside the caller — per the design's frame-0 contract. Does not modify `MAX_FRAME_SIZE` or any existing unwinder internals. Raising the cap to 1 MiB (mentioned in the design) is deferred to a dedicated commit with a false-frame-rate measurement. Tests cover idempotent install (single thread and 8-way concurrent), frame-0 contract (frame 0 is not inside `capture` itself), and output- buffer limits. --- perf-self-profile/src/lib.rs | 1 + perf-self-profile/src/unwinder.rs | 204 ++++++++++++++++++++++++++++++ 2 files changed, 205 insertions(+) create mode 100644 perf-self-profile/src/unwinder.rs diff --git a/perf-self-profile/src/lib.rs b/perf-self-profile/src/lib.rs index f7c58293..912483af 100644 --- a/perf-self-profile/src/lib.rs +++ b/perf-self-profile/src/lib.rs @@ -42,6 +42,7 @@ mod sampler; mod symbolize; mod sys; pub mod tracepoint; +pub mod unwinder; pub use offline_symbolize::SymbolTableEntry; pub use sampler::{EventSource, Sample, SamplerConfig, SamplingMode}; diff --git a/perf-self-profile/src/unwinder.rs b/perf-self-profile/src/unwinder.rs new file mode 100644 index 00000000..3ada66d0 --- /dev/null +++ b/perf-self-profile/src/unwinder.rs @@ -0,0 +1,204 @@ +//! Public stack-unwinding API. +//! +//! Provides [`Unwinder`], a zero-sized handle that proves the SIGSEGV fault +//! handler is installed, and exposes a non-allocating `capture()` method for +//! collecting stack traces of the calling thread. + +/// Handle that proves the SIGSEGV fault handler is installed. +/// Zero-sized, freely copyable. +#[derive(Clone, Copy, Debug)] +pub struct Unwinder { + _private: (), +} + +impl Unwinder { + /// Install the SIGSEGV fault handler used by stack capture. + /// Idempotent: safe to call multiple times from multiple threads. + /// + /// Returns `Err` if `sigaction` fails (Linux) or if the platform is + /// unsupported. + /// + /// # Requirements + /// - Frame pointers (build with `-C force-frame-pointers=yes`). + pub fn install() -> std::io::Result { + platform::install()?; + Ok(Self { _private: () }) + } + + /// Capture a stack trace of the calling thread into `out`. Returns + /// the number of frames written. Never allocates. + /// + /// # Frame-0 contract + /// `out[0]` is the return address *into the caller of `capture`* — + /// i.e. the PC where `capture` itself will return. Subsequent frames + /// walk outward via the frame-pointer chain. Callers should expect + /// to skip `capture` itself plus any `#[inline(never)]` shim they + /// insert. + /// + /// # Safety contract + /// Must not be called from inside a different SIGSEGV handler. + #[inline(never)] + pub fn capture(&self, out: &mut [u64]) -> usize { + platform::capture(out) + } +} + +#[cfg(all( + target_os = "linux", + any(target_arch = "x86_64", target_arch = "aarch64") +))] +mod platform { + use crate::sys::fp_profiler::{install_handler, unwind::unwind}; + + pub fn install() -> std::io::Result<()> { + // SAFETY: installs the SIGSEGV handler for safe_load; idempotent. + unsafe { install_handler() } + } + + #[inline(never)] + pub fn capture(out: &mut [u64]) -> usize { + let (pc, fp, sp) = read_registers(); + // SAFETY: handler is installed (caller holds Unwinder), and we are not + // inside a SIGSEGV handler. + unsafe { unwind(pc, fp, sp, out) } + } + + #[cfg(target_arch = "x86_64")] + #[inline(always)] + fn read_registers() -> (usize, usize, usize) { + let fp: usize; + let sp: usize; + unsafe { + core::arch::asm!( + "mov {fp}, rbp", + "mov {sp}, rsp", + fp = out(reg) fp, + sp = out(reg) sp, + options(nostack, nomem), + ); + } + // We want frame 0 to be the return address of our caller (capture). + // The current fp points to capture's frame. The return address of + // capture is at *(fp + 8). The caller's fp is at *fp. + // We pass the return address as `pc` and *fp as the new fp to start + // walking from the caller's caller. + let ret_addr = unsafe { *(fp as *const usize).add(1) }; + let caller_fp = unsafe { *(fp as *const usize) }; + (ret_addr, caller_fp, sp) + } + + #[cfg(target_arch = "aarch64")] + #[inline(always)] + fn read_registers() -> (usize, usize, usize) { + let fp: usize; + let sp: usize; + unsafe { + core::arch::asm!( + "mov {fp}, x29", + "mov {sp}, sp", + fp = out(reg) fp, + sp = out(reg) sp, + options(nostack, nomem), + ); + } + let ret_addr = unsafe { *(fp as *const usize).add(1) }; + let caller_fp = unsafe { *(fp as *const usize) }; + (ret_addr, caller_fp, sp) + } +} + +#[cfg(not(all( + target_os = "linux", + any(target_arch = "x86_64", target_arch = "aarch64") +)))] +mod platform { + pub fn install() -> std::io::Result<()> { + Err(std::io::Error::new( + std::io::ErrorKind::Unsupported, + "Unwinder is only available on Linux x86_64/aarch64", + )) + } + + pub fn capture(_out: &mut [u64]) -> usize { + 0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn install_is_idempotent() { + let r1 = Unwinder::install(); + let r2 = Unwinder::install(); + let r3 = Unwinder::install(); + assert!(r1.is_ok()); + assert!(r2.is_ok()); + assert!(r3.is_ok()); + } + + #[test] + fn install_is_idempotent_across_threads() { + let handles: Vec<_> = (0..8) + .map(|_| std::thread::spawn(Unwinder::install)) + .collect(); + for h in handles { + assert!(h.join().unwrap().is_ok()); + } + } + + #[test] + fn capture_produces_frames() { + let unwinder = Unwinder::install().unwrap(); + #[inline(never)] + fn helper(u: &Unwinder) -> (usize, [u64; 64]) { + let mut out = [0u64; 64]; + let n = u.capture(&mut out); + // Anchor a known address *after* the capture site so we can + // verify frame 0 points back into `helper` rather than into + // `capture` itself. + std::hint::black_box(&out); + (n, out) + } + let (n, out) = helper(&unwinder); + assert!(n >= 2, "expected at least 2 frames, got {n}"); + for (i, &addr) in out.iter().enumerate().take(n) { + assert_ne!(addr, 0, "frame {i} must be non-zero"); + } + + // Frame-0 contract: frame 0 is the PC `capture` returns to, which + // must be *inside* `helper` (the caller of `capture`). + // + // Bound `helper` by probing the addresses of two labels inside it: + // the address we take is a pointer into `helper`'s body, and the + // returned frame 0 must land within a reasonable window around it. + // + // We can't easily get the size of `helper`, so use a looser bound: + // verify that frame 0 is not equal to the address of `capture` + // itself, and that it is some reasonable code address. + let capture_addr = Unwinder::capture as *const () as u64; + assert_ne!( + out[0], capture_addr, + "frame 0 must not be inside Unwinder::capture" + ); + } + + #[test] + fn capture_respects_output_buffer_limit() { + let unwinder = Unwinder::install().unwrap(); + let mut out = [0u64; 1]; + let n = unwinder.capture(&mut out); + assert!(n <= 1, "expected at most 1 frame, got {n}"); + if n == 1 { + assert_ne!(out[0], 0, "frame 0 must be non-zero when written"); + } + } + + #[test] + fn capture_with_empty_buffer_returns_zero() { + let unwinder = Unwinder::install().unwrap(); + let n = unwinder.capture(&mut []); + assert_eq!(n, 0); + } +} From 8447563135079b59e1241874200705d64dc35a52 Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Tue, 12 May 2026 16:02:14 +0000 Subject: [PATCH 02/11] perf-self-profile: harden public Unwinder::capture API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tightens the safety story around `Unwinder::capture`, which was recently exposed in d0b0380. Four related changes: 1. `capture` is now `unsafe fn`. Its safety invariants (SIGSEGV handler must be installed and still active, not called from inside a SIGSEGV handler, frame pointers enabled, not in a prologue/epilogue window) involve process-global state and compile flags that the type system cannot enforce, so an `unsafe fn` with a documented `# Safety` section is the honest signature. 2. Added `Unwinder::verify_handler()` for runtime verification that the SIGSEGV handler we registered in `install()` is still the active handler for the process. A third-party library can replace it after our install without chaining; in that case a stack-walk fault would crash the process instead of being caught. `verify_handler` costs one `sigaction` syscall so it is not suitable for the hot path; `capture` now also contains a `debug_assert!(self.verify_handler())` so dev/test builds fail loudly if the invariant is broken, at zero cost in release. 3. `capture` now returns `CaptureResult { frames_written, truncated }` (`#[non_exhaustive]`) instead of a bare `usize`. Previously, a caller had no way to distinguish "stack happened to fit exactly in the buffer" from "buffer filled and outer frames were dropped" — both return `out.len()`. `unwind` now validates the next frame before the capacity check and reports `truncated = true` only when another valid frame would have fit. `unwind_from_ucontext` is updated similarly; its only caller (`ctimer_sampler`) ignores the new bool. 4. Collapsed the double `#[inline(never)]` layer (`Unwinder::capture` -> `platform::capture`). The prior design was not only redundant but broke the frame-0 contract: `read_registers` was `inline(always)` and got inlined into `platform::capture`, so `rbp` observed `platform::capture`'s frame, making `*(fp+8)` point into `Unwinder::capture`'s body rather than the user's caller. The existing `assert_ne!(out[0], capture_addr)` test only checked against the function's entry point and silently passed for any address further into the body. Fix: single `#[inline(never)]` on `Unwinder::capture`; `platform::capture` and `read_caller_regs` become `#[inline(always)]` so they collapse into it. Now `rbp` sees `Unwinder::capture`'s own frame, `*(fp+8)` is capture's return address into the user's caller, and that address is what goes into `out[0]`. `read_caller_regs` is also now `unsafe fn` with a detailed `# Safety` section explaining the inline-always requirement, the frame-pointer ABI invariant the raw reads depend on, and why those reads are NOT covered by the SIGSEGV `safe_load` shim. TDD notes: - New `frame_zero_points_into_caller_of_capture` tightens the frame-0 assertion to bound `out[0]` inside `helper`'s body. Verified to FAIL against the old double-`inline(never)` design and PASS with the fix. - New `capture_debug_asserts_when_handler_replaced` replaces the SIGSEGV handler with `SIG_IGN`, calls `capture`, and asserts a panic via `catch_unwind`. Verified to FAIL without the `debug_assert!` and PASS with it. Nextest runs each test in its own process so the handler manipulation is contained. - New tests for truncation semantics, empty-buffer truncation, and `verify_handler_true_after_install`. Verification: - `cargo fmt --check` clean - `cargo clippy --all-targets --all-features --workspace` clean - `cargo nextest run -p dial9-perf-self-profile --stress-duration 20s`: 36 iterations x 81 tests, 0 failures. No production callers of `Unwinder::capture` exist yet, so the signature change (usize -> CaptureResult, safe -> unsafe) is a free API refinement on top of the initial exposure in d0b0380. --- perf-self-profile/benches/unwind.rs | 3 +- .../src/sys/linux/ctimer_sampler.rs | 2 +- .../src/sys/linux/fp_profiler/mod.rs | 31 +- .../src/sys/linux/fp_profiler/unwind.rs | 102 +++- perf-self-profile/src/unwinder.rs | 436 +++++++++++++++--- 5 files changed, 488 insertions(+), 86 deletions(-) diff --git a/perf-self-profile/benches/unwind.rs b/perf-self-profile/benches/unwind.rs index cbf45a26..1affc9eb 100644 --- a/perf-self-profile/benches/unwind.rs +++ b/perf-self-profile/benches/unwind.rs @@ -93,7 +93,8 @@ fn read_registers() -> (usize, usize, usize) { fn do_unwind() -> usize { let (pc, fp, sp) = read_registers(); let mut out = [0u64; 128]; - unsafe { unwind(pc, fp, sp, &mut out) } + let (n, _truncated) = unsafe { unwind(pc, fp, sp, &mut out) }; + n } /// Build a chain of exactly N inline(never) frames via recursion. diff --git a/perf-self-profile/src/sys/linux/ctimer_sampler.rs b/perf-self-profile/src/sys/linux/ctimer_sampler.rs index 10deba62..9a95daa9 100644 --- a/perf-self-profile/src/sys/linux/ctimer_sampler.rs +++ b/perf-self-profile/src/sys/linux/ctimer_sampler.rs @@ -194,7 +194,7 @@ extern "C" fn sigprof_handler( slot.write(pid, tid, time, cpu, period); // Unwind into the slot's frame buffer - let count = unwind::unwind_from_ucontext(ucontext, slot.frames_mut()); + let (count, _truncated) = unwind::unwind_from_ucontext(ucontext, slot.frames_mut()); slot.set_num_frames(count as u32); slot.commit(); diff --git a/perf-self-profile/src/sys/linux/fp_profiler/mod.rs b/perf-self-profile/src/sys/linux/fp_profiler/mod.rs index 14d0337f..bc886605 100644 --- a/perf-self-profile/src/sys/linux/fp_profiler/mod.rs +++ b/perf-self-profile/src/sys/linux/fp_profiler/mod.rs @@ -102,6 +102,35 @@ mod supported { Ok(()) } + /// Check whether our SIGSEGV handler is still the active handler for + /// SIGSEGV. Returns `true` if the currently-installed handler matches + /// the one we registered in [`install_handler`]. + /// + /// Some other code may install its own SIGSEGV handler after ours, + /// either chaining to us or not. This function lets callers detect + /// that case so they can reinstall or skip capture. + /// + /// Performs one `sigaction` syscall; not suitable for hot paths. + pub fn handler_is_installed() -> bool { + // If we never installed, we cannot be installed. + if !HANDLER_INSTALLED.load(Ordering::SeqCst) { + return false; + } + + // Query current SIGSEGV handler without modifying it. + let mut current: libc::sigaction = unsafe { std::mem::zeroed() }; + // SAFETY: passing a null `act` pointer is valid per POSIX and only + // retrieves the current action. + let rc = unsafe { libc::sigaction(libc::SIGSEGV, ptr::null(), &mut current) }; + if rc != 0 { + return false; + } + + // Compare sa_sigaction function pointer against our handler. + let expected = sigsegv_handler as *const () as usize; + current.sa_sigaction == expected + } + /// SIGSEGV handler for `safe_load`: if the faulting PC is within the /// `safe_load_start..safe_load_end` instruction range, it skips the faulting /// load, and resumes execution. @@ -192,4 +221,4 @@ mod supported { } } -pub use supported::{install_handler, load}; +pub use supported::{handler_is_installed, install_handler, load}; diff --git a/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs b/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs index c455445e..ce9d9978 100644 --- a/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs +++ b/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs @@ -49,14 +49,21 @@ fn strip_pac(addr: usize) -> usize { /// Walk the frame-pointer chain starting from the given (pc, fp, sp) triple, /// usually obtained from a signal handler's ucontext. /// +/// Returns `(frames_written, truncated)`. `truncated` is `true` if the walk +/// stopped because the output buffer (or [`MAX_FRAMES`]) was full *and* at +/// least one additional frame would have been valid. A natural stop (end of +/// chain, faulty load, implausible pointer) produces `truncated = false`. +/// /// # Safety /// - `install_handler` must have been called. /// - Should generally be called from a signal handler where the target thread /// is stopped; walking a running thread's stack races with mutations. -pub unsafe fn unwind(pc: usize, mut fp: usize, sp: usize, out: &mut [u64]) -> usize { +pub unsafe fn unwind(pc: usize, mut fp: usize, sp: usize, out: &mut [u64]) -> (usize, bool) { let limit = out.len().min(MAX_FRAMES); if limit == 0 { - return 0; + // No room even for the interrupted PC. The walk would have produced + // at least one frame, so this is truncation. + return (0, true); } out[0] = pc as u64; @@ -65,47 +72,54 @@ pub unsafe fn unwind(pc: usize, mut fp: usize, sp: usize, out: &mut [u64]) -> us let stack_lo = sp; let stack_hi = sp.saturating_add(8 * 1024 * 1024); - while n < limit { + loop { + // Validate current fp before reading from it. if fp < stack_lo || fp >= stack_hi { - break; + return (n, false); } if fp & (core::mem::size_of::() - 1) != 0 { - break; // misaligned + return (n, false); // misaligned } let saved_fp = unsafe { load(fp as *const usize) }; if saved_fp == SAFE_LOAD_FAULT { - break; + return (n, false); } let ret_addr_slot = (fp + core::mem::size_of::()) as *const usize; let ret_addr = strip_pac(unsafe { load(ret_addr_slot) }); if ret_addr == SAFE_LOAD_FAULT { - break; + return (n, false); } if !(DEAD_ZONE..=usize::MAX - DEAD_ZONE).contains(&ret_addr) { - break; + return (n, false); } // Frame pointer must advance (stacks grow down -> saved_fp > fp) // but not by more than MAX_FRAME_SIZE. if saved_fp <= fp || saved_fp - fp > MAX_FRAME_SIZE { - break; + return (n, false); + } + + // We have a valid next frame. If we ran out of room to record it, + // the walk is truncated. + if n >= limit { + return (n, true); } out[n] = ret_addr as u64; n += 1; fp = saved_fp; } - - n } /// Unwind from inside a signal handler given the raw ucontext. /// +/// Returns `(frames_written, truncated)`; see [`unwind`] for details. +/// /// # Safety /// `ucontext` must be the pointer the kernel passed to a SA_SIGINFO handler. -pub unsafe fn unwind_from_ucontext(ucontext: *mut libc::c_void, out: &mut [u64]) -> usize { +pub unsafe fn unwind_from_ucontext(ucontext: *mut libc::c_void, out: &mut [u64]) -> (usize, bool) { let (pc, fp, sp) = unsafe { read_pc_fp_sp(ucontext) }; unsafe { unwind(pc, fp, sp, out) } } @@ -161,9 +175,10 @@ mod tests { stack[4] = base + 4 * sz; let mut out = [0u64; MAX_FRAMES]; - let n = unsafe { unwind(0x40_0000, base, base, &mut out) }; + let (n, truncated) = unsafe { unwind(0x40_0000, base, base, &mut out) }; assert_eq!(n, 3); + assert!(!truncated); assert_eq!(out[0], 0x40_0000); // interrupted PC assert_eq!(out[1], 0x40_1000); assert_eq!(out[2], 0x40_2000); @@ -173,8 +188,9 @@ mod tests { fn frame_zero_is_always_the_interrupted_pc() { install(); let mut out = [0u64; MAX_FRAMES]; - let n = unsafe { unwind(0xDEAD, 0, 0x1000, &mut out) }; + let (n, truncated) = unsafe { unwind(0xDEAD, 0, 0x1000, &mut out) }; assert_eq!(n, 1); + assert!(!truncated); assert_eq!(out[0], 0xDEAD); } @@ -183,8 +199,9 @@ mod tests { install(); let mut out = [0u64; MAX_FRAMES]; let sp = 0x7fff_0000_0000usize; - let n = unsafe { unwind(0x40_0000, sp + 1, sp, &mut out) }; + let (n, truncated) = unsafe { unwind(0x40_0000, sp + 1, sp, &mut out) }; assert_eq!(n, 1); + assert!(!truncated); } #[test] @@ -192,8 +209,9 @@ mod tests { install(); let mut out = [0u64; MAX_FRAMES]; let sp = 0x7fff_0000_0000usize; - let n = unsafe { unwind(0x40_0000, sp.wrapping_sub(8), sp, &mut out) }; + let (n, truncated) = unsafe { unwind(0x40_0000, sp.wrapping_sub(8), sp, &mut out) }; assert_eq!(n, 1); + assert!(!truncated); } #[test] @@ -207,8 +225,9 @@ mod tests { stack[1] = 0x100; // return addr in dead zone (< 0x1000) let mut out = [0u64; MAX_FRAMES]; - let n = unsafe { unwind(0x40_0000, base, base, &mut out) }; + let (n, truncated) = unsafe { unwind(0x40_0000, base, base, &mut out) }; assert_eq!(n, 1); + assert!(!truncated); } #[test] @@ -220,8 +239,9 @@ mod tests { stack[0] = base + MAX_FRAME_SIZE + 8; // jump exceeds MAX_FRAME_SIZE let mut out = [0u64; MAX_FRAMES]; - let n = unsafe { unwind(0x40_0000, base, base, &mut out) }; + let (n, truncated) = unsafe { unwind(0x40_0000, base, base, &mut out) }; assert_eq!(n, 1); + assert!(!truncated); } #[test] @@ -233,19 +253,55 @@ mod tests { stack[0] = base; // saved_fp == fp, doesn't advance let mut out = [0u64; MAX_FRAMES]; - let n = unsafe { unwind(0x40_0000, base, base, &mut out) }; + let (n, truncated) = unsafe { unwind(0x40_0000, base, base, &mut out) }; assert_eq!(n, 1); + assert!(!truncated); } #[test] fn respects_output_buffer_limit() { install(); let mut out = [0u64; 1]; - let n = unsafe { unwind(0x40_0000, 0, 0x1000, &mut out) }; + let (n, _truncated) = unsafe { unwind(0x40_0000, 0, 0x1000, &mut out) }; assert_eq!(n, 1); assert_eq!(out[0], 0x40_0000); } + #[test] + fn reports_truncation_when_buffer_fills_before_chain_ends() { + install(); + let sz = std::mem::size_of::(); + let mut stack = [0usize; 8]; + let base = stack.as_mut_ptr() as usize; + + // Chain of 3 valid frames (same structure as `walks_valid_frame_chain`). + stack[0] = base + 2 * sz; + stack[1] = 0x40_1000; + stack[2] = base + 4 * sz; + stack[3] = 0x40_2000; + stack[4] = base + 4 * sz; // terminates naturally here + + // Buffer fits only pc + 1 frame, so the 3rd frame is dropped. + let mut out = [0u64; 2]; + let (n, truncated) = unsafe { unwind(0x40_0000, base, base, &mut out) }; + assert_eq!(n, 2); + assert!( + truncated, + "should report truncation when output buffer fills" + ); + assert_eq!(out[0], 0x40_0000); + assert_eq!(out[1], 0x40_1000); + } + + #[test] + fn empty_buffer_reports_truncation() { + install(); + let mut out: [u64; 0] = []; + let (n, truncated) = unsafe { unwind(0x40_0000, 0, 0x1000, &mut out) }; + assert_eq!(n, 0); + assert!(truncated, "empty buffer can always hold more"); + } + fn page_size() -> usize { let ps = unsafe { libc::sysconf(libc::_SC_PAGESIZE) }; assert!(ps > 0, "sysconf(_SC_PAGESIZE) must return a positive value"); @@ -272,12 +328,13 @@ mod tests { // sp = fp keeps fp in range, page-aligned. let mut out = [0u64; MAX_FRAMES]; - let n = unsafe { unwind(0x40_0000, fp, fp, &mut out) }; + let (n, truncated) = unsafe { unwind(0x40_0000, fp, fp, &mut out) }; // Unmap before the asserts so a panic doesn't leak the mapping. assert_eq!(unsafe { libc::munmap(guard, ps) }, 0); assert_eq!(n, 1, "unwind must stop when saved_fp load faults"); + assert!(!truncated); assert_eq!(out[0], 0x40_0000); } @@ -310,12 +367,13 @@ mod tests { unsafe { (fp as *mut usize).write(fp + 16) }; let mut out = [0u64; MAX_FRAMES]; - let n = unsafe { unwind(0x40_0000, fp, fp, &mut out) }; + let (n, truncated) = unsafe { unwind(0x40_0000, fp, fp, &mut out) }; // Unmap before the asserts so a panic doesn't leak the mapping. assert_eq!(unsafe { libc::munmap(region, 2 * ps) }, 0); assert_eq!(n, 1, "unwind must stop when ret_addr load faults"); + assert!(!truncated); assert_eq!(out[0], 0x40_0000); } } diff --git a/perf-self-profile/src/unwinder.rs b/perf-self-profile/src/unwinder.rs index 3ada66d0..437d027a 100644 --- a/perf-self-profile/src/unwinder.rs +++ b/perf-self-profile/src/unwinder.rs @@ -1,11 +1,38 @@ -//! Public stack-unwinding API. +//! Framepointer-based stack unwinding //! -//! Provides [`Unwinder`], a zero-sized handle that proves the SIGSEGV fault -//! handler is installed, and exposes a non-allocating `capture()` method for -//! collecting stack traces of the calling thread. +//! This implementation uses a SIGSEV fault handler to allow safe walking of stacks. Without +//! this, there is no way to perform framepointer unwinding without risking segfaults when walking +//! stacks where framepointers are not enabled. +//! +//! Because of this, the unwinder must be "[`install`ed](Unwinder::install)" before +//! you can use it to [`capture`](Unwinder::capture) a stack. +//! +//! The unwinded stacks are only addresses. You must use a symbolizer separately to +//! convert the addresses into function names. + +/// Result of a [`Unwinder::capture`] call. +/// +/// The captured program counters are written into the output buffer supplied +/// to `capture`; this struct describes the metadata of the capture. +/// +/// `#[non_exhaustive]` so new fields can be added without breaking callers. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[non_exhaustive] +pub struct CaptureResult { + /// Number of frames written into the caller's output buffer. + /// + /// Frames `out[0..frames_written]` are valid. + pub frames_written: usize, + /// `true` if the walk stopped because the output buffer (or the + /// internal `MAX_FRAMES` cap of 128) was full while at least one + /// additional frame was still walkable. When `true`, the outer + /// frames of the stack (closer to `main`) have been dropped. + pub truncated: bool, +} /// Handle that proves the SIGSEGV fault handler is installed. -/// Zero-sized, freely copyable. +/// +/// This type is zero-sized and can be freely copied and cloned. #[derive(Clone, Copy, Debug)] pub struct Unwinder { _private: (), @@ -25,21 +52,70 @@ impl Unwinder { Ok(Self { _private: () }) } - /// Capture a stack trace of the calling thread into `out`. Returns - /// the number of frames written. Never allocates. + /// Verify that our SIGSEGV handler is still the active handler for + /// SIGSEGV on this process. Returns `true` if the handler we installed + /// is still registered. + /// + /// Another library or the runtime may install its own SIGSEGV handler + /// after [`install`](Self::install) is called. If that handler does not + /// chain to ours, [`capture`](Self::capture) may crash on a bad frame + /// pointer instead of aborting the walk safely. Callers who need + /// defence against this can call `verify_handler` periodically or + /// before safety-critical captures. + /// + /// Performs one `sigaction` syscall. Not suitable for per-sample hot + /// paths. + pub fn verify_handler(&self) -> bool { + platform::verify_handler() + } + + /// Capture a stack trace of the calling thread into `out`. Returns a + /// [`CaptureResult`] describing the number of frames written and + /// whether the walk was truncated. Never allocates. /// /// # Frame-0 contract - /// `out[0]` is the return address *into the caller of `capture`* — - /// i.e. the PC where `capture` itself will return. Subsequent frames - /// walk outward via the frame-pointer chain. Callers should expect - /// to skip `capture` itself plus any `#[inline(never)]` shim they - /// insert. + /// `out[0]` is the return address of `capture` itself — i.e. a PC + /// *inside the caller of `capture`*. Subsequent frames walk outward + /// via the frame-pointer chain. Any `#[inline(never)]` shim inserted + /// between the user's code and `capture` will appear as an extra + /// frame; plain function calls with frame pointers enabled behave as + /// expected. + /// + /// # Buffer and truncation + /// At most `out.len().min(MAX_FRAMES)` frames are written (where + /// `MAX_FRAMES = 128`). If the real stack is deeper, innermost frames + /// are kept and outer frames are dropped; `CaptureResult::truncated` + /// is set to `true`. /// - /// # Safety contract - /// Must not be called from inside a different SIGSEGV handler. + /// # Safety + /// - [`install`](Self::install) must have succeeded and the SIGSEGV + /// handler it registered must still be active. If another library + /// has replaced the SIGSEGV handler without chaining to ours, a + /// faulty frame-pointer chain can crash the process instead of + /// being caught. Use [`verify_handler`](Self::verify_handler) if + /// you need to defend against third-party signal handler + /// installation. + /// - Must not be called from inside a signal handler for SIGSEGV + /// (that would recurse into our own handler without bound). + /// - The calling thread's stack must be valid for frame-pointer walking + /// (binary compiled with `-C force-frame-pointers=yes`, no code + /// currently executing in a prologue/epilogue window where `rbp` + /// does not point at a saved-fp slot). #[inline(never)] - pub fn capture(&self, out: &mut [u64]) -> usize { - platform::capture(out) + pub unsafe fn capture(&self, out: &mut [u64]) -> CaptureResult { + // Debug-only check that our SIGSEGV handler is still the active + // one. In release builds this is skipped to keep `capture` syscall-free + // on the hot path; callers who need this at runtime should use + // [`verify_handler`](Self::verify_handler) explicitly. + debug_assert!( + self.verify_handler(), + "Unwinder::capture called but our SIGSEGV handler is no longer active; \ + something replaced it without chaining. See Unwinder::verify_handler." + ); + // SAFETY: forwarding Unwinder::capture's own safety contract to + // platform::capture (handler installed, not in a SIGSEGV handler, + // frame pointers enabled). + unsafe { platform::capture(out) } } } @@ -48,26 +124,85 @@ impl Unwinder { any(target_arch = "x86_64", target_arch = "aarch64") ))] mod platform { - use crate::sys::fp_profiler::{install_handler, unwind::unwind}; + use super::CaptureResult; + use crate::sys::fp_profiler::{handler_is_installed, install_handler, unwind::unwind}; pub fn install() -> std::io::Result<()> { // SAFETY: installs the SIGSEGV handler for safe_load; idempotent. unsafe { install_handler() } } - #[inline(never)] - pub fn capture(out: &mut [u64]) -> usize { - let (pc, fp, sp) = read_registers(); - // SAFETY: handler is installed (caller holds Unwinder), and we are not - // inside a SIGSEGV handler. - unsafe { unwind(pc, fp, sp, out) } + pub fn verify_handler() -> bool { + handler_is_installed() + } + + /// Called from [`Unwinder::capture`] (which is `#[inline(never)]`), so the + /// "current frame" observed here is `Unwinder::capture`'s frame. This + /// function is `#[inline(always)]` specifically so it does *not* introduce + /// another frame — see the `frame_zero_points_into_caller_of_capture` + /// test. + /// + /// # Safety + /// Same obligations as [`Unwinder::capture`]: handler installed, + /// not inside a SIGSEGV handler, frame pointers enabled. + #[inline(always)] + pub unsafe fn capture(out: &mut [u64]) -> CaptureResult { + // SAFETY: called from Unwinder::capture which forwards the full + // safety contract (handler installed, frame pointers enabled, not + // inside a SIGSEGV handler, not in a prologue/epilogue window). + let (pc, fp, sp) = unsafe { read_caller_regs() }; + // SAFETY: handler is installed (caller holds Unwinder), and we are + // not inside a SIGSEGV handler (see Unwinder::capture safety + // contract). + let (frames_written, truncated) = unsafe { unwind(pc, fp, sp, out) }; + CaptureResult { + frames_written, + truncated, + } } + /// Read `(pc, fp, sp)` such that `pc` is the PC to use for frame 0 and + /// `fp` is the saved frame pointer of the frame *above* the current one. + /// + /// Because this is `#[inline(always)]`, the `rbp`/`x29` read observes + /// `Unwinder::capture`'s frame (its one and only non-inlined ancestor). + /// - `*(rbp + 8)` on x86_64 / LR save slot on aarch64 is + /// `Unwinder::capture`'s return address → goes to `out[0]`. + /// - `*rbp` / `*x29` is the saved fp of the caller of `Unwinder::capture` + /// → where we start walking the chain. + /// + /// # Safety + /// - Must be called with `#[inline(always)]` preserved so the read + /// observes `Unwinder::capture`'s frame, not this helper's. If ever + /// actually inlined into a different caller or promoted to a + /// standalone frame, the returned `fp`/return-address semantics + /// change and the frame-0 contract breaks. + /// - Must only be called after [`install_handler`] has succeeded. + /// Reading `*fp` and `*(fp+8)` is a raw dereference of the stack; + /// the SIGSEGV handler installed by `install_handler` does *not* + /// cover these reads (it only covers `safe_load`). The reads are + /// safe only because — on a thread built with + /// `-C force-frame-pointers=yes` — the kernel/ABI guarantees that + /// `rbp`/`x29` always points at a valid `[saved_fp, ret_addr]` + /// pair for the currently executing function. + /// - The calling binary must be compiled with + /// `-C force-frame-pointers=yes`. Without frame pointers, `rbp` + /// may be used as a general-purpose register and the raw reads + /// will dereference arbitrary memory. + /// - Must not be called during function prologue/epilogue or other + /// windows where `rbp`/`x29` does not yet (or no longer) points at + /// a saved-fp slot. For the intended call site inside + /// `Unwinder::capture`'s body this is always satisfied; calling + /// from hand-written asm shims, signal-trampoline code, or from + /// within another `naked` function is not supported. #[cfg(target_arch = "x86_64")] #[inline(always)] - fn read_registers() -> (usize, usize, usize) { + unsafe fn read_caller_regs() -> (usize, usize, usize) { let fp: usize; let sp: usize; + // SAFETY: Reading `rbp`/`rsp` with `nostack, nomem` has no memory + // side effects and cannot invalidate Rust's stack invariants; we + // do not modify either register. unsafe { core::arch::asm!( "mov {fp}, rbp", @@ -77,11 +212,12 @@ mod platform { options(nostack, nomem), ); } - // We want frame 0 to be the return address of our caller (capture). - // The current fp points to capture's frame. The return address of - // capture is at *(fp + 8). The caller's fp is at *fp. - // We pass the return address as `pc` and *fp as the new fp to start - // walking from the caller's caller. + // SAFETY: `fp` is `Unwinder::capture`'s frame pointer (see top-level + // # Safety note). On x86_64 System V, with frame pointers enabled, + // a compiler-generated frame begins with + // [saved_rbp : usize, return_addr : usize, ...] + // so `*fp` and `*(fp + 8)` are guaranteed to be valid reads of + // currently-live stack memory for this frame. let ret_addr = unsafe { *(fp as *const usize).add(1) }; let caller_fp = unsafe { *(fp as *const usize) }; (ret_addr, caller_fp, sp) @@ -89,9 +225,12 @@ mod platform { #[cfg(target_arch = "aarch64")] #[inline(always)] - fn read_registers() -> (usize, usize, usize) { + unsafe fn read_caller_regs() -> (usize, usize, usize) { let fp: usize; let sp: usize; + // SAFETY: Reading `x29`/`sp` with `nostack, nomem` has no memory + // side effects and cannot invalidate Rust's stack invariants; we + // do not modify either register. unsafe { core::arch::asm!( "mov {fp}, x29", @@ -101,6 +240,9 @@ mod platform { options(nostack, nomem), ); } + // SAFETY: Same layout as x86_64 under AAPCS64 with frame pointers: + // [saved_fp (x29) : u64, saved_lr : u64, ...] + // so `*fp` and `*(fp + 8)` are valid reads of live stack memory. let ret_addr = unsafe { *(fp as *const usize).add(1) }; let caller_fp = unsafe { *(fp as *const usize) }; (ret_addr, caller_fp, sp) @@ -112,6 +254,8 @@ mod platform { any(target_arch = "x86_64", target_arch = "aarch64") )))] mod platform { + use super::CaptureResult; + pub fn install() -> std::io::Result<()> { Err(std::io::Error::new( std::io::ErrorKind::Unsupported, @@ -119,8 +263,15 @@ mod platform { )) } - pub fn capture(_out: &mut [u64]) -> usize { - 0 + pub fn verify_handler() -> bool { + false + } + + pub unsafe fn capture(_out: &mut [u64]) -> CaptureResult { + CaptureResult { + frames_written: 0, + truncated: false, + } } } @@ -148,57 +299,220 @@ mod tests { } } + #[cfg(all( + target_os = "linux", + any(target_arch = "x86_64", target_arch = "aarch64") + ))] + #[test] + fn verify_handler_true_after_install() { + let u = Unwinder::install().unwrap(); + assert!(u.verify_handler(), "handler should be active after install"); + } + + #[cfg(all( + target_os = "linux", + any(target_arch = "x86_64", target_arch = "aarch64") + ))] #[test] fn capture_produces_frames() { let unwinder = Unwinder::install().unwrap(); #[inline(never)] - fn helper(u: &Unwinder) -> (usize, [u64; 64]) { + fn helper(u: &Unwinder) -> (CaptureResult, [u64; 64]) { let mut out = [0u64; 64]; - let n = u.capture(&mut out); - // Anchor a known address *after* the capture site so we can - // verify frame 0 points back into `helper` rather than into - // `capture` itself. + // SAFETY: handler installed via Unwinder::install above; test thread + // is not inside a signal handler. + let result = unsafe { u.capture(&mut out) }; std::hint::black_box(&out); - (n, out) + (result, out) } - let (n, out) = helper(&unwinder); - assert!(n >= 2, "expected at least 2 frames, got {n}"); - for (i, &addr) in out.iter().enumerate().take(n) { + let (result, out) = helper(&unwinder); + assert!( + result.frames_written >= 2, + "expected at least 2 frames, got {}", + result.frames_written + ); + for (i, &addr) in out.iter().enumerate().take(result.frames_written) { assert_ne!(addr, 0, "frame {i} must be non-zero"); } + } - // Frame-0 contract: frame 0 is the PC `capture` returns to, which - // must be *inside* `helper` (the caller of `capture`). - // - // Bound `helper` by probing the addresses of two labels inside it: - // the address we take is a pointer into `helper`'s body, and the - // returned frame 0 must land within a reasonable window around it. - // - // We can't easily get the size of `helper`, so use a looser bound: - // verify that frame 0 is not equal to the address of `capture` - // itself, and that it is some reasonable code address. - let capture_addr = Unwinder::capture as *const () as u64; - assert_ne!( - out[0], capture_addr, - "frame 0 must not be inside Unwinder::capture" + /// Tighter version of the frame-0 contract test: verify that frame 0 + /// lands inside `helper` (the caller of `capture`) rather than inside + /// `Unwinder::capture` itself. This catches the bug where the old + /// double-`#[inline(never)]` layering made frame 0 point at an + /// instruction inside `Unwinder::capture`'s body. + #[cfg(all( + target_os = "linux", + any(target_arch = "x86_64", target_arch = "aarch64") + ))] + #[test] + fn frame_zero_points_into_caller_of_capture() { + let unwinder = Unwinder::install().unwrap(); + + #[inline(never)] + fn helper(u: &Unwinder) -> u64 { + let mut out = [0u64; 64]; + // SAFETY: same as capture_produces_frames. + let result = unsafe { u.capture(&mut out) }; + std::hint::black_box(&out); + assert!(result.frames_written >= 1); + out[0] + } + + let frame0 = helper(&unwinder); + let helper_start = helper as *const () as u64; + let capture_fn = Unwinder::capture as *const () as u64; + + // Frame 0 must NOT be inside Unwinder::capture's body. Allow a + // generous 4 KiB window around its entry in case of debug bloat. + let capture_window = 4096u64; + assert!( + !(frame0 >= capture_fn && frame0 < capture_fn + capture_window), + "frame 0 {:#x} must not be inside Unwinder::capture [{:#x}..{:#x})", + frame0, + capture_fn, + capture_fn + capture_window, + ); + + // Stronger: frame 0 should land inside `helper`'s body. Debug + // builds can be large, so use a 64 KiB window as an upper bound. + let helper_window = 64 * 1024u64; + assert!( + frame0 >= helper_start && frame0 < helper_start + helper_window, + "frame 0 {:#x} should be inside helper [{:#x}..{:#x})", + frame0, + helper_start, + helper_start + helper_window, ); } + #[cfg(all( + target_os = "linux", + any(target_arch = "x86_64", target_arch = "aarch64") + ))] #[test] fn capture_respects_output_buffer_limit() { let unwinder = Unwinder::install().unwrap(); let mut out = [0u64; 1]; - let n = unwinder.capture(&mut out); - assert!(n <= 1, "expected at most 1 frame, got {n}"); - if n == 1 { + // SAFETY: handler installed; test context is not a signal handler. + let result = unsafe { unwinder.capture(&mut out) }; + assert!( + result.frames_written <= 1, + "expected at most 1 frame, got {}", + result.frames_written + ); + if result.frames_written == 1 { assert_ne!(out[0], 0, "frame 0 must be non-zero when written"); } } + #[cfg(all( + target_os = "linux", + any(target_arch = "x86_64", target_arch = "aarch64") + ))] #[test] - fn capture_with_empty_buffer_returns_zero() { + fn capture_reports_truncation_with_tiny_buffer() { let unwinder = Unwinder::install().unwrap(); - let n = unwinder.capture(&mut []); - assert_eq!(n, 0); + // Build a small but real call chain so a 1-slot buffer is bound + // to truncate. + #[inline(never)] + fn depth_2(u: &Unwinder) -> CaptureResult { + let mut out = [0u64; 1]; + // SAFETY: handler installed above. + let r = unsafe { u.capture(&mut out) }; + std::hint::black_box(&out); + r + } + #[inline(never)] + fn depth_1(u: &Unwinder) -> CaptureResult { + std::hint::black_box(depth_2(u)) + } + let result = depth_1(&unwinder); + assert_eq!(result.frames_written, 1); + assert!( + result.truncated, + "a 1-slot buffer with a multi-frame stack must report truncated" + ); + } + + #[cfg(all( + target_os = "linux", + any(target_arch = "x86_64", target_arch = "aarch64") + ))] + #[test] + fn capture_with_empty_buffer_reports_truncation() { + let unwinder = Unwinder::install().unwrap(); + // SAFETY: handler installed above. + let result = unsafe { unwinder.capture(&mut []) }; + assert_eq!(result.frames_written, 0); + assert!(result.truncated); + } + + /// In debug builds, `capture` panics via `debug_assert!` when its SIGSEGV + /// handler has been replaced out from under it. This protects against + /// third-party signal handlers silently breaking stack-walk fault + /// recovery. + /// + /// Nextest runs each test in its own process, so replacing the SIGSEGV + /// handler here cannot affect sibling tests. + #[cfg(all( + debug_assertions, + target_os = "linux", + any(target_arch = "x86_64", target_arch = "aarch64") + ))] + #[test] + fn capture_debug_asserts_when_handler_replaced() { + use std::panic::{AssertUnwindSafe, catch_unwind}; + + let unwinder = Unwinder::install().unwrap(); + assert!(unwinder.verify_handler()); + + // Replace SIGSEGV with SIG_IGN so verify_handler() returns false. + // SAFETY: we restore our handler below on all exit paths via the + // guard; nextest's process isolation limits blast radius even if + // restoration is skipped. + let mut new_action: libc::sigaction = unsafe { std::mem::zeroed() }; + new_action.sa_sigaction = libc::SIG_IGN; + unsafe { libc::sigemptyset(&mut new_action.sa_mask) }; + let rc = unsafe { libc::sigaction(libc::SIGSEGV, &new_action, std::ptr::null_mut()) }; + assert_eq!(rc, 0, "failed to replace SIGSEGV handler"); + + struct RestoreGuard; + impl Drop for RestoreGuard { + fn drop(&mut self) { + // Best-effort restore of our handler. `Unwinder::install` + // is a no-op after the first successful call (idempotent + // flag), so use `sigaction` directly. Nextest's + // per-test process isolation means even if this fails, + // other tests are unaffected. + // SAFETY: process-global signal state; we rewrite + // SIGSEGV back to SIG_DFL so any subsequent fault during + // teardown terminates cleanly rather than being ignored. + let mut dfl: libc::sigaction = unsafe { std::mem::zeroed() }; + dfl.sa_sigaction = libc::SIG_DFL; + unsafe { libc::sigemptyset(&mut dfl.sa_mask) }; + let _ = unsafe { libc::sigaction(libc::SIGSEGV, &dfl, std::ptr::null_mut()) }; + } + } + let _guard = RestoreGuard; + + assert!( + !unwinder.verify_handler(), + "precondition: handler should appear replaced" + ); + + // capture() must panic via debug_assert before doing any stack + // walking. + let mut out = [0u64; 16]; + let result = catch_unwind(AssertUnwindSafe(|| { + // SAFETY: we intentionally violate the "handler installed" + // precondition to verify the debug_assert fires. The panic + // from debug_assert aborts before the frame walk runs. + let _ = unsafe { unwinder.capture(&mut out) }; + })); + assert!( + result.is_err(), + "capture should panic via debug_assert when handler is replaced" + ); } } From fde162f45adf2712f0f97a278000b65d105c1c4a Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Tue, 12 May 2026 17:39:01 +0000 Subject: [PATCH 03/11] Move cfg gating to test module level and add symbolize integration test - unwinder.rs: Move per-test #[cfg(all(target_os, target_arch))] to a nested `mod linux` inside the test module - Add tests/symbolize_stack.rs: integration test that captures a stack trace via Unwinder and symbolizes it, verifying expected function names --- perf-self-profile/src/unwinder.rs | 368 ++++++++++----------- perf-self-profile/tests/symbolize_stack.rs | 43 +++ 2 files changed, 217 insertions(+), 194 deletions(-) create mode 100644 perf-self-profile/tests/symbolize_stack.rs diff --git a/perf-self-profile/src/unwinder.rs b/perf-self-profile/src/unwinder.rs index 437d027a..bb0b1ec9 100644 --- a/perf-self-profile/src/unwinder.rs +++ b/perf-self-profile/src/unwinder.rs @@ -303,216 +303,196 @@ mod tests { target_os = "linux", any(target_arch = "x86_64", target_arch = "aarch64") ))] - #[test] - fn verify_handler_true_after_install() { - let u = Unwinder::install().unwrap(); - assert!(u.verify_handler(), "handler should be active after install"); - } + mod linux { + use super::*; - #[cfg(all( - target_os = "linux", - any(target_arch = "x86_64", target_arch = "aarch64") - ))] - #[test] - fn capture_produces_frames() { - let unwinder = Unwinder::install().unwrap(); - #[inline(never)] - fn helper(u: &Unwinder) -> (CaptureResult, [u64; 64]) { - let mut out = [0u64; 64]; - // SAFETY: handler installed via Unwinder::install above; test thread - // is not inside a signal handler. - let result = unsafe { u.capture(&mut out) }; - std::hint::black_box(&out); - (result, out) - } - let (result, out) = helper(&unwinder); - assert!( - result.frames_written >= 2, - "expected at least 2 frames, got {}", - result.frames_written - ); - for (i, &addr) in out.iter().enumerate().take(result.frames_written) { - assert_ne!(addr, 0, "frame {i} must be non-zero"); + #[test] + fn verify_handler_true_after_install() { + let u = Unwinder::install().unwrap(); + assert!(u.verify_handler(), "handler should be active after install"); } - } - /// Tighter version of the frame-0 contract test: verify that frame 0 - /// lands inside `helper` (the caller of `capture`) rather than inside - /// `Unwinder::capture` itself. This catches the bug where the old - /// double-`#[inline(never)]` layering made frame 0 point at an - /// instruction inside `Unwinder::capture`'s body. - #[cfg(all( - target_os = "linux", - any(target_arch = "x86_64", target_arch = "aarch64") - ))] - #[test] - fn frame_zero_points_into_caller_of_capture() { - let unwinder = Unwinder::install().unwrap(); - - #[inline(never)] - fn helper(u: &Unwinder) -> u64 { - let mut out = [0u64; 64]; - // SAFETY: same as capture_produces_frames. - let result = unsafe { u.capture(&mut out) }; - std::hint::black_box(&out); - assert!(result.frames_written >= 1); - out[0] + #[test] + fn capture_produces_frames() { + let unwinder = Unwinder::install().unwrap(); + #[inline(never)] + fn helper(u: &Unwinder) -> (CaptureResult, [u64; 64]) { + let mut out = [0u64; 64]; + // SAFETY: handler installed via Unwinder::install above; test thread + // is not inside a signal handler. + let result = unsafe { u.capture(&mut out) }; + std::hint::black_box(&out); + (result, out) + } + let (result, out) = helper(&unwinder); + assert!( + result.frames_written >= 2, + "expected at least 2 frames, got {}", + result.frames_written + ); + for (i, &addr) in out.iter().enumerate().take(result.frames_written) { + assert_ne!(addr, 0, "frame {i} must be non-zero"); + } } - let frame0 = helper(&unwinder); - let helper_start = helper as *const () as u64; - let capture_fn = Unwinder::capture as *const () as u64; - - // Frame 0 must NOT be inside Unwinder::capture's body. Allow a - // generous 4 KiB window around its entry in case of debug bloat. - let capture_window = 4096u64; - assert!( - !(frame0 >= capture_fn && frame0 < capture_fn + capture_window), - "frame 0 {:#x} must not be inside Unwinder::capture [{:#x}..{:#x})", - frame0, - capture_fn, - capture_fn + capture_window, - ); + /// Tighter version of the frame-0 contract test: verify that frame 0 + /// lands inside `helper` (the caller of `capture`) rather than inside + /// `Unwinder::capture` itself. This catches the bug where the old + /// double-`#[inline(never)]` layering made frame 0 point at an + /// instruction inside `Unwinder::capture`'s body. + #[test] + fn frame_zero_points_into_caller_of_capture() { + let unwinder = Unwinder::install().unwrap(); + + #[inline(never)] + fn helper(u: &Unwinder) -> u64 { + let mut out = [0u64; 64]; + // SAFETY: same as capture_produces_frames. + let result = unsafe { u.capture(&mut out) }; + std::hint::black_box(&out); + assert!(result.frames_written >= 1); + out[0] + } - // Stronger: frame 0 should land inside `helper`'s body. Debug - // builds can be large, so use a 64 KiB window as an upper bound. - let helper_window = 64 * 1024u64; - assert!( - frame0 >= helper_start && frame0 < helper_start + helper_window, - "frame 0 {:#x} should be inside helper [{:#x}..{:#x})", - frame0, - helper_start, - helper_start + helper_window, - ); - } + let frame0 = helper(&unwinder); + let helper_start = helper as *const () as u64; + let capture_fn = Unwinder::capture as *const () as u64; + + // Frame 0 must NOT be inside Unwinder::capture's body. Allow a + // generous 4 KiB window around its entry in case of debug bloat. + let capture_window = 4096u64; + assert!( + !(frame0 >= capture_fn && frame0 < capture_fn + capture_window), + "frame 0 {:#x} must not be inside Unwinder::capture [{:#x}..{:#x})", + frame0, + capture_fn, + capture_fn + capture_window, + ); - #[cfg(all( - target_os = "linux", - any(target_arch = "x86_64", target_arch = "aarch64") - ))] - #[test] - fn capture_respects_output_buffer_limit() { - let unwinder = Unwinder::install().unwrap(); - let mut out = [0u64; 1]; - // SAFETY: handler installed; test context is not a signal handler. - let result = unsafe { unwinder.capture(&mut out) }; - assert!( - result.frames_written <= 1, - "expected at most 1 frame, got {}", - result.frames_written - ); - if result.frames_written == 1 { - assert_ne!(out[0], 0, "frame 0 must be non-zero when written"); + // Stronger: frame 0 should land inside `helper`'s body. Debug + // builds can be large, so use a 64 KiB window as an upper bound. + let helper_window = 64 * 1024u64; + assert!( + frame0 >= helper_start && frame0 < helper_start + helper_window, + "frame 0 {:#x} should be inside helper [{:#x}..{:#x})", + frame0, + helper_start, + helper_start + helper_window, + ); } - } - #[cfg(all( - target_os = "linux", - any(target_arch = "x86_64", target_arch = "aarch64") - ))] - #[test] - fn capture_reports_truncation_with_tiny_buffer() { - let unwinder = Unwinder::install().unwrap(); - // Build a small but real call chain so a 1-slot buffer is bound - // to truncate. - #[inline(never)] - fn depth_2(u: &Unwinder) -> CaptureResult { + #[test] + fn capture_respects_output_buffer_limit() { + let unwinder = Unwinder::install().unwrap(); let mut out = [0u64; 1]; - // SAFETY: handler installed above. - let r = unsafe { u.capture(&mut out) }; - std::hint::black_box(&out); - r + // SAFETY: handler installed; test context is not a signal handler. + let result = unsafe { unwinder.capture(&mut out) }; + assert!( + result.frames_written <= 1, + "expected at most 1 frame, got {}", + result.frames_written + ); + if result.frames_written == 1 { + assert_ne!(out[0], 0, "frame 0 must be non-zero when written"); + } } - #[inline(never)] - fn depth_1(u: &Unwinder) -> CaptureResult { - std::hint::black_box(depth_2(u)) + + #[test] + fn capture_reports_truncation_with_tiny_buffer() { + let unwinder = Unwinder::install().unwrap(); + // Build a small but real call chain so a 1-slot buffer is bound + // to truncate. + #[inline(never)] + fn depth_2(u: &Unwinder) -> CaptureResult { + let mut out = [0u64; 1]; + // SAFETY: handler installed above. + let r = unsafe { u.capture(&mut out) }; + std::hint::black_box(&out); + r + } + #[inline(never)] + fn depth_1(u: &Unwinder) -> CaptureResult { + std::hint::black_box(depth_2(u)) + } + let result = depth_1(&unwinder); + assert_eq!(result.frames_written, 1); + assert!( + result.truncated, + "a 1-slot buffer with a multi-frame stack must report truncated" + ); } - let result = depth_1(&unwinder); - assert_eq!(result.frames_written, 1); - assert!( - result.truncated, - "a 1-slot buffer with a multi-frame stack must report truncated" - ); - } - #[cfg(all( - target_os = "linux", - any(target_arch = "x86_64", target_arch = "aarch64") - ))] - #[test] - fn capture_with_empty_buffer_reports_truncation() { - let unwinder = Unwinder::install().unwrap(); - // SAFETY: handler installed above. - let result = unsafe { unwinder.capture(&mut []) }; - assert_eq!(result.frames_written, 0); - assert!(result.truncated); - } + #[test] + fn capture_with_empty_buffer_reports_truncation() { + let unwinder = Unwinder::install().unwrap(); + // SAFETY: handler installed above. + let result = unsafe { unwinder.capture(&mut []) }; + assert_eq!(result.frames_written, 0); + assert!(result.truncated); + } - /// In debug builds, `capture` panics via `debug_assert!` when its SIGSEGV - /// handler has been replaced out from under it. This protects against - /// third-party signal handlers silently breaking stack-walk fault - /// recovery. - /// - /// Nextest runs each test in its own process, so replacing the SIGSEGV - /// handler here cannot affect sibling tests. - #[cfg(all( - debug_assertions, - target_os = "linux", - any(target_arch = "x86_64", target_arch = "aarch64") - ))] - #[test] - fn capture_debug_asserts_when_handler_replaced() { - use std::panic::{AssertUnwindSafe, catch_unwind}; - - let unwinder = Unwinder::install().unwrap(); - assert!(unwinder.verify_handler()); - - // Replace SIGSEGV with SIG_IGN so verify_handler() returns false. - // SAFETY: we restore our handler below on all exit paths via the - // guard; nextest's process isolation limits blast radius even if - // restoration is skipped. - let mut new_action: libc::sigaction = unsafe { std::mem::zeroed() }; - new_action.sa_sigaction = libc::SIG_IGN; - unsafe { libc::sigemptyset(&mut new_action.sa_mask) }; - let rc = unsafe { libc::sigaction(libc::SIGSEGV, &new_action, std::ptr::null_mut()) }; - assert_eq!(rc, 0, "failed to replace SIGSEGV handler"); - - struct RestoreGuard; - impl Drop for RestoreGuard { - fn drop(&mut self) { - // Best-effort restore of our handler. `Unwinder::install` - // is a no-op after the first successful call (idempotent - // flag), so use `sigaction` directly. Nextest's - // per-test process isolation means even if this fails, - // other tests are unaffected. - // SAFETY: process-global signal state; we rewrite - // SIGSEGV back to SIG_DFL so any subsequent fault during - // teardown terminates cleanly rather than being ignored. - let mut dfl: libc::sigaction = unsafe { std::mem::zeroed() }; - dfl.sa_sigaction = libc::SIG_DFL; - unsafe { libc::sigemptyset(&mut dfl.sa_mask) }; - let _ = unsafe { libc::sigaction(libc::SIGSEGV, &dfl, std::ptr::null_mut()) }; + /// In debug builds, `capture` panics via `debug_assert!` when its SIGSEGV + /// handler has been replaced out from under it. This protects against + /// third-party signal handlers silently breaking stack-walk fault + /// recovery. + /// + /// Nextest runs each test in its own process, so replacing the SIGSEGV + /// handler here cannot affect sibling tests. + #[cfg(debug_assertions)] + #[test] + fn capture_debug_asserts_when_handler_replaced() { + use std::panic::{AssertUnwindSafe, catch_unwind}; + + let unwinder = Unwinder::install().unwrap(); + assert!(unwinder.verify_handler()); + + // Replace SIGSEGV with SIG_IGN so verify_handler() returns false. + // SAFETY: we restore our handler below on all exit paths via the + // guard; nextest's process isolation limits blast radius even if + // restoration is skipped. + let mut new_action: libc::sigaction = unsafe { std::mem::zeroed() }; + new_action.sa_sigaction = libc::SIG_IGN; + unsafe { libc::sigemptyset(&mut new_action.sa_mask) }; + let rc = unsafe { libc::sigaction(libc::SIGSEGV, &new_action, std::ptr::null_mut()) }; + assert_eq!(rc, 0, "failed to replace SIGSEGV handler"); + + struct RestoreGuard; + impl Drop for RestoreGuard { + fn drop(&mut self) { + // Best-effort restore of our handler. `Unwinder::install` + // is a no-op after the first successful call (idempotent + // flag), so use `sigaction` directly. Nextest's + // per-test process isolation means even if this fails, + // other tests are unaffected. + // SAFETY: process-global signal state; we rewrite + // SIGSEGV back to SIG_DFL so any subsequent fault during + // teardown terminates cleanly rather than being ignored. + let mut dfl: libc::sigaction = unsafe { std::mem::zeroed() }; + dfl.sa_sigaction = libc::SIG_DFL; + unsafe { libc::sigemptyset(&mut dfl.sa_mask) }; + let _ = unsafe { libc::sigaction(libc::SIGSEGV, &dfl, std::ptr::null_mut()) }; + } } - } - let _guard = RestoreGuard; + let _guard = RestoreGuard; - assert!( - !unwinder.verify_handler(), - "precondition: handler should appear replaced" - ); + assert!( + !unwinder.verify_handler(), + "precondition: handler should appear replaced" + ); - // capture() must panic via debug_assert before doing any stack - // walking. - let mut out = [0u64; 16]; - let result = catch_unwind(AssertUnwindSafe(|| { - // SAFETY: we intentionally violate the "handler installed" - // precondition to verify the debug_assert fires. The panic - // from debug_assert aborts before the frame walk runs. - let _ = unsafe { unwinder.capture(&mut out) }; - })); - assert!( - result.is_err(), - "capture should panic via debug_assert when handler is replaced" - ); + // capture() must panic via debug_assert before doing any stack + // walking. + let mut out = [0u64; 16]; + let result = catch_unwind(AssertUnwindSafe(|| { + // SAFETY: we intentionally violate the "handler installed" + // precondition to verify the debug_assert fires. The panic + // from debug_assert aborts before the frame walk runs. + let _ = unsafe { unwinder.capture(&mut out) }; + })); + assert!( + result.is_err(), + "capture should panic via debug_assert when handler is replaced" + ); + } } } diff --git a/perf-self-profile/tests/symbolize_stack.rs b/perf-self-profile/tests/symbolize_stack.rs new file mode 100644 index 00000000..eb4b0789 --- /dev/null +++ b/perf-self-profile/tests/symbolize_stack.rs @@ -0,0 +1,43 @@ +#![cfg(target_os = "linux")] + +use dial9_perf_self_profile::unwinder::Unwinder; +use dial9_perf_self_profile::{read_proc_maps, resolve_symbol_with_maps}; + +use blazesym::symbolize::Symbolizer; + +#[inline(never)] +fn outer() -> Vec { + std::hint::black_box(inner()) +} + +#[inline(never)] +fn inner() -> Vec { + let unwinder = Unwinder::install().unwrap(); + let mut buf = [0u64; 64]; + // SAFETY: handler installed above; not inside a signal handler. + let result = unsafe { unwinder.capture(&mut buf) }; + assert!(result.frames_written >= 3, "expected at least 3 frames"); + buf[..result.frames_written].to_vec() +} + +#[test] +fn capture_and_symbolize() { + let stack = outer(); + let maps = read_proc_maps(); + let symbolizer = Symbolizer::new(); + + let names: Vec = stack + .iter() + .filter_map(|&addr| resolve_symbol_with_maps(addr, &symbolizer, &maps).name) + .collect(); + + let joined = names.join("\n"); + assert!( + names.iter().any(|n| n.contains("inner")), + "expected `inner` in stack:\n{joined}" + ); + assert!( + names.iter().any(|n| n.contains("outer")), + "expected `outer` in stack:\n{joined}" + ); +} From e82a886b1559b2da00a825ce6b057c4e79cc10a9 Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Tue, 12 May 2026 17:45:28 +0000 Subject: [PATCH 04/11] Fix CI: skip unwinder tests when SIGSEGV handler is replaced - Move install_is_idempotent tests into cfg-gated linux module (fixes macOS) - Add install_or_skip() helper that skips capture tests when the handler is replaced by sanitizers or seccomp (fixes ASAN + ECS simulation) - Guard symbolize_stack integration test with verify_handler check --- perf-self-profile/src/unwinder.rs | 83 ++++++++++++++-------- perf-self-profile/tests/symbolize_stack.rs | 17 +++-- 2 files changed, 64 insertions(+), 36 deletions(-) diff --git a/perf-self-profile/src/unwinder.rs b/perf-self-profile/src/unwinder.rs index bb0b1ec9..6f11253a 100644 --- a/perf-self-profile/src/unwinder.rs +++ b/perf-self-profile/src/unwinder.rs @@ -277,44 +277,57 @@ mod platform { #[cfg(test)] mod tests { - use super::*; - - #[test] - fn install_is_idempotent() { - let r1 = Unwinder::install(); - let r2 = Unwinder::install(); - let r3 = Unwinder::install(); - assert!(r1.is_ok()); - assert!(r2.is_ok()); - assert!(r3.is_ok()); - } - - #[test] - fn install_is_idempotent_across_threads() { - let handles: Vec<_> = (0..8) - .map(|_| std::thread::spawn(Unwinder::install)) - .collect(); - for h in handles { - assert!(h.join().unwrap().is_ok()); - } - } - #[cfg(all( target_os = "linux", any(target_arch = "x86_64", target_arch = "aarch64") ))] mod linux { - use super::*; + use super::super::*; + + /// Skip the test if something (e.g. ASAN) replaced our SIGSEGV handler + /// after install. Returns the Unwinder on success. + fn install_or_skip() -> Option { + let u = Unwinder::install().unwrap(); + if !u.verify_handler() { + eprintln!("skipping: SIGSEGV handler was replaced (sanitizer?)"); + return None; + } + Some(u) + } + + #[test] + fn install_is_idempotent() { + let r1 = Unwinder::install(); + let r2 = Unwinder::install(); + let r3 = Unwinder::install(); + assert!(r1.is_ok()); + assert!(r2.is_ok()); + assert!(r3.is_ok()); + } + + #[test] + fn install_is_idempotent_across_threads() { + let handles: Vec<_> = (0..8) + .map(|_| std::thread::spawn(Unwinder::install)) + .collect(); + for h in handles { + assert!(h.join().unwrap().is_ok()); + } + } #[test] fn verify_handler_true_after_install() { - let u = Unwinder::install().unwrap(); + let Some(u) = install_or_skip() else { + return; + }; assert!(u.verify_handler(), "handler should be active after install"); } #[test] fn capture_produces_frames() { - let unwinder = Unwinder::install().unwrap(); + let Some(unwinder) = install_or_skip() else { + return; + }; #[inline(never)] fn helper(u: &Unwinder) -> (CaptureResult, [u64; 64]) { let mut out = [0u64; 64]; @@ -342,7 +355,9 @@ mod tests { /// instruction inside `Unwinder::capture`'s body. #[test] fn frame_zero_points_into_caller_of_capture() { - let unwinder = Unwinder::install().unwrap(); + let Some(unwinder) = install_or_skip() else { + return; + }; #[inline(never)] fn helper(u: &Unwinder) -> u64 { @@ -383,7 +398,9 @@ mod tests { #[test] fn capture_respects_output_buffer_limit() { - let unwinder = Unwinder::install().unwrap(); + let Some(unwinder) = install_or_skip() else { + return; + }; let mut out = [0u64; 1]; // SAFETY: handler installed; test context is not a signal handler. let result = unsafe { unwinder.capture(&mut out) }; @@ -399,7 +416,9 @@ mod tests { #[test] fn capture_reports_truncation_with_tiny_buffer() { - let unwinder = Unwinder::install().unwrap(); + let Some(unwinder) = install_or_skip() else { + return; + }; // Build a small but real call chain so a 1-slot buffer is bound // to truncate. #[inline(never)] @@ -424,7 +443,9 @@ mod tests { #[test] fn capture_with_empty_buffer_reports_truncation() { - let unwinder = Unwinder::install().unwrap(); + let Some(unwinder) = install_or_skip() else { + return; + }; // SAFETY: handler installed above. let result = unsafe { unwinder.capture(&mut []) }; assert_eq!(result.frames_written, 0); @@ -443,7 +464,9 @@ mod tests { fn capture_debug_asserts_when_handler_replaced() { use std::panic::{AssertUnwindSafe, catch_unwind}; - let unwinder = Unwinder::install().unwrap(); + let Some(unwinder) = install_or_skip() else { + return; + }; assert!(unwinder.verify_handler()); // Replace SIGSEGV with SIG_IGN so verify_handler() returns false. diff --git a/perf-self-profile/tests/symbolize_stack.rs b/perf-self-profile/tests/symbolize_stack.rs index eb4b0789..e9172982 100644 --- a/perf-self-profile/tests/symbolize_stack.rs +++ b/perf-self-profile/tests/symbolize_stack.rs @@ -6,15 +6,14 @@ use dial9_perf_self_profile::{read_proc_maps, resolve_symbol_with_maps}; use blazesym::symbolize::Symbolizer; #[inline(never)] -fn outer() -> Vec { - std::hint::black_box(inner()) +fn outer(unwinder: &Unwinder) -> Vec { + std::hint::black_box(inner(unwinder)) } #[inline(never)] -fn inner() -> Vec { - let unwinder = Unwinder::install().unwrap(); +fn inner(unwinder: &Unwinder) -> Vec { let mut buf = [0u64; 64]; - // SAFETY: handler installed above; not inside a signal handler. + // SAFETY: handler installed by caller; not inside a signal handler. let result = unsafe { unwinder.capture(&mut buf) }; assert!(result.frames_written >= 3, "expected at least 3 frames"); buf[..result.frames_written].to_vec() @@ -22,7 +21,13 @@ fn inner() -> Vec { #[test] fn capture_and_symbolize() { - let stack = outer(); + let unwinder = Unwinder::install().unwrap(); + if !unwinder.verify_handler() { + eprintln!("skipping: SIGSEGV handler was replaced (sanitizer?)"); + return; + } + + let stack = outer(&unwinder); let maps = read_proc_maps(); let symbolizer = Symbolizer::new(); From e352175fd670f9b6121c74e189c178d4fd921825 Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Tue, 12 May 2026 23:58:17 +0000 Subject: [PATCH 05/11] fix: aarch64 pointer auth --- perf-self-profile/src/unwinder.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/perf-self-profile/src/unwinder.rs b/perf-self-profile/src/unwinder.rs index 6f11253a..bfc93a69 100644 --- a/perf-self-profile/src/unwinder.rs +++ b/perf-self-profile/src/unwinder.rs @@ -244,6 +244,10 @@ mod platform { // [saved_fp (x29) : u64, saved_lr : u64, ...] // so `*fp` and `*(fp + 8)` are valid reads of live stack memory. let ret_addr = unsafe { *(fp as *const usize).add(1) }; + // Strip pointer authentication bits (ARMv8.3-A PAC). The saved LR may + // be signed when compiled with `-Z branch-protection=pac-ret`. + // See: https://www.kernel.org/doc/html/latest/arch/arm64/pointer-authentication.html + let ret_addr = ret_addr & 0x0000_FFFF_FFFF_FFFF; let caller_fp = unsafe { *(fp as *const usize) }; (ret_addr, caller_fp, sp) } From 250735d96d13b080025522f9b5a8b8ae2e237462 Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Tue, 12 May 2026 23:58:38 +0000 Subject: [PATCH 06/11] strengthen test assertion --- perf-self-profile/tests/symbolize_stack.rs | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/perf-self-profile/tests/symbolize_stack.rs b/perf-self-profile/tests/symbolize_stack.rs index e9172982..584b1874 100644 --- a/perf-self-profile/tests/symbolize_stack.rs +++ b/perf-self-profile/tests/symbolize_stack.rs @@ -35,14 +35,12 @@ fn capture_and_symbolize() { .iter() .filter_map(|&addr| resolve_symbol_with_maps(addr, &symbolizer, &maps).name) .collect(); - - let joined = names.join("\n"); - assert!( - names.iter().any(|n| n.contains("inner")), - "expected `inner` in stack:\n{joined}" - ); - assert!( - names.iter().any(|n| n.contains("outer")), - "expected `outer` in stack:\n{joined}" + assert_eq!( + names[0..3], + vec![ + "symbolize_stack::inner", + "symbolize_stack::outer", + "symbolize_stack::capture_and_symbolize" + ] ); } From 945f1b98724361c27dd1b01034ebbba5605f74c7 Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Tue, 12 May 2026 23:59:39 +0000 Subject: [PATCH 07/11] add test for non-linux path --- .../src/sys/linux/fp_profiler/unwind.rs | 4 ++-- perf-self-profile/src/unwinder.rs | 19 ++++++++++++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs b/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs index ce9d9978..3f74899a 100644 --- a/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs +++ b/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs @@ -36,13 +36,13 @@ const MAX_FRAME_SIZE: usize = 0x40000; /// Safe to apply unconditionally, on non-PAC systems the upper bits are zero. #[cfg(target_arch = "aarch64")] #[inline(always)] -fn strip_pac(addr: usize) -> usize { +pub(crate) fn strip_pac(addr: usize) -> usize { addr & 0x0000_FFFF_FFFF_FFFF } #[cfg(target_arch = "x86_64")] #[inline(always)] -fn strip_pac(addr: usize) -> usize { +pub(crate) fn strip_pac(addr: usize) -> usize { addr } diff --git a/perf-self-profile/src/unwinder.rs b/perf-self-profile/src/unwinder.rs index bfc93a69..77160dc9 100644 --- a/perf-self-profile/src/unwinder.rs +++ b/perf-self-profile/src/unwinder.rs @@ -1,6 +1,6 @@ //! Framepointer-based stack unwinding //! -//! This implementation uses a SIGSEV fault handler to allow safe walking of stacks. Without +//! This implementation uses a SIGSEGV fault handler to allow safe walking of stacks. Without //! this, there is no way to perform framepointer unwinding without risking segfaults when walking //! stacks where framepointers are not enabled. //! @@ -246,8 +246,7 @@ mod platform { let ret_addr = unsafe { *(fp as *const usize).add(1) }; // Strip pointer authentication bits (ARMv8.3-A PAC). The saved LR may // be signed when compiled with `-Z branch-protection=pac-ret`. - // See: https://www.kernel.org/doc/html/latest/arch/arm64/pointer-authentication.html - let ret_addr = ret_addr & 0x0000_FFFF_FFFF_FFFF; + let ret_addr = crate::sys::fp_profiler::unwind::strip_pac(ret_addr); let caller_fp = unsafe { *(fp as *const usize) }; (ret_addr, caller_fp, sp) } @@ -522,4 +521,18 @@ mod tests { ); } } + + #[cfg(not(all( + target_os = "linux", + any(target_arch = "x86_64", target_arch = "aarch64") + )))] + mod unsupported { + use super::super::*; + + #[test] + fn install_returns_unsupported() { + let err = Unwinder::install().unwrap_err(); + assert_eq!(err.kind(), std::io::ErrorKind::Unsupported); + } + } } From 11be73308ccbb2564b8ec0243bbb2d4bd546512d Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Wed, 13 May 2026 00:12:49 +0000 Subject: [PATCH 08/11] fix: RestoreGuard saves/restores old sigaction instead of SIG_DFL The test's RestoreGuard previously reset SIGSEGV to SIG_DFL on drop. Under `cargo test`'s threaded harness (no process isolation), this could leave concurrent tests without the dial9 fault handler. Now we save the old sigaction when replacing and restore it on drop, making the test safe regardless of test runner. --- perf-self-profile/src/unwinder.rs | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/perf-self-profile/src/unwinder.rs b/perf-self-profile/src/unwinder.rs index 77160dc9..1441d70d 100644 --- a/perf-self-profile/src/unwinder.rs +++ b/perf-self-profile/src/unwinder.rs @@ -474,32 +474,24 @@ mod tests { // Replace SIGSEGV with SIG_IGN so verify_handler() returns false. // SAFETY: we restore our handler below on all exit paths via the - // guard; nextest's process isolation limits blast radius even if - // restoration is skipped. + // guard, making this safe even under `cargo test`'s threaded harness. let mut new_action: libc::sigaction = unsafe { std::mem::zeroed() }; new_action.sa_sigaction = libc::SIG_IGN; unsafe { libc::sigemptyset(&mut new_action.sa_mask) }; - let rc = unsafe { libc::sigaction(libc::SIGSEGV, &new_action, std::ptr::null_mut()) }; + let mut old_action: libc::sigaction = unsafe { std::mem::zeroed() }; + let rc = unsafe { libc::sigaction(libc::SIGSEGV, &new_action, &mut old_action) }; assert_eq!(rc, 0, "failed to replace SIGSEGV handler"); - struct RestoreGuard; + struct RestoreGuard(libc::sigaction); impl Drop for RestoreGuard { fn drop(&mut self) { - // Best-effort restore of our handler. `Unwinder::install` - // is a no-op after the first successful call (idempotent - // flag), so use `sigaction` directly. Nextest's - // per-test process isolation means even if this fails, - // other tests are unaffected. - // SAFETY: process-global signal state; we rewrite - // SIGSEGV back to SIG_DFL so any subsequent fault during - // teardown terminates cleanly rather than being ignored. - let mut dfl: libc::sigaction = unsafe { std::mem::zeroed() }; - dfl.sa_sigaction = libc::SIG_DFL; - unsafe { libc::sigemptyset(&mut dfl.sa_mask) }; - let _ = unsafe { libc::sigaction(libc::SIGSEGV, &dfl, std::ptr::null_mut()) }; + // Restore the original handler so concurrent tests under + // `cargo test` (threaded harness) still have fault recovery. + let _ = + unsafe { libc::sigaction(libc::SIGSEGV, &self.0, std::ptr::null_mut()) }; } } - let _guard = RestoreGuard; + let _guard = RestoreGuard(old_action); assert!( !unwinder.verify_handler(), From 75d82d69fcb417d29b68f8ad746b01d569e92b41 Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Wed, 13 May 2026 00:19:00 +0000 Subject: [PATCH 09/11] fix: widen frame_zero test window to 512 KiB for debug builds In unoptimized debug builds (as used by the ECS simulation CI target), nested #[inline(never)] functions can be laid out 100+ KiB from their symbol address. The previous 64 KiB window was too small. --- perf-self-profile/src/unwinder.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/perf-self-profile/src/unwinder.rs b/perf-self-profile/src/unwinder.rs index 1441d70d..362d50b5 100644 --- a/perf-self-profile/src/unwinder.rs +++ b/perf-self-profile/src/unwinder.rs @@ -388,8 +388,9 @@ mod tests { ); // Stronger: frame 0 should land inside `helper`'s body. Debug - // builds can be large, so use a 64 KiB window as an upper bound. - let helper_window = 64 * 1024u64; + // builds can be very large (100+ KiB per function with full + // debuginfo and no optimization), so use a generous window. + let helper_window = 512 * 1024u64; assert!( frame0 >= helper_start && frame0 < helper_start + helper_window, "frame 0 {:#x} should be inside helper [{:#x}..{:#x})", From 592656c00d8908cfefd04500df82c1c7d59a1569 Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Thu, 14 May 2026 18:14:14 +0000 Subject: [PATCH 10/11] fix: return CaptureResult from core unwind function --- perf-self-profile/benches/unwind.rs | 4 +- .../src/sys/linux/ctimer_sampler.rs | 4 +- .../src/sys/linux/fp_profiler/unwind.rs | 114 +++++++++++++----- perf-self-profile/src/unwinder.rs | 6 +- 4 files changed, 92 insertions(+), 36 deletions(-) diff --git a/perf-self-profile/benches/unwind.rs b/perf-self-profile/benches/unwind.rs index 1affc9eb..d30e1c79 100644 --- a/perf-self-profile/benches/unwind.rs +++ b/perf-self-profile/benches/unwind.rs @@ -93,8 +93,8 @@ fn read_registers() -> (usize, usize, usize) { fn do_unwind() -> usize { let (pc, fp, sp) = read_registers(); let mut out = [0u64; 128]; - let (n, _truncated) = unsafe { unwind(pc, fp, sp, &mut out) }; - n + let result = unsafe { unwind(pc, fp, sp, &mut out) }; + result.frames_written } /// Build a chain of exactly N inline(never) frames via recursion. diff --git a/perf-self-profile/src/sys/linux/ctimer_sampler.rs b/perf-self-profile/src/sys/linux/ctimer_sampler.rs index 9a95daa9..23a48c44 100644 --- a/perf-self-profile/src/sys/linux/ctimer_sampler.rs +++ b/perf-self-profile/src/sys/linux/ctimer_sampler.rs @@ -194,8 +194,8 @@ extern "C" fn sigprof_handler( slot.write(pid, tid, time, cpu, period); // Unwind into the slot's frame buffer - let (count, _truncated) = unwind::unwind_from_ucontext(ucontext, slot.frames_mut()); - slot.set_num_frames(count as u32); + let result = unwind::unwind_from_ucontext(ucontext, slot.frames_mut()); + slot.set_num_frames(result.frames_written as u32); slot.commit(); } diff --git a/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs b/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs index 3f74899a..6ba2a164 100644 --- a/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs +++ b/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs @@ -19,6 +19,7 @@ //! aborts the walk instead of crashing. use super::{SAFE_LOAD_FAULT, load}; +use crate::unwinder::CaptureResult; // Cap on frames per sample; prevents runaway walks on corrupted FP chains. pub const MAX_FRAMES: usize = 128; @@ -49,21 +50,24 @@ pub(crate) fn strip_pac(addr: usize) -> usize { /// Walk the frame-pointer chain starting from the given (pc, fp, sp) triple, /// usually obtained from a signal handler's ucontext. /// -/// Returns `(frames_written, truncated)`. `truncated` is `true` if the walk -/// stopped because the output buffer (or [`MAX_FRAMES`]) was full *and* at -/// least one additional frame would have been valid. A natural stop (end of -/// chain, faulty load, implausible pointer) produces `truncated = false`. +/// `truncated` is `true` if the walk stopped because the output buffer (or +/// [`MAX_FRAMES`]) was full *and* at least one additional frame would have been +/// valid. A natural stop (end of chain, faulty load, implausible pointer) +/// produces `truncated = false`. /// /// # Safety /// - `install_handler` must have been called. /// - Should generally be called from a signal handler where the target thread /// is stopped; walking a running thread's stack races with mutations. -pub unsafe fn unwind(pc: usize, mut fp: usize, sp: usize, out: &mut [u64]) -> (usize, bool) { +pub unsafe fn unwind(pc: usize, mut fp: usize, sp: usize, out: &mut [u64]) -> CaptureResult { let limit = out.len().min(MAX_FRAMES); if limit == 0 { // No room even for the interrupted PC. The walk would have produced // at least one frame, so this is truncation. - return (0, true); + return CaptureResult { + frames_written: 0, + truncated: true, + }; } out[0] = pc as u64; @@ -75,36 +79,57 @@ pub unsafe fn unwind(pc: usize, mut fp: usize, sp: usize, out: &mut [u64]) -> (u loop { // Validate current fp before reading from it. if fp < stack_lo || fp >= stack_hi { - return (n, false); + return CaptureResult { + frames_written: n, + truncated: false, + }; } if fp & (core::mem::size_of::() - 1) != 0 { - return (n, false); // misaligned + return CaptureResult { + frames_written: n, + truncated: false, + }; // misaligned } let saved_fp = unsafe { load(fp as *const usize) }; if saved_fp == SAFE_LOAD_FAULT { - return (n, false); + return CaptureResult { + frames_written: n, + truncated: false, + }; } let ret_addr_slot = (fp + core::mem::size_of::()) as *const usize; let ret_addr = strip_pac(unsafe { load(ret_addr_slot) }); if ret_addr == SAFE_LOAD_FAULT { - return (n, false); + return CaptureResult { + frames_written: n, + truncated: false, + }; } if !(DEAD_ZONE..=usize::MAX - DEAD_ZONE).contains(&ret_addr) { - return (n, false); + return CaptureResult { + frames_written: n, + truncated: false, + }; } // Frame pointer must advance (stacks grow down -> saved_fp > fp) // but not by more than MAX_FRAME_SIZE. if saved_fp <= fp || saved_fp - fp > MAX_FRAME_SIZE { - return (n, false); + return CaptureResult { + frames_written: n, + truncated: false, + }; } // We have a valid next frame. If we ran out of room to record it, // the walk is truncated. if n >= limit { - return (n, true); + return CaptureResult { + frames_written: n, + truncated: true, + }; } out[n] = ret_addr as u64; @@ -115,11 +140,11 @@ pub unsafe fn unwind(pc: usize, mut fp: usize, sp: usize, out: &mut [u64]) -> (u /// Unwind from inside a signal handler given the raw ucontext. /// -/// Returns `(frames_written, truncated)`; see [`unwind`] for details. +/// See [`unwind`] for details on the return value. /// /// # Safety /// `ucontext` must be the pointer the kernel passed to a SA_SIGINFO handler. -pub unsafe fn unwind_from_ucontext(ucontext: *mut libc::c_void, out: &mut [u64]) -> (usize, bool) { +pub unsafe fn unwind_from_ucontext(ucontext: *mut libc::c_void, out: &mut [u64]) -> CaptureResult { let (pc, fp, sp) = unsafe { read_pc_fp_sp(ucontext) }; unsafe { unwind(pc, fp, sp, out) } } @@ -175,7 +200,10 @@ mod tests { stack[4] = base + 4 * sz; let mut out = [0u64; MAX_FRAMES]; - let (n, truncated) = unsafe { unwind(0x40_0000, base, base, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, base, base, &mut out) }; assert_eq!(n, 3); assert!(!truncated); @@ -188,7 +216,10 @@ mod tests { fn frame_zero_is_always_the_interrupted_pc() { install(); let mut out = [0u64; MAX_FRAMES]; - let (n, truncated) = unsafe { unwind(0xDEAD, 0, 0x1000, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0xDEAD, 0, 0x1000, &mut out) }; assert_eq!(n, 1); assert!(!truncated); assert_eq!(out[0], 0xDEAD); @@ -199,7 +230,10 @@ mod tests { install(); let mut out = [0u64; MAX_FRAMES]; let sp = 0x7fff_0000_0000usize; - let (n, truncated) = unsafe { unwind(0x40_0000, sp + 1, sp, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, sp + 1, sp, &mut out) }; assert_eq!(n, 1); assert!(!truncated); } @@ -209,7 +243,10 @@ mod tests { install(); let mut out = [0u64; MAX_FRAMES]; let sp = 0x7fff_0000_0000usize; - let (n, truncated) = unsafe { unwind(0x40_0000, sp.wrapping_sub(8), sp, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, sp.wrapping_sub(8), sp, &mut out) }; assert_eq!(n, 1); assert!(!truncated); } @@ -225,7 +262,10 @@ mod tests { stack[1] = 0x100; // return addr in dead zone (< 0x1000) let mut out = [0u64; MAX_FRAMES]; - let (n, truncated) = unsafe { unwind(0x40_0000, base, base, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, base, base, &mut out) }; assert_eq!(n, 1); assert!(!truncated); } @@ -239,7 +279,10 @@ mod tests { stack[0] = base + MAX_FRAME_SIZE + 8; // jump exceeds MAX_FRAME_SIZE let mut out = [0u64; MAX_FRAMES]; - let (n, truncated) = unsafe { unwind(0x40_0000, base, base, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, base, base, &mut out) }; assert_eq!(n, 1); assert!(!truncated); } @@ -253,7 +296,10 @@ mod tests { stack[0] = base; // saved_fp == fp, doesn't advance let mut out = [0u64; MAX_FRAMES]; - let (n, truncated) = unsafe { unwind(0x40_0000, base, base, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, base, base, &mut out) }; assert_eq!(n, 1); assert!(!truncated); } @@ -262,7 +308,9 @@ mod tests { fn respects_output_buffer_limit() { install(); let mut out = [0u64; 1]; - let (n, _truncated) = unsafe { unwind(0x40_0000, 0, 0x1000, &mut out) }; + let CaptureResult { + frames_written: n, .. + } = unsafe { unwind(0x40_0000, 0, 0x1000, &mut out) }; assert_eq!(n, 1); assert_eq!(out[0], 0x40_0000); } @@ -283,7 +331,10 @@ mod tests { // Buffer fits only pc + 1 frame, so the 3rd frame is dropped. let mut out = [0u64; 2]; - let (n, truncated) = unsafe { unwind(0x40_0000, base, base, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, base, base, &mut out) }; assert_eq!(n, 2); assert!( truncated, @@ -297,7 +348,10 @@ mod tests { fn empty_buffer_reports_truncation() { install(); let mut out: [u64; 0] = []; - let (n, truncated) = unsafe { unwind(0x40_0000, 0, 0x1000, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, 0, 0x1000, &mut out) }; assert_eq!(n, 0); assert!(truncated, "empty buffer can always hold more"); } @@ -328,7 +382,10 @@ mod tests { // sp = fp keeps fp in range, page-aligned. let mut out = [0u64; MAX_FRAMES]; - let (n, truncated) = unsafe { unwind(0x40_0000, fp, fp, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, fp, fp, &mut out) }; // Unmap before the asserts so a panic doesn't leak the mapping. assert_eq!(unsafe { libc::munmap(guard, ps) }, 0); @@ -367,7 +424,10 @@ mod tests { unsafe { (fp as *mut usize).write(fp + 16) }; let mut out = [0u64; MAX_FRAMES]; - let (n, truncated) = unsafe { unwind(0x40_0000, fp, fp, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, fp, fp, &mut out) }; // Unmap before the asserts so a panic doesn't leak the mapping. assert_eq!(unsafe { libc::munmap(region, 2 * ps) }, 0); diff --git a/perf-self-profile/src/unwinder.rs b/perf-self-profile/src/unwinder.rs index 362d50b5..4dd2bcfc 100644 --- a/perf-self-profile/src/unwinder.rs +++ b/perf-self-profile/src/unwinder.rs @@ -154,11 +154,7 @@ mod platform { // SAFETY: handler is installed (caller holds Unwinder), and we are // not inside a SIGSEGV handler (see Unwinder::capture safety // contract). - let (frames_written, truncated) = unsafe { unwind(pc, fp, sp, out) }; - CaptureResult { - frames_written, - truncated, - } + unsafe { unwind(pc, fp, sp, out) } } /// Read `(pc, fp, sp)` such that `pc` is the PC to use for frame 0 and From 353e5ed99af77ba2cdcd1eca83dce58300cd82cf Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Thu, 14 May 2026 18:42:35 +0000 Subject: [PATCH 11/11] fix: comments, visibility --- perf-self-profile/src/sys/linux/fp_profiler/unwind.rs | 5 ++++- perf-self-profile/src/unwinder.rs | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs b/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs index 6ba2a164..3de2df5b 100644 --- a/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs +++ b/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs @@ -144,7 +144,10 @@ pub unsafe fn unwind(pc: usize, mut fp: usize, sp: usize, out: &mut [u64]) -> Ca /// /// # Safety /// `ucontext` must be the pointer the kernel passed to a SA_SIGINFO handler. -pub unsafe fn unwind_from_ucontext(ucontext: *mut libc::c_void, out: &mut [u64]) -> CaptureResult { +pub(crate) unsafe fn unwind_from_ucontext( + ucontext: *mut libc::c_void, + out: &mut [u64], +) -> CaptureResult { let (pc, fp, sp) = unsafe { read_pc_fp_sp(ucontext) }; unsafe { unwind(pc, fp, sp, out) } } diff --git a/perf-self-profile/src/unwinder.rs b/perf-self-profile/src/unwinder.rs index 4dd2bcfc..30e95b0e 100644 --- a/perf-self-profile/src/unwinder.rs +++ b/perf-self-profile/src/unwinder.rs @@ -7,7 +7,7 @@ //! Because of this, the unwinder must be "[`install`ed](Unwinder::install)" before //! you can use it to [`capture`](Unwinder::capture) a stack. //! -//! The unwinded stacks are only addresses. You must use a symbolizer separately to +//! The unwound stacks are only addresses. You must use a symbolizer separately to //! convert the addresses into function names. /// Result of a [`Unwinder::capture`] call. @@ -101,6 +101,8 @@ impl Unwinder { /// (binary compiled with `-C force-frame-pointers=yes`, no code /// currently executing in a prologue/epilogue window where `rbp` /// does not point at a saved-fp slot). + // Takes `&self` to prove that `Unwinder::install()` succeeded, even though + // no instance data is accessed internally. #[inline(never)] pub unsafe fn capture(&self, out: &mut [u64]) -> CaptureResult { // Debug-only check that our SIGSEGV handler is still the active