diff --git a/perf-self-profile/benches/unwind.rs b/perf-self-profile/benches/unwind.rs index cbf45a26..d30e1c79 100644 --- a/perf-self-profile/benches/unwind.rs +++ b/perf-self-profile/benches/unwind.rs @@ -93,7 +93,8 @@ fn read_registers() -> (usize, usize, usize) { fn do_unwind() -> usize { let (pc, fp, sp) = read_registers(); let mut out = [0u64; 128]; - unsafe { unwind(pc, fp, sp, &mut out) } + let result = unsafe { unwind(pc, fp, sp, &mut out) }; + result.frames_written } /// Build a chain of exactly N inline(never) frames via recursion. diff --git a/perf-self-profile/src/lib.rs b/perf-self-profile/src/lib.rs index f7c58293..912483af 100644 --- a/perf-self-profile/src/lib.rs +++ b/perf-self-profile/src/lib.rs @@ -42,6 +42,7 @@ mod sampler; mod symbolize; mod sys; pub mod tracepoint; +pub mod unwinder; pub use offline_symbolize::SymbolTableEntry; pub use sampler::{EventSource, Sample, SamplerConfig, SamplingMode}; diff --git a/perf-self-profile/src/sys/linux/ctimer_sampler.rs b/perf-self-profile/src/sys/linux/ctimer_sampler.rs index 10deba62..23a48c44 100644 --- a/perf-self-profile/src/sys/linux/ctimer_sampler.rs +++ b/perf-self-profile/src/sys/linux/ctimer_sampler.rs @@ -194,8 +194,8 @@ extern "C" fn sigprof_handler( slot.write(pid, tid, time, cpu, period); // Unwind into the slot's frame buffer - let count = unwind::unwind_from_ucontext(ucontext, slot.frames_mut()); - slot.set_num_frames(count as u32); + let result = unwind::unwind_from_ucontext(ucontext, slot.frames_mut()); + slot.set_num_frames(result.frames_written as u32); slot.commit(); } diff --git a/perf-self-profile/src/sys/linux/fp_profiler/mod.rs b/perf-self-profile/src/sys/linux/fp_profiler/mod.rs index 14d0337f..bc886605 100644 --- a/perf-self-profile/src/sys/linux/fp_profiler/mod.rs +++ b/perf-self-profile/src/sys/linux/fp_profiler/mod.rs @@ -102,6 +102,35 @@ mod supported { Ok(()) } + /// Check whether our SIGSEGV handler is still the active handler for + /// SIGSEGV. Returns `true` if the currently-installed handler matches + /// the one we registered in [`install_handler`]. + /// + /// Some other code may install its own SIGSEGV handler after ours, + /// either chaining to us or not. This function lets callers detect + /// that case so they can reinstall or skip capture. + /// + /// Performs one `sigaction` syscall; not suitable for hot paths. + pub fn handler_is_installed() -> bool { + // If we never installed, we cannot be installed. + if !HANDLER_INSTALLED.load(Ordering::SeqCst) { + return false; + } + + // Query current SIGSEGV handler without modifying it. + let mut current: libc::sigaction = unsafe { std::mem::zeroed() }; + // SAFETY: passing a null `act` pointer is valid per POSIX and only + // retrieves the current action. + let rc = unsafe { libc::sigaction(libc::SIGSEGV, ptr::null(), &mut current) }; + if rc != 0 { + return false; + } + + // Compare sa_sigaction function pointer against our handler. + let expected = sigsegv_handler as *const () as usize; + current.sa_sigaction == expected + } + /// SIGSEGV handler for `safe_load`: if the faulting PC is within the /// `safe_load_start..safe_load_end` instruction range, it skips the faulting /// load, and resumes execution. @@ -192,4 +221,4 @@ mod supported { } } -pub use supported::{install_handler, load}; +pub use supported::{handler_is_installed, install_handler, load}; diff --git a/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs b/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs index c455445e..3de2df5b 100644 --- a/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs +++ b/perf-self-profile/src/sys/linux/fp_profiler/unwind.rs @@ -19,6 +19,7 @@ //! aborts the walk instead of crashing. use super::{SAFE_LOAD_FAULT, load}; +use crate::unwinder::CaptureResult; // Cap on frames per sample; prevents runaway walks on corrupted FP chains. pub const MAX_FRAMES: usize = 128; @@ -36,27 +37,37 @@ const MAX_FRAME_SIZE: usize = 0x40000; /// Safe to apply unconditionally, on non-PAC systems the upper bits are zero. #[cfg(target_arch = "aarch64")] #[inline(always)] -fn strip_pac(addr: usize) -> usize { +pub(crate) fn strip_pac(addr: usize) -> usize { addr & 0x0000_FFFF_FFFF_FFFF } #[cfg(target_arch = "x86_64")] #[inline(always)] -fn strip_pac(addr: usize) -> usize { +pub(crate) fn strip_pac(addr: usize) -> usize { addr } /// Walk the frame-pointer chain starting from the given (pc, fp, sp) triple, /// usually obtained from a signal handler's ucontext. /// +/// `truncated` is `true` if the walk stopped because the output buffer (or +/// [`MAX_FRAMES`]) was full *and* at least one additional frame would have been +/// valid. A natural stop (end of chain, faulty load, implausible pointer) +/// produces `truncated = false`. +/// /// # Safety /// - `install_handler` must have been called. /// - Should generally be called from a signal handler where the target thread /// is stopped; walking a running thread's stack races with mutations. -pub unsafe fn unwind(pc: usize, mut fp: usize, sp: usize, out: &mut [u64]) -> usize { +pub unsafe fn unwind(pc: usize, mut fp: usize, sp: usize, out: &mut [u64]) -> CaptureResult { let limit = out.len().min(MAX_FRAMES); if limit == 0 { - return 0; + // No room even for the interrupted PC. The walk would have produced + // at least one frame, so this is truncation. + return CaptureResult { + frames_written: 0, + truncated: true, + }; } out[0] = pc as u64; @@ -65,47 +76,78 @@ pub unsafe fn unwind(pc: usize, mut fp: usize, sp: usize, out: &mut [u64]) -> us let stack_lo = sp; let stack_hi = sp.saturating_add(8 * 1024 * 1024); - while n < limit { + loop { + // Validate current fp before reading from it. if fp < stack_lo || fp >= stack_hi { - break; + return CaptureResult { + frames_written: n, + truncated: false, + }; } if fp & (core::mem::size_of::() - 1) != 0 { - break; // misaligned + return CaptureResult { + frames_written: n, + truncated: false, + }; // misaligned } let saved_fp = unsafe { load(fp as *const usize) }; if saved_fp == SAFE_LOAD_FAULT { - break; + return CaptureResult { + frames_written: n, + truncated: false, + }; } let ret_addr_slot = (fp + core::mem::size_of::()) as *const usize; let ret_addr = strip_pac(unsafe { load(ret_addr_slot) }); if ret_addr == SAFE_LOAD_FAULT { - break; + return CaptureResult { + frames_written: n, + truncated: false, + }; } if !(DEAD_ZONE..=usize::MAX - DEAD_ZONE).contains(&ret_addr) { - break; + return CaptureResult { + frames_written: n, + truncated: false, + }; } // Frame pointer must advance (stacks grow down -> saved_fp > fp) // but not by more than MAX_FRAME_SIZE. if saved_fp <= fp || saved_fp - fp > MAX_FRAME_SIZE { - break; + return CaptureResult { + frames_written: n, + truncated: false, + }; + } + + // We have a valid next frame. If we ran out of room to record it, + // the walk is truncated. + if n >= limit { + return CaptureResult { + frames_written: n, + truncated: true, + }; } out[n] = ret_addr as u64; n += 1; fp = saved_fp; } - - n } /// Unwind from inside a signal handler given the raw ucontext. /// +/// See [`unwind`] for details on the return value. +/// /// # Safety /// `ucontext` must be the pointer the kernel passed to a SA_SIGINFO handler. -pub unsafe fn unwind_from_ucontext(ucontext: *mut libc::c_void, out: &mut [u64]) -> usize { +pub(crate) unsafe fn unwind_from_ucontext( + ucontext: *mut libc::c_void, + out: &mut [u64], +) -> CaptureResult { let (pc, fp, sp) = unsafe { read_pc_fp_sp(ucontext) }; unsafe { unwind(pc, fp, sp, out) } } @@ -161,9 +203,13 @@ mod tests { stack[4] = base + 4 * sz; let mut out = [0u64; MAX_FRAMES]; - let n = unsafe { unwind(0x40_0000, base, base, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, base, base, &mut out) }; assert_eq!(n, 3); + assert!(!truncated); assert_eq!(out[0], 0x40_0000); // interrupted PC assert_eq!(out[1], 0x40_1000); assert_eq!(out[2], 0x40_2000); @@ -173,8 +219,12 @@ mod tests { fn frame_zero_is_always_the_interrupted_pc() { install(); let mut out = [0u64; MAX_FRAMES]; - let n = unsafe { unwind(0xDEAD, 0, 0x1000, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0xDEAD, 0, 0x1000, &mut out) }; assert_eq!(n, 1); + assert!(!truncated); assert_eq!(out[0], 0xDEAD); } @@ -183,8 +233,12 @@ mod tests { install(); let mut out = [0u64; MAX_FRAMES]; let sp = 0x7fff_0000_0000usize; - let n = unsafe { unwind(0x40_0000, sp + 1, sp, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, sp + 1, sp, &mut out) }; assert_eq!(n, 1); + assert!(!truncated); } #[test] @@ -192,8 +246,12 @@ mod tests { install(); let mut out = [0u64; MAX_FRAMES]; let sp = 0x7fff_0000_0000usize; - let n = unsafe { unwind(0x40_0000, sp.wrapping_sub(8), sp, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, sp.wrapping_sub(8), sp, &mut out) }; assert_eq!(n, 1); + assert!(!truncated); } #[test] @@ -207,8 +265,12 @@ mod tests { stack[1] = 0x100; // return addr in dead zone (< 0x1000) let mut out = [0u64; MAX_FRAMES]; - let n = unsafe { unwind(0x40_0000, base, base, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, base, base, &mut out) }; assert_eq!(n, 1); + assert!(!truncated); } #[test] @@ -220,8 +282,12 @@ mod tests { stack[0] = base + MAX_FRAME_SIZE + 8; // jump exceeds MAX_FRAME_SIZE let mut out = [0u64; MAX_FRAMES]; - let n = unsafe { unwind(0x40_0000, base, base, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, base, base, &mut out) }; assert_eq!(n, 1); + assert!(!truncated); } #[test] @@ -233,19 +299,66 @@ mod tests { stack[0] = base; // saved_fp == fp, doesn't advance let mut out = [0u64; MAX_FRAMES]; - let n = unsafe { unwind(0x40_0000, base, base, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, base, base, &mut out) }; assert_eq!(n, 1); + assert!(!truncated); } #[test] fn respects_output_buffer_limit() { install(); let mut out = [0u64; 1]; - let n = unsafe { unwind(0x40_0000, 0, 0x1000, &mut out) }; + let CaptureResult { + frames_written: n, .. + } = unsafe { unwind(0x40_0000, 0, 0x1000, &mut out) }; assert_eq!(n, 1); assert_eq!(out[0], 0x40_0000); } + #[test] + fn reports_truncation_when_buffer_fills_before_chain_ends() { + install(); + let sz = std::mem::size_of::(); + let mut stack = [0usize; 8]; + let base = stack.as_mut_ptr() as usize; + + // Chain of 3 valid frames (same structure as `walks_valid_frame_chain`). + stack[0] = base + 2 * sz; + stack[1] = 0x40_1000; + stack[2] = base + 4 * sz; + stack[3] = 0x40_2000; + stack[4] = base + 4 * sz; // terminates naturally here + + // Buffer fits only pc + 1 frame, so the 3rd frame is dropped. + let mut out = [0u64; 2]; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, base, base, &mut out) }; + assert_eq!(n, 2); + assert!( + truncated, + "should report truncation when output buffer fills" + ); + assert_eq!(out[0], 0x40_0000); + assert_eq!(out[1], 0x40_1000); + } + + #[test] + fn empty_buffer_reports_truncation() { + install(); + let mut out: [u64; 0] = []; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, 0, 0x1000, &mut out) }; + assert_eq!(n, 0); + assert!(truncated, "empty buffer can always hold more"); + } + fn page_size() -> usize { let ps = unsafe { libc::sysconf(libc::_SC_PAGESIZE) }; assert!(ps > 0, "sysconf(_SC_PAGESIZE) must return a positive value"); @@ -272,12 +385,16 @@ mod tests { // sp = fp keeps fp in range, page-aligned. let mut out = [0u64; MAX_FRAMES]; - let n = unsafe { unwind(0x40_0000, fp, fp, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, fp, fp, &mut out) }; // Unmap before the asserts so a panic doesn't leak the mapping. assert_eq!(unsafe { libc::munmap(guard, ps) }, 0); assert_eq!(n, 1, "unwind must stop when saved_fp load faults"); + assert!(!truncated); assert_eq!(out[0], 0x40_0000); } @@ -310,12 +427,16 @@ mod tests { unsafe { (fp as *mut usize).write(fp + 16) }; let mut out = [0u64; MAX_FRAMES]; - let n = unsafe { unwind(0x40_0000, fp, fp, &mut out) }; + let CaptureResult { + frames_written: n, + truncated, + } = unsafe { unwind(0x40_0000, fp, fp, &mut out) }; // Unmap before the asserts so a panic doesn't leak the mapping. assert_eq!(unsafe { libc::munmap(region, 2 * ps) }, 0); assert_eq!(n, 1, "unwind must stop when ret_addr load faults"); + assert!(!truncated); assert_eq!(out[0], 0x40_0000); } } diff --git a/perf-self-profile/src/unwinder.rs b/perf-self-profile/src/unwinder.rs new file mode 100644 index 00000000..30e95b0e --- /dev/null +++ b/perf-self-profile/src/unwinder.rs @@ -0,0 +1,529 @@ +//! Framepointer-based stack unwinding +//! +//! This implementation uses a SIGSEGV fault handler to allow safe walking of stacks. Without +//! this, there is no way to perform framepointer unwinding without risking segfaults when walking +//! stacks where framepointers are not enabled. +//! +//! Because of this, the unwinder must be "[`install`ed](Unwinder::install)" before +//! you can use it to [`capture`](Unwinder::capture) a stack. +//! +//! The unwound stacks are only addresses. You must use a symbolizer separately to +//! convert the addresses into function names. + +/// Result of a [`Unwinder::capture`] call. +/// +/// The captured program counters are written into the output buffer supplied +/// to `capture`; this struct describes the metadata of the capture. +/// +/// `#[non_exhaustive]` so new fields can be added without breaking callers. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[non_exhaustive] +pub struct CaptureResult { + /// Number of frames written into the caller's output buffer. + /// + /// Frames `out[0..frames_written]` are valid. + pub frames_written: usize, + /// `true` if the walk stopped because the output buffer (or the + /// internal `MAX_FRAMES` cap of 128) was full while at least one + /// additional frame was still walkable. When `true`, the outer + /// frames of the stack (closer to `main`) have been dropped. + pub truncated: bool, +} + +/// Handle that proves the SIGSEGV fault handler is installed. +/// +/// This type is zero-sized and can be freely copied and cloned. +#[derive(Clone, Copy, Debug)] +pub struct Unwinder { + _private: (), +} + +impl Unwinder { + /// Install the SIGSEGV fault handler used by stack capture. + /// Idempotent: safe to call multiple times from multiple threads. + /// + /// Returns `Err` if `sigaction` fails (Linux) or if the platform is + /// unsupported. + /// + /// # Requirements + /// - Frame pointers (build with `-C force-frame-pointers=yes`). + pub fn install() -> std::io::Result { + platform::install()?; + Ok(Self { _private: () }) + } + + /// Verify that our SIGSEGV handler is still the active handler for + /// SIGSEGV on this process. Returns `true` if the handler we installed + /// is still registered. + /// + /// Another library or the runtime may install its own SIGSEGV handler + /// after [`install`](Self::install) is called. If that handler does not + /// chain to ours, [`capture`](Self::capture) may crash on a bad frame + /// pointer instead of aborting the walk safely. Callers who need + /// defence against this can call `verify_handler` periodically or + /// before safety-critical captures. + /// + /// Performs one `sigaction` syscall. Not suitable for per-sample hot + /// paths. + pub fn verify_handler(&self) -> bool { + platform::verify_handler() + } + + /// Capture a stack trace of the calling thread into `out`. Returns a + /// [`CaptureResult`] describing the number of frames written and + /// whether the walk was truncated. Never allocates. + /// + /// # Frame-0 contract + /// `out[0]` is the return address of `capture` itself — i.e. a PC + /// *inside the caller of `capture`*. Subsequent frames walk outward + /// via the frame-pointer chain. Any `#[inline(never)]` shim inserted + /// between the user's code and `capture` will appear as an extra + /// frame; plain function calls with frame pointers enabled behave as + /// expected. + /// + /// # Buffer and truncation + /// At most `out.len().min(MAX_FRAMES)` frames are written (where + /// `MAX_FRAMES = 128`). If the real stack is deeper, innermost frames + /// are kept and outer frames are dropped; `CaptureResult::truncated` + /// is set to `true`. + /// + /// # Safety + /// - [`install`](Self::install) must have succeeded and the SIGSEGV + /// handler it registered must still be active. If another library + /// has replaced the SIGSEGV handler without chaining to ours, a + /// faulty frame-pointer chain can crash the process instead of + /// being caught. Use [`verify_handler`](Self::verify_handler) if + /// you need to defend against third-party signal handler + /// installation. + /// - Must not be called from inside a signal handler for SIGSEGV + /// (that would recurse into our own handler without bound). + /// - The calling thread's stack must be valid for frame-pointer walking + /// (binary compiled with `-C force-frame-pointers=yes`, no code + /// currently executing in a prologue/epilogue window where `rbp` + /// does not point at a saved-fp slot). + // Takes `&self` to prove that `Unwinder::install()` succeeded, even though + // no instance data is accessed internally. + #[inline(never)] + pub unsafe fn capture(&self, out: &mut [u64]) -> CaptureResult { + // Debug-only check that our SIGSEGV handler is still the active + // one. In release builds this is skipped to keep `capture` syscall-free + // on the hot path; callers who need this at runtime should use + // [`verify_handler`](Self::verify_handler) explicitly. + debug_assert!( + self.verify_handler(), + "Unwinder::capture called but our SIGSEGV handler is no longer active; \ + something replaced it without chaining. See Unwinder::verify_handler." + ); + // SAFETY: forwarding Unwinder::capture's own safety contract to + // platform::capture (handler installed, not in a SIGSEGV handler, + // frame pointers enabled). + unsafe { platform::capture(out) } + } +} + +#[cfg(all( + target_os = "linux", + any(target_arch = "x86_64", target_arch = "aarch64") +))] +mod platform { + use super::CaptureResult; + use crate::sys::fp_profiler::{handler_is_installed, install_handler, unwind::unwind}; + + pub fn install() -> std::io::Result<()> { + // SAFETY: installs the SIGSEGV handler for safe_load; idempotent. + unsafe { install_handler() } + } + + pub fn verify_handler() -> bool { + handler_is_installed() + } + + /// Called from [`Unwinder::capture`] (which is `#[inline(never)]`), so the + /// "current frame" observed here is `Unwinder::capture`'s frame. This + /// function is `#[inline(always)]` specifically so it does *not* introduce + /// another frame — see the `frame_zero_points_into_caller_of_capture` + /// test. + /// + /// # Safety + /// Same obligations as [`Unwinder::capture`]: handler installed, + /// not inside a SIGSEGV handler, frame pointers enabled. + #[inline(always)] + pub unsafe fn capture(out: &mut [u64]) -> CaptureResult { + // SAFETY: called from Unwinder::capture which forwards the full + // safety contract (handler installed, frame pointers enabled, not + // inside a SIGSEGV handler, not in a prologue/epilogue window). + let (pc, fp, sp) = unsafe { read_caller_regs() }; + // SAFETY: handler is installed (caller holds Unwinder), and we are + // not inside a SIGSEGV handler (see Unwinder::capture safety + // contract). + unsafe { unwind(pc, fp, sp, out) } + } + + /// Read `(pc, fp, sp)` such that `pc` is the PC to use for frame 0 and + /// `fp` is the saved frame pointer of the frame *above* the current one. + /// + /// Because this is `#[inline(always)]`, the `rbp`/`x29` read observes + /// `Unwinder::capture`'s frame (its one and only non-inlined ancestor). + /// - `*(rbp + 8)` on x86_64 / LR save slot on aarch64 is + /// `Unwinder::capture`'s return address → goes to `out[0]`. + /// - `*rbp` / `*x29` is the saved fp of the caller of `Unwinder::capture` + /// → where we start walking the chain. + /// + /// # Safety + /// - Must be called with `#[inline(always)]` preserved so the read + /// observes `Unwinder::capture`'s frame, not this helper's. If ever + /// actually inlined into a different caller or promoted to a + /// standalone frame, the returned `fp`/return-address semantics + /// change and the frame-0 contract breaks. + /// - Must only be called after [`install_handler`] has succeeded. + /// Reading `*fp` and `*(fp+8)` is a raw dereference of the stack; + /// the SIGSEGV handler installed by `install_handler` does *not* + /// cover these reads (it only covers `safe_load`). The reads are + /// safe only because — on a thread built with + /// `-C force-frame-pointers=yes` — the kernel/ABI guarantees that + /// `rbp`/`x29` always points at a valid `[saved_fp, ret_addr]` + /// pair for the currently executing function. + /// - The calling binary must be compiled with + /// `-C force-frame-pointers=yes`. Without frame pointers, `rbp` + /// may be used as a general-purpose register and the raw reads + /// will dereference arbitrary memory. + /// - Must not be called during function prologue/epilogue or other + /// windows where `rbp`/`x29` does not yet (or no longer) points at + /// a saved-fp slot. For the intended call site inside + /// `Unwinder::capture`'s body this is always satisfied; calling + /// from hand-written asm shims, signal-trampoline code, or from + /// within another `naked` function is not supported. + #[cfg(target_arch = "x86_64")] + #[inline(always)] + unsafe fn read_caller_regs() -> (usize, usize, usize) { + let fp: usize; + let sp: usize; + // SAFETY: Reading `rbp`/`rsp` with `nostack, nomem` has no memory + // side effects and cannot invalidate Rust's stack invariants; we + // do not modify either register. + unsafe { + core::arch::asm!( + "mov {fp}, rbp", + "mov {sp}, rsp", + fp = out(reg) fp, + sp = out(reg) sp, + options(nostack, nomem), + ); + } + // SAFETY: `fp` is `Unwinder::capture`'s frame pointer (see top-level + // # Safety note). On x86_64 System V, with frame pointers enabled, + // a compiler-generated frame begins with + // [saved_rbp : usize, return_addr : usize, ...] + // so `*fp` and `*(fp + 8)` are guaranteed to be valid reads of + // currently-live stack memory for this frame. + let ret_addr = unsafe { *(fp as *const usize).add(1) }; + let caller_fp = unsafe { *(fp as *const usize) }; + (ret_addr, caller_fp, sp) + } + + #[cfg(target_arch = "aarch64")] + #[inline(always)] + unsafe fn read_caller_regs() -> (usize, usize, usize) { + let fp: usize; + let sp: usize; + // SAFETY: Reading `x29`/`sp` with `nostack, nomem` has no memory + // side effects and cannot invalidate Rust's stack invariants; we + // do not modify either register. + unsafe { + core::arch::asm!( + "mov {fp}, x29", + "mov {sp}, sp", + fp = out(reg) fp, + sp = out(reg) sp, + options(nostack, nomem), + ); + } + // SAFETY: Same layout as x86_64 under AAPCS64 with frame pointers: + // [saved_fp (x29) : u64, saved_lr : u64, ...] + // so `*fp` and `*(fp + 8)` are valid reads of live stack memory. + let ret_addr = unsafe { *(fp as *const usize).add(1) }; + // Strip pointer authentication bits (ARMv8.3-A PAC). The saved LR may + // be signed when compiled with `-Z branch-protection=pac-ret`. + let ret_addr = crate::sys::fp_profiler::unwind::strip_pac(ret_addr); + let caller_fp = unsafe { *(fp as *const usize) }; + (ret_addr, caller_fp, sp) + } +} + +#[cfg(not(all( + target_os = "linux", + any(target_arch = "x86_64", target_arch = "aarch64") +)))] +mod platform { + use super::CaptureResult; + + pub fn install() -> std::io::Result<()> { + Err(std::io::Error::new( + std::io::ErrorKind::Unsupported, + "Unwinder is only available on Linux x86_64/aarch64", + )) + } + + pub fn verify_handler() -> bool { + false + } + + pub unsafe fn capture(_out: &mut [u64]) -> CaptureResult { + CaptureResult { + frames_written: 0, + truncated: false, + } + } +} + +#[cfg(test)] +mod tests { + #[cfg(all( + target_os = "linux", + any(target_arch = "x86_64", target_arch = "aarch64") + ))] + mod linux { + use super::super::*; + + /// Skip the test if something (e.g. ASAN) replaced our SIGSEGV handler + /// after install. Returns the Unwinder on success. + fn install_or_skip() -> Option { + let u = Unwinder::install().unwrap(); + if !u.verify_handler() { + eprintln!("skipping: SIGSEGV handler was replaced (sanitizer?)"); + return None; + } + Some(u) + } + + #[test] + fn install_is_idempotent() { + let r1 = Unwinder::install(); + let r2 = Unwinder::install(); + let r3 = Unwinder::install(); + assert!(r1.is_ok()); + assert!(r2.is_ok()); + assert!(r3.is_ok()); + } + + #[test] + fn install_is_idempotent_across_threads() { + let handles: Vec<_> = (0..8) + .map(|_| std::thread::spawn(Unwinder::install)) + .collect(); + for h in handles { + assert!(h.join().unwrap().is_ok()); + } + } + + #[test] + fn verify_handler_true_after_install() { + let Some(u) = install_or_skip() else { + return; + }; + assert!(u.verify_handler(), "handler should be active after install"); + } + + #[test] + fn capture_produces_frames() { + let Some(unwinder) = install_or_skip() else { + return; + }; + #[inline(never)] + fn helper(u: &Unwinder) -> (CaptureResult, [u64; 64]) { + let mut out = [0u64; 64]; + // SAFETY: handler installed via Unwinder::install above; test thread + // is not inside a signal handler. + let result = unsafe { u.capture(&mut out) }; + std::hint::black_box(&out); + (result, out) + } + let (result, out) = helper(&unwinder); + assert!( + result.frames_written >= 2, + "expected at least 2 frames, got {}", + result.frames_written + ); + for (i, &addr) in out.iter().enumerate().take(result.frames_written) { + assert_ne!(addr, 0, "frame {i} must be non-zero"); + } + } + + /// Tighter version of the frame-0 contract test: verify that frame 0 + /// lands inside `helper` (the caller of `capture`) rather than inside + /// `Unwinder::capture` itself. This catches the bug where the old + /// double-`#[inline(never)]` layering made frame 0 point at an + /// instruction inside `Unwinder::capture`'s body. + #[test] + fn frame_zero_points_into_caller_of_capture() { + let Some(unwinder) = install_or_skip() else { + return; + }; + + #[inline(never)] + fn helper(u: &Unwinder) -> u64 { + let mut out = [0u64; 64]; + // SAFETY: same as capture_produces_frames. + let result = unsafe { u.capture(&mut out) }; + std::hint::black_box(&out); + assert!(result.frames_written >= 1); + out[0] + } + + let frame0 = helper(&unwinder); + let helper_start = helper as *const () as u64; + let capture_fn = Unwinder::capture as *const () as u64; + + // Frame 0 must NOT be inside Unwinder::capture's body. Allow a + // generous 4 KiB window around its entry in case of debug bloat. + let capture_window = 4096u64; + assert!( + !(frame0 >= capture_fn && frame0 < capture_fn + capture_window), + "frame 0 {:#x} must not be inside Unwinder::capture [{:#x}..{:#x})", + frame0, + capture_fn, + capture_fn + capture_window, + ); + + // Stronger: frame 0 should land inside `helper`'s body. Debug + // builds can be very large (100+ KiB per function with full + // debuginfo and no optimization), so use a generous window. + let helper_window = 512 * 1024u64; + assert!( + frame0 >= helper_start && frame0 < helper_start + helper_window, + "frame 0 {:#x} should be inside helper [{:#x}..{:#x})", + frame0, + helper_start, + helper_start + helper_window, + ); + } + + #[test] + fn capture_respects_output_buffer_limit() { + let Some(unwinder) = install_or_skip() else { + return; + }; + let mut out = [0u64; 1]; + // SAFETY: handler installed; test context is not a signal handler. + let result = unsafe { unwinder.capture(&mut out) }; + assert!( + result.frames_written <= 1, + "expected at most 1 frame, got {}", + result.frames_written + ); + if result.frames_written == 1 { + assert_ne!(out[0], 0, "frame 0 must be non-zero when written"); + } + } + + #[test] + fn capture_reports_truncation_with_tiny_buffer() { + let Some(unwinder) = install_or_skip() else { + return; + }; + // Build a small but real call chain so a 1-slot buffer is bound + // to truncate. + #[inline(never)] + fn depth_2(u: &Unwinder) -> CaptureResult { + let mut out = [0u64; 1]; + // SAFETY: handler installed above. + let r = unsafe { u.capture(&mut out) }; + std::hint::black_box(&out); + r + } + #[inline(never)] + fn depth_1(u: &Unwinder) -> CaptureResult { + std::hint::black_box(depth_2(u)) + } + let result = depth_1(&unwinder); + assert_eq!(result.frames_written, 1); + assert!( + result.truncated, + "a 1-slot buffer with a multi-frame stack must report truncated" + ); + } + + #[test] + fn capture_with_empty_buffer_reports_truncation() { + let Some(unwinder) = install_or_skip() else { + return; + }; + // SAFETY: handler installed above. + let result = unsafe { unwinder.capture(&mut []) }; + assert_eq!(result.frames_written, 0); + assert!(result.truncated); + } + + /// In debug builds, `capture` panics via `debug_assert!` when its SIGSEGV + /// handler has been replaced out from under it. This protects against + /// third-party signal handlers silently breaking stack-walk fault + /// recovery. + /// + /// Nextest runs each test in its own process, so replacing the SIGSEGV + /// handler here cannot affect sibling tests. + #[cfg(debug_assertions)] + #[test] + fn capture_debug_asserts_when_handler_replaced() { + use std::panic::{AssertUnwindSafe, catch_unwind}; + + let Some(unwinder) = install_or_skip() else { + return; + }; + assert!(unwinder.verify_handler()); + + // Replace SIGSEGV with SIG_IGN so verify_handler() returns false. + // SAFETY: we restore our handler below on all exit paths via the + // guard, making this safe even under `cargo test`'s threaded harness. + let mut new_action: libc::sigaction = unsafe { std::mem::zeroed() }; + new_action.sa_sigaction = libc::SIG_IGN; + unsafe { libc::sigemptyset(&mut new_action.sa_mask) }; + let mut old_action: libc::sigaction = unsafe { std::mem::zeroed() }; + let rc = unsafe { libc::sigaction(libc::SIGSEGV, &new_action, &mut old_action) }; + assert_eq!(rc, 0, "failed to replace SIGSEGV handler"); + + struct RestoreGuard(libc::sigaction); + impl Drop for RestoreGuard { + fn drop(&mut self) { + // Restore the original handler so concurrent tests under + // `cargo test` (threaded harness) still have fault recovery. + let _ = + unsafe { libc::sigaction(libc::SIGSEGV, &self.0, std::ptr::null_mut()) }; + } + } + let _guard = RestoreGuard(old_action); + + assert!( + !unwinder.verify_handler(), + "precondition: handler should appear replaced" + ); + + // capture() must panic via debug_assert before doing any stack + // walking. + let mut out = [0u64; 16]; + let result = catch_unwind(AssertUnwindSafe(|| { + // SAFETY: we intentionally violate the "handler installed" + // precondition to verify the debug_assert fires. The panic + // from debug_assert aborts before the frame walk runs. + let _ = unsafe { unwinder.capture(&mut out) }; + })); + assert!( + result.is_err(), + "capture should panic via debug_assert when handler is replaced" + ); + } + } + + #[cfg(not(all( + target_os = "linux", + any(target_arch = "x86_64", target_arch = "aarch64") + )))] + mod unsupported { + use super::super::*; + + #[test] + fn install_returns_unsupported() { + let err = Unwinder::install().unwrap_err(); + assert_eq!(err.kind(), std::io::ErrorKind::Unsupported); + } + } +} diff --git a/perf-self-profile/tests/symbolize_stack.rs b/perf-self-profile/tests/symbolize_stack.rs new file mode 100644 index 00000000..584b1874 --- /dev/null +++ b/perf-self-profile/tests/symbolize_stack.rs @@ -0,0 +1,46 @@ +#![cfg(target_os = "linux")] + +use dial9_perf_self_profile::unwinder::Unwinder; +use dial9_perf_self_profile::{read_proc_maps, resolve_symbol_with_maps}; + +use blazesym::symbolize::Symbolizer; + +#[inline(never)] +fn outer(unwinder: &Unwinder) -> Vec { + std::hint::black_box(inner(unwinder)) +} + +#[inline(never)] +fn inner(unwinder: &Unwinder) -> Vec { + let mut buf = [0u64; 64]; + // SAFETY: handler installed by caller; not inside a signal handler. + let result = unsafe { unwinder.capture(&mut buf) }; + assert!(result.frames_written >= 3, "expected at least 3 frames"); + buf[..result.frames_written].to_vec() +} + +#[test] +fn capture_and_symbolize() { + let unwinder = Unwinder::install().unwrap(); + if !unwinder.verify_handler() { + eprintln!("skipping: SIGSEGV handler was replaced (sanitizer?)"); + return; + } + + let stack = outer(&unwinder); + let maps = read_proc_maps(); + let symbolizer = Symbolizer::new(); + + let names: Vec = stack + .iter() + .filter_map(|&addr| resolve_symbol_with_maps(addr, &symbolizer, &maps).name) + .collect(); + assert_eq!( + names[0..3], + vec![ + "symbolize_stack::inner", + "symbolize_stack::outer", + "symbolize_stack::capture_and_symbolize" + ] + ); +}