From c1de8ceda73a467e7c9c8f3aa975248513aa65c6 Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Wed, 13 May 2026 00:56:44 +0000 Subject: [PATCH 1/2] fix(test): make test_schedstat_fd_closed_on_thread_exit not flaky The previous assertion checked fcntl(fd, F_GETFD) != -1 after the spawned thread exited. Linux readily recycles fd numbers, and other tests in the same suite open many files concurrently (including their own per-thread schedstat fds via tokio worker poll callbacks). When a sibling test opened a file in the small window after our spawned thread closed its schedstat fd, the fd number got reused and the check incorrectly reported a leak. Capture the path the fd points to via /proc/self/fd/ inside the spawned thread, and after join verify that the fd is either closed or now points at a different file. Each thread opens schedstat for its own tid, so /proc/self/task//schedstat is unique to the spawned thread's open and no concurrent test can collide on it. Verified by running the full cargo test -p dial9-tokio-telemetry --lib suite 13 times in a row with zero failures (previously failed roughly 3/10 runs), and confirmed the new test still catches a real leak by temporarily mem::forget'ing the OwnedFd in the production path. --- dial9-tokio-telemetry/src/telemetry/events.rs | 48 +++++++++++++++---- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/dial9-tokio-telemetry/src/telemetry/events.rs b/dial9-tokio-telemetry/src/telemetry/events.rs index e17b9232..3d0a0745 100644 --- a/dial9-tokio-telemetry/src/telemetry/events.rs +++ b/dial9-tokio-telemetry/src/telemetry/events.rs @@ -609,19 +609,49 @@ mod tests { #[test] #[cfg(any(target_os = "linux", target_os = "android"))] fn test_schedstat_fd_closed_on_thread_exit() { - // fcntl(fd, F_GETFD) returns -1 with errno=EBADF for closed fds. - // SAFETY: F_GETFD with a raw fd is side-effect free. - fn fd_is_open(fd: std::os::fd::RawFd) -> bool { - unsafe { libc::fcntl(fd, libc::F_GETFD) != -1 } + // We cannot just check `fcntl(fd, F_GETFD) != -1` after the thread + // exits: the kernel readily recycles fd numbers, and other tests in the + // suite open many files (including their own per-thread schedstat fds) + // concurrently. A closed fd may immediately be reused by an unrelated + // open in another thread, making a naive open-check flaky. + // + // Instead, record the path the fd points at (via /proc/self/fd/) + // *inside* the spawned thread, and after join check whether the fd is + // either closed or now points at a different file. Each thread opens + // schedstat for its own tid, so the captured path + // `/proc/self/task//schedstat` is unique to the spawned + // thread's open: no other thread will ever open that exact path. If the + // fd is still open and still points at it, the OwnedFd was leaked. + + fn fd_target(fd: std::os::fd::RawFd) -> Option { + std::fs::read_link(format!("/proc/self/fd/{fd}")).ok() } - let fd = std::thread::spawn(|| SchedStat::read_current().unwrap().fd) - .join() - .unwrap(); + let (fd, opened_path) = std::thread::spawn(|| { + let fd = SchedStat::read_current().unwrap().fd; + let path = fd_target(fd).expect("readlink /proc/self/fd/ in live thread"); + (fd, path) + }) + .join() + .unwrap(); + // Sanity-check: confirm we actually captured a schedstat path so the + // assertion below is meaningful (rather than passing because we read + // the wrong link). assert!( - !fd_is_open(fd), - "schedstat fd {fd} leaked after thread exit" + opened_path.to_string_lossy().ends_with("/schedstat"), + "expected schedstat path, got {opened_path:?}" ); + + match fd_target(fd) { + None => { /* fd is closed - good. */ } + Some(now) if now != opened_path => { + // fd was closed and the slot was reused for an unrelated open + // in another thread. That still means our OwnedFd was dropped. + } + Some(now) => panic!( + "schedstat fd {fd} leaked after thread exit (still points at {now:?})" + ), + } } } From cc6bb96e89ac9b3eeccbbfc9a391070d19377b65 Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Wed, 13 May 2026 14:22:05 +0000 Subject: [PATCH 2/2] style: fix nightly rustfmt match arm formatting --- dial9-tokio-telemetry/src/telemetry/events.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dial9-tokio-telemetry/src/telemetry/events.rs b/dial9-tokio-telemetry/src/telemetry/events.rs index 3d0a0745..63cc7dfb 100644 --- a/dial9-tokio-telemetry/src/telemetry/events.rs +++ b/dial9-tokio-telemetry/src/telemetry/events.rs @@ -649,9 +649,9 @@ mod tests { // fd was closed and the slot was reused for an unrelated open // in another thread. That still means our OwnedFd was dropped. } - Some(now) => panic!( - "schedstat fd {fd} leaked after thread exit (still points at {now:?})" - ), + Some(now) => { + panic!("schedstat fd {fd} leaked after thread exit (still points at {now:?})") + } } } }