From c0f9a53866cf86adb3f9796d2c1b6d55f3b2a02a Mon Sep 17 00:00:00 2001 From: Ernesto Cambuston Date: Sat, 16 May 2026 18:49:12 -0700 Subject: [PATCH 1/5] worker: set QoS USER_INITIATED on macOS for P-core preference Apple Silicon's XNU scheduler will park UTILITY/BACKGROUND threads on efficiency cores. Single-thread-bursty workloads (swift-frontend, clang) typical in iOS RBE builds can run 2x-3x slower on an E-core, so tag the worker process with QOS_CLASS_USER_INITIATED to bias scheduling toward P-cores. The setter runs in three places: - Main thread before tokio runtime creation so worker threads inherit the class via pthread QoS inheritance. - tokio Builder::on_thread_start hook as belt-and-suspenders for any thread (e.g. blocking pool) that misses inheritance. - Top of LocalWorker::run for the same reason. Implementation uses libc's pthread_set_qos_class_self_np binding; the new `nativelink_worker::qos` module is compile-gated so non-macOS targets emit no call and pull in no symbol. A round-trip test on macOS verifies the kernel accepted the class change. Ported from upstream commit 0fce813b (TraceMachina/nativelink #2243). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- nativelink-worker/BUILD.bazel | 3 + nativelink-worker/Cargo.toml | 3 + nativelink-worker/src/lib.rs | 1 + nativelink-worker/src/local_worker.rs | 8 ++ nativelink-worker/src/qos.rs | 115 ++++++++++++++++++++++++++ src/bin/nativelink.rs | 11 +++ 6 files changed, 141 insertions(+) create mode 100644 nativelink-worker/src/qos.rs diff --git a/nativelink-worker/BUILD.bazel b/nativelink-worker/BUILD.bazel index 6e7847e75..bf6010cdf 100644 --- a/nativelink-worker/BUILD.bazel +++ b/nativelink-worker/BUILD.bazel @@ -13,6 +13,7 @@ rust_library( "src/directory_cache.rs", "src/lib.rs", "src/local_worker.rs", + "src/qos.rs", "src/running_actions_manager.rs", "src/worker_api_client_wrapper.rs", "src/worker_utils.rs", @@ -51,6 +52,7 @@ rust_library( "@crates//:uuid", ] + select({ "@platforms//os:linux": ["@crates//:libc"], + "@platforms//os:macos": ["@crates//:libc"], "//conditions:default": [], }), ) @@ -98,6 +100,7 @@ rust_test_suite( "@crates//:which", ] + select({ "@platforms//os:linux": ["@crates//:libc"], + "@platforms//os:macos": ["@crates//:libc"], "//conditions:default": [], }), ) diff --git a/nativelink-worker/Cargo.toml b/nativelink-worker/Cargo.toml index 5888eee0e..bb22d2732 100644 --- a/nativelink-worker/Cargo.toml +++ b/nativelink-worker/Cargo.toml @@ -58,6 +58,9 @@ uuid = { version = "1.16.0", default-features = false, features = [ [target.'cfg(target_os = "linux")'.dependencies] libc = { version = "0.2.183", default-features = false } +[target.'cfg(target_os = "macos")'.dependencies] +libc = { version = "0.2.183", default-features = false } + [dev-dependencies] nativelink-macro = { path = "../nativelink-macro" } diff --git a/nativelink-worker/src/lib.rs b/nativelink-worker/src/lib.rs index 22aaa5981..8c6b0ec27 100644 --- a/nativelink-worker/src/lib.rs +++ b/nativelink-worker/src/lib.rs @@ -16,6 +16,7 @@ pub mod directory_cache; pub mod local_worker; #[cfg(target_os = "linux")] pub mod namespace_utils; +pub mod qos; 
pub mod running_actions_manager; pub mod worker_api_client_wrapper; pub mod worker_utils; diff --git a/nativelink-worker/src/local_worker.rs b/nativelink-worker/src/local_worker.rs index c2f6e37e7..0e3a9d760 100644 --- a/nativelink-worker/src/local_worker.rs +++ b/nativelink-worker/src/local_worker.rs @@ -798,6 +798,14 @@ impl LocalWorker, ) -> Result<(), Error> { + // Belt-and-suspenders QoS bump: the main binary already calls + // this before runtime creation so the tokio worker threads + // inherit P-core preference via pthread QoS inheritance, but + // any thread that reaches this point should also be tagged in + // case it was spawned by a path that bypassed `on_thread_start`. + // No-op on non-macOS. + let _ = crate::qos::set_user_initiated(); + let sleep_fn = self .sleep_fn .take() diff --git a/nativelink-worker/src/qos.rs b/nativelink-worker/src/qos.rs new file mode 100644 index 000000000..7e1eac8b6 --- /dev/null +++ b/nativelink-worker/src/qos.rs @@ -0,0 +1,115 @@ +// Copyright 2024 The NativeLink Authors. All rights reserved. +// +// Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// See LICENSE file for details +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Darwin `QoS` (Quality of Service) helpers for worker scheduling. +//! +//! Apple Silicon (M-series) CPUs have a heterogeneous topology with +//! performance ("P") and efficiency ("E") cores. XNU's scheduler routes +//! threads to P or E cores in part based on the thread's `QoS` class. The +//! 
default class assigned to long-running background daemons is typically +//! `UTILITY` or `BACKGROUND`, both of which the scheduler may park on +//! E-cores. +//! +//! Single-thread-bursty workloads such as `swift-frontend` and `clang` +//! invocations (typical in iOS RBE builds) can run 2x–3x slower when +//! pinned to an E-core. Tagging the worker process with +//! `QOS_CLASS_USER_INITIATED` tells the scheduler to treat its threads +//! as foreground-equivalent and bias placement toward P-cores. +//! +//! On Linux and Windows these helpers compile away to nothing — they are +//! intentionally not behind a runtime branch so non-macOS builds never +//! emit a call. + +/// Sets the calling thread's `QoS` class to `USER_INITIATED` on macOS. +/// +/// On non-macOS targets this is a compile-time no-op (the call site +/// expands to nothing after monomorphization / dead-code elimination). +/// +/// Returns `true` if the call succeeded or the platform doesn't need it; +/// returns `false` only on macOS when the underlying pthread call fails. +/// +/// Safe to call from any thread, including tokio runtime worker threads +/// via `Builder::on_thread_start`. +#[inline] +pub fn set_user_initiated() -> bool { + #[cfg(target_os = "macos")] + { + // SAFETY: `pthread_set_qos_class_self_np` is a thread-local + // setter with no preconditions on the caller; passing a valid + // enum variant and relative priority 0 is always defined. + let ret = unsafe { + libc::pthread_set_qos_class_self_np(libc::qos_class_t::QOS_CLASS_USER_INITIATED, 0) + }; + ret == 0 + } + #[cfg(not(target_os = "macos"))] + { + true + } +} + +#[cfg(all(test, target_os = "macos"))] +mod macos_tests { + use super::set_user_initiated; + + /// Proves the `QoS` call is wired up on macOS and the underlying + /// Darwin symbol resolves at link time. A failure here means the + /// worker would silently keep running on E-cores. 
+ #[test] + fn sets_user_initiated_on_current_thread() { + assert!( + set_user_initiated(), + "pthread_set_qos_class_self_np(USER_INITIATED) returned non-zero", + ); + + // Read it back to confirm the kernel accepted the new class. + let mut class: libc::qos_class_t = libc::qos_class_t::QOS_CLASS_UNSPECIFIED; + let mut rel_prio: libc::c_int = 0; + // SAFETY: out-pointers point to stack-allocated, properly sized + // and aligned storage owned by this thread. + let ret = unsafe { + libc::pthread_get_qos_class_np( + libc::pthread_self(), + core::ptr::from_mut(&mut class), + core::ptr::from_mut(&mut rel_prio), + ) + }; + assert_eq!(ret, 0, "pthread_get_qos_class_np failed: {ret}"); + // `qos_class_t` is a `#[repr(u32)]` C enum that does not derive + // `PartialEq` in libc, so compare the underlying discriminants. + assert_eq!( + class as u32, + libc::qos_class_t::QOS_CLASS_USER_INITIATED as u32, + "`QoS` class did not update; thread will be eligible for E-core scheduling", + ); + } +} + +#[cfg(all(test, not(target_os = "macos")))] +mod non_macos_tests { + use super::set_user_initiated; + + /// On Linux/Windows the function must be a true no-op that always + /// reports success — there is no runtime cost and no platform call. + #[test] + fn is_a_noop_on_non_macos() { + assert!(set_user_initiated()); + } +} + +/// Marker constant that exists only when `target_os` is not `macos`. It +/// asserts nothing by itself: the guarantee that this module references no +/// libc symbol there comes from the `cfg` gates on the items above. 
+#[cfg(not(target_os = "macos"))] +pub const NON_MACOS_IS_NOOP: () = (); diff --git a/src/bin/nativelink.rs b/src/bin/nativelink.rs index 2826dbb71..aa1e9a12f 100644 --- a/src/bin/nativelink.rs +++ b/src/bin/nativelink.rs @@ -718,8 +718,19 @@ fn get_config() -> Result { } fn main() -> Result<(), Box> { + // Set QoS to USER_INITIATED on the main thread *before* the tokio + // runtime is built so the spawned worker threads inherit P-core + // scheduling preference via pthread QoS inheritance on Apple + // Silicon. `on_thread_start` below is a belt-and-suspenders hook + // for any thread that misses the inherited class (e.g. tokio + // blocking pool threads created lazily). No-op on non-macOS. + let _ = nativelink_worker::qos::set_user_initiated(); + #[expect(clippy::disallowed_methods, reason = "starting main runtime")] let runtime = tokio::runtime::Builder::new_multi_thread() + .on_thread_start(|| { + let _ = nativelink_worker::qos::set_user_initiated(); + }) .enable_all() .build()?; From 28dcb3ddae87e9e229b01360330698d5dd9c2c6a Mon Sep 17 00:00:00 2001 From: Ernesto Cambuston Date: Sat, 16 May 2026 19:05:52 -0700 Subject: [PATCH 2/5] worker: test tokio worker thread inherits USER_INITIATED QoS The QoS scheme in PR #2243 hinges on tokio worker threads actually seeing QOS_CLASS_USER_INITIATED at task-runtime; without an end-to-end test the on_thread_start hook could silently regress (e.g. if the hook ran on the wrong thread or the kernel rejected the class) and the worker would quietly fall back to E-core scheduling. Adds a macOS-only test that builds a fresh multi-threaded tokio runtime with the same on_thread_start hook used in main, spawns a task to force execution on a worker thread, and reads back the class with pthread_get_qos_class_np. Also refactors the existing single-thread test to share a helper. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- nativelink-worker/src/qos.rs | 61 +++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/nativelink-worker/src/qos.rs b/nativelink-worker/src/qos.rs index 7e1eac8b6..f79df3157 100644 --- a/nativelink-worker/src/qos.rs +++ b/nativelink-worker/src/qos.rs @@ -63,17 +63,9 @@ pub fn set_user_initiated() -> bool { mod macos_tests { use super::set_user_initiated; - /// Proves the `QoS` call is wired up on macOS and the underlying - /// Darwin symbol resolves at link time. A failure here means the - /// worker would silently keep running on E-cores. - #[test] - fn sets_user_initiated_on_current_thread() { - assert!( - set_user_initiated(), - "pthread_set_qos_class_self_np(USER_INITIATED) returned non-zero", - ); - - // Read it back to confirm the kernel accepted the new class. + /// Reads the current thread's `QoS` class via `pthread_get_qos_class_np`. + /// Panics with a contextual message on failure (only called from tests). + fn current_qos_class() -> libc::qos_class_t { let mut class: libc::qos_class_t = libc::qos_class_t::QOS_CLASS_UNSPECIFIED; let mut rel_prio: libc::c_int = 0; // SAFETY: out-pointers point to stack-allocated, properly sized @@ -86,14 +78,59 @@ mod macos_tests { ) }; assert_eq!(ret, 0, "pthread_get_qos_class_np failed: {ret}"); + class + } + + /// Proves the `QoS` call is wired up on macOS and the underlying + /// Darwin symbol resolves at link time. A failure here means the + /// worker would silently keep running on E-cores. + #[test] + fn sets_user_initiated_on_current_thread() { + assert!( + set_user_initiated(), + "pthread_set_qos_class_self_np(USER_INITIATED) returned non-zero", + ); // `qos_class_t` is a `#[repr(u32)]` C enum that does not derive // `PartialEq` in libc, so compare the underlying discriminants. 
assert_eq!( - class as u32, + current_qos_class() as u32, libc::qos_class_t::QOS_CLASS_USER_INITIATED as u32, "`QoS` class did not update; thread will be eligible for E-core scheduling", ); } + + /// Validates the load-bearing claim that tokio worker threads created + /// with a `Builder::on_thread_start` hook calling `set_user_initiated` + /// observe `QOS_CLASS_USER_INITIATED` from inside spawned tasks. This + /// mirrors the wiring in `src/bin/nativelink.rs::main`. Without this + /// test the entire QoS scheme is unverified at the integration level. + #[test] + fn tokio_worker_threads_inherit_user_initiated_via_on_thread_start() { + // Deliberately build a fresh runtime in-test (do not reuse a + // global one) so the hook is exercised on freshly-spawned + // worker threads with whatever class they were born with. + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(2) + .on_thread_start(|| { + assert!(set_user_initiated(), "hook failed in worker thread"); + }) + .enable_all() + .build() + .expect("build tokio runtime"); + + let observed: u32 = rt.block_on(async { + // Force execution on a worker thread (not the caller). + tokio::spawn(async { current_qos_class() as u32 }) + .await + .expect("join spawned task") + }); + + assert_eq!( + observed, + libc::qos_class_t::QOS_CLASS_USER_INITIATED as u32, + "tokio worker thread did not inherit USER_INITIATED from on_thread_start", + ); + } } #[cfg(all(test, not(target_os = "macos")))] From 492ca3cc1c2bf01385285dab70237cf5de6950cc Mon Sep 17 00:00:00 2001 From: Ernesto Cambuston Date: Sat, 16 May 2026 20:03:14 -0700 Subject: [PATCH 3/5] qos: justify disallowed_methods escape on tokio_worker_threads_inherit test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The on_thread_start inheritance test must construct a custom-built runtime via `Builder::new_multi_thread()` and drive it with `block_on` — that is the unit under test. 
No `nativelink-util::task` wrapper exposes a custom-built runtime with a thread-start hook, so the disallowed_methods lint cannot be addressed at the root cause. Use `#[expect(clippy::disallowed_methods, reason = ...)]` per the modern Rust 2024 idiom (fails if the lint stops firing, with a reviewer-visible justification) rather than a silent `#[allow]`. Mirrors the same justified escape already used in src/bin/nativelink.rs::main. --- nativelink-worker/src/qos.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/nativelink-worker/src/qos.rs b/nativelink-worker/src/qos.rs index f79df3157..bd32a9835 100644 --- a/nativelink-worker/src/qos.rs +++ b/nativelink-worker/src/qos.rs @@ -103,8 +103,20 @@ mod macos_tests { /// with a `Builder::on_thread_start` hook calling `set_user_initiated` /// observe `QOS_CLASS_USER_INITIATED` from inside spawned tasks. This /// mirrors the wiring in `src/bin/nativelink.rs::main`. Without this - /// test the entire QoS scheme is unverified at the integration level. + /// test the entire `QoS` scheme is unverified at the integration level. + /// + /// This is the one place in the worker crate that must construct a + /// fresh `tokio::runtime::Builder::new_multi_thread()` and drive it + /// with `block_on` — the unit under test *is* the `on_thread_start` + /// hook on a custom-built runtime, which `nativelink-util::task` and + /// `#[nativelink_test]` do not expose. The `#[expect]` mirrors the + /// same justified escape used in `src/bin/nativelink.rs::main`. 
#[test] + #[expect( + clippy::disallowed_methods, + reason = "test exercises `Builder::on_thread_start` + `block_on`; \ + no util wrapper exposes a custom-built runtime with a thread-start hook" + )] fn tokio_worker_threads_inherit_user_initiated_via_on_thread_start() { // Deliberately build a fresh runtime in-test (do not reuse a // global one) so the hook is exercised on freshly-spawned From 4bf440130e080a09df1e0ec45ccaef9588359540 Mon Sep 17 00:00:00 2001 From: Ernesto Cambuston Date: Sat, 16 May 2026 21:13:07 -0700 Subject: [PATCH 4/5] qos: split set_user_initiated into cfg-gated fn / const fn The previous single-definition `pub fn set_user_initiated() -> bool` had a `#[cfg(target_os = "macos")]` block that called libc and a `#[cfg(not(...))]` block that returned `true`. On Linux CI clippy sees only the trivial `true` arm and fires `missing_const_for_fn`, failing ubuntu, asan, Bazel Dev/ubuntu, and every dependent rbe-* job. This did not reproduce on macOS because the macOS arm calls libc, which is not const-eligible, so clippy stays silent. Split into two cfg-gated definitions: the macOS impl stays a regular `pub fn` because `libc::pthread_set_qos_class_self_np` is not const; the non-macOS impl becomes `pub const fn` returning `true`. Call sites are unchanged, both arms still return `bool`, and the existing `qos::macos_tests::*` continue to apply since they were already gated on `#[cfg(target_os = "macos")]`. Doc comments are now split per arm and specialised to each platform's actual behaviour. Splitting (rather than `#[allow(missing_const_for_fn)]` on a single function) is the right fix because the lint is accurate for the non-macOS arm in isolation; suppressing it would hide a legitimate const-fn opportunity and mask future bugs on whichever platform clippy runs against. 
--- nativelink-worker/src/qos.rs | 40 +++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/nativelink-worker/src/qos.rs b/nativelink-worker/src/qos.rs index bd32a9835..ad2bdb7b9 100644 --- a/nativelink-worker/src/qos.rs +++ b/nativelink-worker/src/qos.rs @@ -33,30 +33,32 @@ /// Sets the calling thread's `QoS` class to `USER_INITIATED` on macOS. /// -/// On non-macOS targets this is a compile-time no-op (the call site -/// expands to nothing after monomorphization / dead-code elimination). -/// -/// Returns `true` if the call succeeded or the platform doesn't need it; -/// returns `false` only on macOS when the underlying pthread call fails. +/// Returns `true` if the underlying `pthread_set_qos_class_self_np` +/// call succeeded; returns `false` if it failed. /// /// Safe to call from any thread, including tokio runtime worker threads /// via `Builder::on_thread_start`. +#[cfg(target_os = "macos")] #[inline] pub fn set_user_initiated() -> bool { - #[cfg(target_os = "macos")] - { - // SAFETY: `pthread_set_qos_class_self_np` is a thread-local - // setter with no preconditions on the caller; passing a valid - // enum variant and relative priority 0 is always defined. - let ret = unsafe { - libc::pthread_set_qos_class_self_np(libc::qos_class_t::QOS_CLASS_USER_INITIATED, 0) - }; - ret == 0 - } - #[cfg(not(target_os = "macos"))] - { - true - } + // SAFETY: `pthread_set_qos_class_self_np` is a thread-local + // setter with no preconditions on the caller; passing a valid + // enum variant and relative priority 0 is always defined. + let ret = unsafe { + libc::pthread_set_qos_class_self_np(libc::qos_class_t::QOS_CLASS_USER_INITIATED, 0) + }; + ret == 0 +} + +/// Compile-time no-op on non-macOS targets. +/// +/// Always returns `true`. The call site expands to nothing after +/// inlining / dead-code elimination, so non-macOS builds never emit +/// a runtime branch or a libc call. 
+#[cfg(not(target_os = "macos"))] +#[inline] +pub const fn set_user_initiated() -> bool { + true } #[cfg(all(test, target_os = "macos"))] From d01380bfc2de4e03d34b848d2d9db1355c24e12d Mon Sep 17 00:00:00 2001 From: Ernesto Cambuston Date: Sat, 16 May 2026 21:21:30 -0700 Subject: [PATCH 5/5] ci: retrigger after GitHub 502 fetching rules_kotlin tarball