Skip to content

Commit 57cf69c

Browse files
authored
perf: Deduplicate file hashing and parallelize globwalks (#11902)
## Summary

Optimizes `turbo run --dry` wall-clock time by up to 1.48x on large monorepos by eliminating redundant file hashing work and removing a serialization bottleneck in globwalk operations.

### Benchmarks

Tested across three repos of varying size:

| Repo | Packages | Before | After | Speedup |
|------|----------|--------|-------|---------|
| large | ~1000 | 5.903s | 3.999s | **1.48x** |
| medium | ~120 | 1.461s | 1.380s | 1.06x |
| small | ~6 | 0.659s | 0.693s | ~1.0x (noise) |

The improvement scales with repo size — specifically with how many tasks share the same `(package, inputs)` combination.

### Changes

**File hash deduplication** — Multiple tasks in the same package with identical `inputs` config (e.g. `build`, `lint`, `typecheck` all in one package) previously each ran an independent globwalk + file hash computation. Now tasks are grouped by `(package_path, globs, include_default)` and each unique combination is computed once, with results shared across tasks.

**Parallel globwalks via retry-on-EMFILE** — The previous `IoSemaphore` (max=1) serialized all globwalk operations to prevent fd exhaustion, making this the dominant bottleneck on large repos. This replaces the semaphore with retry-with-exponential-backoff on `EMFILE` errors (the same pattern Node's `graceful-fs` uses), allowing globwalks to run fully parallel on rayon. If the OS returns "too many open files", the operation sleeps briefly and retries — up to 10 times with exponential backoff capped at 1s.

**Zero-copy lockfile dependency lookups** — `Lockfile::all_dependencies` now returns `Cow<'_, HashMap<String, String>>` instead of cloning the HashMap on every call. For pnpm (which pre-builds a dependency index), this eliminates ~329k HashMap clones during transitive closure resolution.

**Optimized transitive closure cache keys** — The `DashMap` resolve cache now uses a single null-byte-separated `String` key built into a reusable buffer, instead of allocating a `(String, String, String)` tuple per lookup.

**HashMap importers for pnpm** — Converted pnpm's `importers` field from `BTreeMap` to `HashMap` (with sorted serialization) for O(1) workspace lookups during `resolve_package`.
1 parent b21423e commit 57cf69c

File tree

16 files changed

+350
-289
lines changed

16 files changed

+350
-289
lines changed

crates/turborepo-engine/src/builder.rs

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1167,7 +1167,8 @@ mod test {
11671167
fn all_dependencies(
11681168
&self,
11691169
_key: &str,
1170-
) -> Result<Option<HashMap<String, String>>, turborepo_lockfiles::Error> {
1170+
) -> Result<Option<std::borrow::Cow<'_, HashMap<String, String>>>, turborepo_lockfiles::Error>
1171+
{
11711172
unreachable!()
11721173
}
11731174

crates/turborepo-globwalk/src/lib.rs

Lines changed: 53 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -460,7 +460,7 @@ pub fn globwalk_with_settings(
460460
walk_type: WalkType,
461461
settings: Settings,
462462
) -> Result<HashSet<AbsoluteSystemPathBuf>, WalkError> {
463-
globwalk_internal(base_path, include, exclude, walk_type, settings)
463+
retry_on_emfile(|| globwalk_internal(base_path, include, exclude, walk_type, settings))
464464
}
465465

466466
pub fn globwalk(
@@ -469,7 +469,58 @@ pub fn globwalk(
469469
exclude: &[ValidatedGlob],
470470
walk_type: WalkType,
471471
) -> Result<HashSet<AbsoluteSystemPathBuf>, WalkError> {
472-
globwalk_internal(base_path, include, exclude, walk_type, Default::default())
472+
retry_on_emfile(|| {
473+
globwalk_internal(base_path, include, exclude, walk_type, Default::default())
474+
})
475+
}
476+
477+
fn is_too_many_open_files(err: &WalkError) -> bool {
478+
// visit_file converts all walkdir/wax errors into WalkError::IO,
479+
// so this is the only variant we need to check.
480+
let WalkError::IO(e) = err else {
481+
return false;
482+
};
483+
484+
#[cfg(unix)]
485+
{
486+
e.raw_os_error() == Some(24)
487+
} // EMFILE
488+
#[cfg(windows)]
489+
{
490+
e.raw_os_error() == Some(4)
491+
} // ERROR_TOO_MANY_OPEN_FILES
492+
#[cfg(not(any(unix, windows)))]
493+
{
494+
false
495+
}
496+
}
497+
498+
fn retry_on_emfile<F>(mut f: F) -> Result<HashSet<AbsoluteSystemPathBuf>, WalkError>
499+
where
500+
F: FnMut() -> Result<HashSet<AbsoluteSystemPathBuf>, WalkError>,
501+
{
502+
const MAX_RETRIES: u32 = 10;
503+
const BASE_DELAY_MS: u64 = 10;
504+
const MAX_DELAY_MS: u64 = 1000;
505+
506+
for attempt in 0..MAX_RETRIES {
507+
match f() {
508+
Ok(result) => return Ok(result),
509+
Err(err) if is_too_many_open_files(&err) => {
510+
let delay = std::cmp::min(BASE_DELAY_MS * 2u64.pow(attempt), MAX_DELAY_MS);
511+
debug!(
512+
attempt = attempt + 1,
513+
delay_ms = delay,
514+
"too many open files, retrying globwalk"
515+
);
516+
std::thread::sleep(std::time::Duration::from_millis(delay));
517+
}
518+
Err(err) => return Err(err),
519+
}
520+
}
521+
522+
// Final attempt — propagate whatever happens.
523+
f()
473524
}
474525

475526
#[tracing::instrument(skip(include, exclude))]

crates/turborepo-lib/src/run/mod.rs

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@ use std::{
1919
use chrono::{DateTime, Local};
2020
use futures::StreamExt;
2121
use itertools::Itertools;
22-
use rayon::iter::ParallelBridge;
2322
use shared_child::SharedChild;
2423
use tokio::{pin, select, task::JoinHandle};
2524
use tracing::{debug, error, info, instrument, warn};
@@ -589,7 +588,7 @@ impl Run {
589588
let repo_index = self.repo_index.as_ref().as_ref();
590589
let package_inputs_hashes = PackageInputsHashes::calculate_file_hashes(
591590
&self.scm,
592-
self.engine.tasks().par_bridge(),
591+
self.engine.tasks(),
593592
workspaces,
594593
self.engine.task_definitions(),
595594
&self.repo_root,

crates/turborepo-lockfiles/examples/berry_resolutions.rs

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,13 +6,10 @@ fn main() {
66
let data = LockfileData::from_bytes(lockfile_bytes.as_slice()).unwrap();
77
let lockfile = BerryLockfile::new(data, Some(manifest)).unwrap();
88
let key = "debug@npm:3.2.7";
9+
let deps = lockfile.all_dependencies(key).unwrap().unwrap();
910
println!(
1011
"Dependencies of {key}: {}",
11-
lockfile
12-
.all_dependencies(key)
13-
.unwrap()
14-
.unwrap()
15-
.into_iter()
12+
deps.iter()
1613
.map(|(k, v)| format!("{k}@{v}"))
1714
.collect::<Vec<_>>()
1815
.join(", ")

crates/turborepo-lockfiles/src/berry/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -498,7 +498,8 @@ impl Lockfile for BerryLockfile {
498498
fn all_dependencies(
499499
&self,
500500
key: &str,
501-
) -> Result<Option<std::collections::HashMap<String, String>>, crate::Error> {
501+
) -> Result<Option<std::borrow::Cow<'_, std::collections::HashMap<String, String>>>, crate::Error>
502+
{
502503
let locator = Locator::try_from(key).map_err(Error::from)?;
503504

504505
let Some(package) = self.locator_package.get(&locator) else {
@@ -518,8 +519,7 @@ impl Lockfile for BerryLockfile {
518519
}
519520
map.insert(dependency.ident.to_string(), dependency.range.to_string());
520521
}
521-
// For each dependency we need to check if there's an override
522-
Ok(Some(map))
522+
Ok(Some(std::borrow::Cow::Owned(map)))
523523
}
524524

525525
fn subgraph(

crates/turborepo-lockfiles/src/bun/mod.rs

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -588,7 +588,8 @@ impl Lockfile for BunLockfile {
588588
fn all_dependencies(
589589
&self,
590590
key: &str,
591-
) -> Result<Option<std::collections::HashMap<String, String>>, crate::Error> {
591+
) -> Result<Option<std::borrow::Cow<'_, std::collections::HashMap<String, String>>>, crate::Error>
592+
{
592593
let entry_key = self
593594
.key_to_entry
594595
.get(key)
@@ -602,18 +603,14 @@ impl Lockfile for BunLockfile {
602603
let mut deps = HashMap::new();
603604

604605
let Some(info) = &entry.info else {
605-
return Ok(Some(deps));
606+
return Ok(Some(std::borrow::Cow::Owned(deps)));
606607
};
607608

608609
for (dependency, version) in info.all_dependencies() {
609610
let is_optional = info.optional_dependencies.contains_key(dependency)
610611
|| info.optional_peers.contains(dependency);
611612

612613
if is_optional {
613-
// Optional peers without nested entries should be skipped (prevents pulling
614-
// unrelated packages like "next" into @vercel/analytics). But declared
615-
// optionalDependencies (platform-specific binaries) should include hoisted
616-
// versions when no nested version exists.
617614
let parent_key = format!("{entry_key}/{dependency}");
618615
let has_nested = self.data.packages.contains_key(&parent_key);
619616

@@ -631,7 +628,7 @@ impl Lockfile for BunLockfile {
631628
deps.insert(dependency.to_string(), version.to_string());
632629
}
633630

634-
Ok(Some(deps))
631+
Ok(Some(std::borrow::Cow::Owned(deps)))
635632
}
636633

637634
fn subgraph(

crates/turborepo-lockfiles/src/lib.rs

Lines changed: 74 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ mod yarn1;
2121

2222
use std::{
2323
any::Any,
24+
borrow::Cow,
2425
collections::{HashMap, HashSet},
2526
};
2627

@@ -35,7 +36,7 @@ use serde::Serialize;
3536
use turbopath::RelativeUnixPathBuf;
3637
pub use yarn1::{Yarn1Lockfile, yarn_subgraph};
3738

38-
type ResolveCache = DashMap<(String, String, String), Option<Package>>;
39+
type ResolveCache = DashMap<String, Option<Package>>;
3940

4041
#[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord, Hash, Serialize)]
4142
pub struct Package {
@@ -71,7 +72,10 @@ pub trait Lockfile: Send + Sync + Any + std::fmt::Debug {
7172

7273
/// Given a lockfile key return all (prod/dev/optional) direct dependencies
7374
/// of that package.
74-
fn all_dependencies(&self, key: &str) -> Result<Option<HashMap<String, String>>, Error>;
75+
fn all_dependencies(
76+
&self,
77+
key: &str,
78+
) -> Result<Option<Cow<'_, HashMap<String, String>>>, Error>;
7579

7680
/// Given a list of workspace packages and external packages that are
7781
/// dependencies of the workspace packages, produce a lockfile that only
@@ -169,71 +173,102 @@ fn transitive_closure_cached<L: Lockfile + ?Sized>(
169173
resolve_cache: &ResolveCache,
170174
) -> Result<HashSet<Package>, Error> {
171175
let mut transitive_deps = HashSet::new();
176+
let mut key_buf = String::new();
172177
transitive_closure_helper(
173178
lockfile,
174179
workspace_path,
175-
unresolved_deps,
180+
&unresolved_deps,
176181
&mut transitive_deps,
177182
ignore_missing_packages,
178183
resolve_cache,
184+
&mut key_buf,
179185
)?;
180186

181187
Ok(transitive_deps)
182188
}
183189

184-
fn transitive_closure_helper<L: Lockfile + ?Sized>(
190+
fn make_cache_key(buf: &mut String, workspace_path: &str, name: &str, specifier: &str) {
191+
buf.clear();
192+
buf.reserve(workspace_path.len() + name.len() + specifier.len() + 2);
193+
buf.push_str(workspace_path);
194+
buf.push('\0');
195+
buf.push_str(name);
196+
buf.push('\0');
197+
buf.push_str(specifier);
198+
}
199+
200+
fn resolve_deps<L: Lockfile + ?Sized>(
185201
lockfile: &L,
186202
workspace_path: &str,
187-
unresolved_deps: HashMap<String, impl AsRef<str>>,
188-
resolved_deps: &mut HashSet<Package>,
203+
unresolved_deps: &HashMap<String, String>,
189204
ignore_missing_packages: bool,
190205
resolve_cache: &ResolveCache,
191-
) -> Result<(), Error> {
206+
key_buf: &mut String,
207+
) -> Result<Vec<Package>, Error> {
208+
let mut newly_resolved = Vec::new();
209+
192210
for (name, specifier) in unresolved_deps {
193-
let specifier_ref = specifier.as_ref();
194-
let cache_key = (
195-
workspace_path.to_string(),
196-
name.clone(),
197-
specifier_ref.to_string(),
198-
);
199-
200-
let pkg = match resolve_cache.get(&cache_key) {
211+
make_cache_key(key_buf, workspace_path, name, specifier);
212+
213+
let pkg = match resolve_cache.get(key_buf.as_str()) {
201214
Some(cached) => cached.clone(),
202215
None => {
203-
let result = match lockfile.resolve_package(workspace_path, &name, specifier_ref) {
216+
let result = match lockfile.resolve_package(workspace_path, name, specifier) {
204217
Ok(pkg) => pkg,
205218
Err(Error::MissingWorkspace(_)) if ignore_missing_packages => {
206-
resolve_cache.insert(cache_key, None);
219+
resolve_cache.insert(key_buf.clone(), None);
207220
continue;
208221
}
209222
Err(e) => return Err(e),
210223
};
211-
resolve_cache.insert(cache_key, result.clone());
224+
resolve_cache.insert(key_buf.clone(), result.clone());
212225
result
213226
}
214227
};
215228

216-
match pkg {
217-
None => {
218-
continue;
219-
}
220-
Some(pkg) if resolved_deps.contains(&pkg) => {
221-
continue;
222-
}
223-
Some(pkg) => {
224-
let all_deps = lockfile.all_dependencies(&pkg.key)?;
225-
resolved_deps.insert(pkg);
226-
if let Some(deps) = all_deps {
227-
transitive_closure_helper(
228-
lockfile,
229-
workspace_path,
230-
deps,
231-
resolved_deps,
232-
false,
233-
resolve_cache,
234-
)?;
235-
}
236-
}
229+
if let Some(pkg) = pkg {
230+
newly_resolved.push(pkg);
231+
}
232+
}
233+
234+
Ok(newly_resolved)
235+
}
236+
237+
fn transitive_closure_helper<L: Lockfile + ?Sized>(
238+
lockfile: &L,
239+
workspace_path: &str,
240+
unresolved_deps: &HashMap<String, String>,
241+
resolved_deps: &mut HashSet<Package>,
242+
ignore_missing_packages: bool,
243+
resolve_cache: &ResolveCache,
244+
key_buf: &mut String,
245+
) -> Result<(), Error> {
246+
let newly_resolved = resolve_deps(
247+
lockfile,
248+
workspace_path,
249+
unresolved_deps,
250+
ignore_missing_packages,
251+
resolve_cache,
252+
key_buf,
253+
)?;
254+
255+
for pkg in newly_resolved {
256+
if resolved_deps.contains(&pkg) {
257+
continue;
258+
}
259+
260+
let all_deps = lockfile.all_dependencies(&pkg.key)?;
261+
resolved_deps.insert(pkg);
262+
if let Some(deps) = all_deps {
263+
transitive_closure_helper(
264+
lockfile,
265+
workspace_path,
266+
&deps,
267+
resolved_deps,
268+
false,
269+
resolve_cache,
270+
key_buf,
271+
)?;
237272
}
238273
}
239274

crates/turborepo-lockfiles/src/npm.rs

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,10 @@ impl Lockfile for NpmLockfile {
8181
}
8282

8383
#[tracing::instrument(skip(self))]
84-
fn all_dependencies(&self, key: &str) -> Result<Option<HashMap<String, String>>, Error> {
84+
fn all_dependencies(
85+
&self,
86+
key: &str,
87+
) -> Result<Option<std::borrow::Cow<'_, HashMap<String, String>>>, Error> {
8588
self.packages
8689
.get(key)
8790
.map(|pkg| {
@@ -98,7 +101,8 @@ impl Lockfile for NpmLockfile {
98101
}
99102
})
100103
})
101-
.collect()
104+
.collect::<Result<HashMap<_, _>, _>>()
105+
.map(std::borrow::Cow::Owned)
102106
})
103107
.transpose()
104108
}

0 commit comments

Comments
 (0)