Skip to content

Commit bc87fc4

Browse files
committed
perf: Parallelize pre-execution hotpath and optimize task hash tracker
Three architectural optimizations to the critical path before task execution:

1. **3-way parallel pre-execution phase**: Use `rayon::scope` to run package file hashing, internal deps hashing, and global file hash collection concurrently. Previously these three I/O-heavy operations ran sequentially. For large monorepos this can significantly reduce wall-clock startup time, since each operation walks the filesystem independently.

2. **Mutex → RwLock on TaskHashTracker**: The tracker is read-heavy during task execution (dependency hash lookups, env var queries, cache status checks) but is only written to when inserting new task results. `RwLock` allows concurrent reads, eliminating contention between parallel task visitors.

3. **Optimized dependency hash calculation**: Replace HashSet→Vec→sort with direct Vec→sort→dedup, avoiding the overhead of hashing hash strings through the `HashSet`. Pre-allocate `HashMap` capacity in `get_package_hashes`.

The global hash computation is refactored to expose `collect_global_file_hash_inputs()`, which contains the expensive I/O work (globwalk + file hashing + env var resolution), separate from the lightweight assembly step that needs the external/internal dep hashes. This enables the caller to overlap I/O work across all three operations.

https://claude.ai/code/session_01CfUraGs872goKHRKSRyFXi
1 parent 6ef1582 commit bc87fc4

File tree

5 files changed

+190
-93
lines changed

5 files changed

+190
-93
lines changed

crates/turborepo-lib/src/run/mod.rs

Lines changed: 85 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ use crate::{
4848
run::task_access::TaskAccess,
4949
task_graph::Visitor,
5050
task_hash::{
51-
get_external_deps_hash, get_global_hash_inputs, get_internal_deps_hash, PackageInputsHashes,
51+
GlobalHashableInputs, collect_global_file_hash_inputs, get_external_deps_hash,
52+
get_internal_deps_hash, global_hash::GLOBAL_CACHE_KEY, PackageInputsHashes,
5253
},
5354
turbo_json::{TurboJson, UnifiedTurboJsonLoader},
5455
};
@@ -584,15 +585,6 @@ impl Run {
584585
) -> Result<i32, Error> {
585586
let workspaces = self.pkg_dep_graph.packages().collect();
586587
let repo_index = self.repo_index.as_ref().as_ref();
587-
let package_inputs_hashes = PackageInputsHashes::calculate_file_hashes(
588-
&self.scm,
589-
self.engine.tasks(),
590-
workspaces,
591-
self.engine.task_definitions(),
592-
&self.repo_root,
593-
&self.run_telemetry,
594-
repo_index,
595-
)?;
596588

597589
let root_workspace = self
598590
.pkg_dep_graph
@@ -601,46 +593,94 @@ impl Run {
601593

602594
let is_monorepo = !self.opts.run_opts.single_package;
603595

604-
let root_external_dependencies_hash =
605-
is_monorepo.then(|| get_external_deps_hash(&root_workspace.transitive_dependencies));
596+
// Run three expensive I/O-bound operations concurrently using rayon::scope:
597+
// 1. Package file hashing - walks every package's files and computes hashes
598+
// 2. Internal deps hashing - walks root internal dependency packages
599+
// 3. Global file hash inputs - globwalks global deps and hashes them
600+
//
601+
// These are completely independent and dominate the pre-execution phase.
602+
// Running them in parallel can significantly reduce wall-clock time.
603+
let internal_dep_paths = is_monorepo.then(|| {
604+
self.pkg_dep_graph
605+
.root_internal_package_dependencies_paths()
606+
});
607+
608+
let env_mode = self.opts.run_opts.env_mode;
609+
610+
let mut file_hash_result = None;
611+
let mut internal_deps_result = None;
612+
let mut global_file_result = None;
606613

607-
let root_internal_dependencies_hash = is_monorepo
608-
.then(|| {
609-
get_internal_deps_hash(
614+
rayon::scope(|s| {
615+
s.spawn(|_| {
616+
file_hash_result = Some(PackageInputsHashes::calculate_file_hashes(
610617
&self.scm,
618+
self.engine.tasks(),
619+
workspaces,
620+
self.engine.task_definitions(),
611621
&self.repo_root,
612-
self.pkg_dep_graph
613-
.root_internal_package_dependencies_paths(),
622+
&self.run_telemetry,
614623
repo_index,
615-
)
616-
})
617-
.transpose()?;
618-
619-
let global_hash_inputs = {
620-
let env_mode = self.opts.run_opts.env_mode;
621-
let pass_through_env = match env_mode {
622-
EnvMode::Loose => {
623-
// Remove the passthroughs from hash consideration if we're explicitly loose.
624-
None
625-
}
626-
EnvMode::Strict => self.root_turbo_json.global_pass_through_env.as_deref(),
627-
};
624+
));
625+
});
626+
s.spawn(|_| {
627+
internal_deps_result = Some(
628+
internal_dep_paths
629+
.map(|dep_paths| {
630+
get_internal_deps_hash(
631+
&self.scm,
632+
&self.repo_root,
633+
dep_paths,
634+
repo_index,
635+
)
636+
})
637+
.transpose(),
638+
);
639+
});
640+
s.spawn(|_| {
641+
global_file_result = Some(collect_global_file_hash_inputs(
642+
root_workspace,
643+
&self.repo_root,
644+
self.pkg_dep_graph.package_manager(),
645+
self.pkg_dep_graph.lockfile(),
646+
&self.root_turbo_json.global_deps,
647+
&self.env_at_execution_start,
648+
&self.root_turbo_json.global_env,
649+
&self.scm,
650+
));
651+
});
652+
});
628653

629-
get_global_hash_inputs(
630-
root_external_dependencies_hash.as_deref(),
631-
root_internal_dependencies_hash.as_deref(),
632-
root_workspace,
633-
&self.repo_root,
634-
self.pkg_dep_graph.package_manager(),
635-
self.pkg_dep_graph.lockfile(),
636-
&self.root_turbo_json.global_deps,
637-
&self.env_at_execution_start,
638-
&self.root_turbo_json.global_env,
639-
pass_through_env,
640-
env_mode,
641-
self.opts.run_opts.framework_inference,
642-
&self.scm,
643-
)?
654+
let package_inputs_hashes =
655+
file_hash_result.expect("file hash task did not complete")?;
656+
let root_internal_dependencies_hash =
657+
internal_deps_result.expect("internal deps task did not complete")?;
658+
let global_file_inputs =
659+
global_file_result.expect("global file hash task did not complete")?;
660+
661+
let root_external_dependencies_hash =
662+
is_monorepo.then(|| get_external_deps_hash(&root_workspace.transitive_dependencies));
663+
664+
let pass_through_env = match env_mode {
665+
EnvMode::Loose => {
666+
// Remove the passthroughs from hash consideration if we're explicitly loose.
667+
None
668+
}
669+
EnvMode::Strict => self.root_turbo_json.global_pass_through_env.as_deref(),
670+
};
671+
672+
let global_hash_inputs = GlobalHashableInputs {
673+
global_cache_key: GLOBAL_CACHE_KEY,
674+
global_file_hash_map: global_file_inputs.global_file_hash_map,
675+
root_external_dependencies_hash: root_external_dependencies_hash.as_deref(),
676+
root_internal_dependencies_hash: root_internal_dependencies_hash.as_deref(),
677+
engines: global_file_inputs.engines,
678+
env: &self.root_turbo_json.global_env,
679+
resolved_env_vars: Some(global_file_inputs.global_hashable_env_vars),
680+
pass_through_env,
681+
env_mode,
682+
framework_inference: self.opts.run_opts.framework_inference,
683+
env_at_execution_start: &self.env_at_execution_start,
644684
};
645685
let global_hash = global_hash_inputs.calculate_global_hash();
646686

crates/turborepo-lib/src/task_hash.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55
66
// Re-export all public types from turborepo-task-hash
77
pub use turborepo_task_hash::{
8-
get_external_deps_hash, get_global_hash_inputs, get_internal_deps_hash, global_hash, Error,
9-
GlobalHashableInputs, PackageInputsHashes, TaskHashTracker, TaskHashTrackerState,
8+
collect_global_file_hash_inputs, get_external_deps_hash, get_global_hash_inputs,
9+
get_internal_deps_hash, global_hash, Error, GlobalHashableInputs, PackageInputsHashes,
10+
TaskHashTracker, TaskHashTrackerState,
1011
};
1112

1213
use crate::opts::RunOpts;

crates/turborepo-scm/src/repo_index.rs

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,20 +59,28 @@ impl RepoGitIndex {
5959
let prefix_str = pkg_prefix.as_str();
6060
let prefix_is_empty = prefix_str.is_empty();
6161

62-
let mut hashes = GitHashes::new();
63-
if prefix_is_empty {
62+
let mut hashes = if prefix_is_empty {
63+
// Pre-allocate with exact capacity when copying the entire tree
64+
let mut h = GitHashes::with_capacity(self.ls_tree_hashes.len());
6465
for (path, hash) in &self.ls_tree_hashes {
65-
hashes.insert(path.clone(), hash.clone());
66+
h.insert(path.clone(), hash.clone());
6667
}
68+
h
6769
} else {
70+
// Build the half-open range bounds for the `range` lookup below
71+
// (each bound is a `format!`-built string). '/' is one char after '.' in ASCII,
72+
// and '0' is one char after '/' — so the range covers exactly
73+
// paths starting with "{prefix}/".
6874
let range_start = RelativeUnixPathBuf::new(format!("{}/", prefix_str)).unwrap();
6975
let range_end = RelativeUnixPathBuf::new(format!("{}0", prefix_str)).unwrap();
76+
let mut h = GitHashes::new();
7077
for (path, hash) in self.ls_tree_hashes.range(range_start..range_end) {
7178
if let Ok(stripped) = path.strip_prefix(pkg_prefix) {
72-
hashes.insert(stripped, hash.clone());
79+
h.insert(stripped, hash.clone());
7380
}
7481
}
75-
}
82+
h
83+
};
7684

7785
let mut to_hash = Vec::new();
7886
for entry in &self.status_entries {

crates/turborepo-task-hash/src/global_hash.rs

Lines changed: 63 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ use turborepo_types::EnvMode;
2525
#[allow(dead_code)]
2626
static DEFAULT_ENV_VARS: [&str; 1] = ["VERCEL_ANALYTICS_ID"];
2727

28-
const GLOBAL_CACHE_KEY: &str = "I can’t see ya, but I know you’re here";
28+
pub const GLOBAL_CACHE_KEY: &str = "I can’t see ya, but I know you’re here";
2929

3030
#[derive(Debug, Error)]
3131
pub enum Error {
@@ -74,6 +74,66 @@ pub fn get_global_hash_inputs<'a, L: ?Sized + Lockfile>(
7474
framework_inference: bool,
7575
hasher: &SCM,
7676
) -> Result<GlobalHashableInputs<'a>, Error> {
77+
let GlobalFileHashInputs {
78+
global_file_hash_map,
79+
global_hashable_env_vars,
80+
engines,
81+
} = collect_global_file_hash_inputs(
82+
root_package,
83+
root_path,
84+
package_manager,
85+
lockfile,
86+
global_file_dependencies,
87+
env_at_execution_start,
88+
global_env,
89+
hasher,
90+
)?;
91+
92+
debug!(
93+
"external deps hash: {}",
94+
root_external_dependencies_hash.unwrap_or("no hash (single package)")
95+
);
96+
97+
Ok(GlobalHashableInputs {
98+
global_cache_key: GLOBAL_CACHE_KEY,
99+
global_file_hash_map,
100+
root_external_dependencies_hash,
101+
root_internal_dependencies_hash,
102+
engines,
103+
env: global_env,
104+
resolved_env_vars: Some(global_hashable_env_vars),
105+
pass_through_env: global_pass_through_env,
106+
env_mode,
107+
framework_inference,
108+
env_at_execution_start,
109+
})
110+
}
111+
112+
/// Intermediate result from `collect_global_file_hash_inputs`. Contains the
113+
/// expensive-to-compute parts of the global hash that are independent of the
114+
/// root external/internal dependency hashes. This allows callers to run this
115+
/// work concurrently with external/internal deps hash computation.
116+
pub struct GlobalFileHashInputs<'a> {
117+
pub global_file_hash_map: HashMap<RelativeUnixPathBuf, String>,
118+
pub global_hashable_env_vars: DetailedMap,
119+
pub engines: Option<HashMap<&'a str, &'a str>>,
120+
}
121+
122+
/// Collects global file hash inputs (globwalk, file hashing, env vars).
123+
/// This is the expensive I/O-bound portion of global hash computation and
124+
/// can be run concurrently with package file hashing and internal deps
125+
/// hashing since it has no dependencies on those results.
126+
#[allow(clippy::too_many_arguments, clippy::result_large_err)]
127+
pub fn collect_global_file_hash_inputs<'a, L: ?Sized + Lockfile>(
128+
root_package: &'a PackageInfo,
129+
root_path: &AbsoluteSystemPath,
130+
package_manager: &PackageManager,
131+
lockfile: Option<&L>,
132+
global_file_dependencies: &'a [String],
133+
env_at_execution_start: &'a EnvironmentVariableMap,
134+
global_env: &'a [String],
135+
hasher: &SCM,
136+
) -> Result<GlobalFileHashInputs<'a>, Error> {
77137
let engines = root_package.package_json.engines();
78138

79139
let global_hashable_env_vars =
@@ -102,23 +162,10 @@ pub fn get_global_hash_inputs<'a, L: ?Sized + Lockfile>(
102162

103163
let global_file_hash_map = hasher.get_hashes_for_files(root_path, &global_deps_paths, false)?;
104164

105-
debug!(
106-
"external deps hash: {}",
107-
root_external_dependencies_hash.unwrap_or("no hash (single package)")
108-
);
109-
110-
Ok(GlobalHashableInputs {
111-
global_cache_key: GLOBAL_CACHE_KEY,
165+
Ok(GlobalFileHashInputs {
112166
global_file_hash_map,
113-
root_external_dependencies_hash,
114-
root_internal_dependencies_hash,
167+
global_hashable_env_vars,
115168
engines,
116-
env: global_env,
117-
resolved_env_vars: Some(global_hashable_env_vars),
118-
pass_through_env: global_pass_through_env,
119-
env_mode,
120-
framework_inference,
121-
env_at_execution_start,
122169
})
123170
}
124171

0 commit comments

Comments
 (0)