Skip to content

Commit e49477c

Browse files
committed
perf: Arc-share FileHashes and batch dependency hash lookups
- `expanded_hashes` now stores `Arc<FileHashes>`, so task distribution is a refcount bump instead of a clone of the entire HashMap per task.
- `calculate_dependency_hashes` acquires the tracker mutex once for all dependencies instead of once per dependency.
- `Vendor::infer()` is hoisted out of the scheduling loop and evaluated once before it.
1 parent 603f197 commit e49477c

File tree

2 files changed

+18
-14
lines changed
  • crates
    • turborepo-lib/src/task_graph/visitor
    • turborepo-task-hash/src

2 files changed

+18
-14
lines changed

crates/turborepo-lib/src/task_graph/visitor/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ impl<'a> Visitor<'a> {
199199
let span = Span::current();
200200

201201
let factory = ExecContextFactory::new(self, errors.clone(), self.manager.clone(), &engine)?;
202+
let cached_vendor_behavior = Vendor::infer().and_then(|vendor| vendor.behavior.as_ref());
202203

203204
while let Some(message) = node_stream.recv().await {
204205
let span = tracing::debug_span!(parent: &span, "queue_task", task = %message.info);
@@ -299,8 +300,7 @@ impl<'a> Visitor<'a> {
299300
continue;
300301
};
301302

302-
let vendor_behavior =
303-
Vendor::infer().and_then(|vendor| vendor.behavior.as_ref());
303+
let vendor_behavior = cached_vendor_behavior;
304304

305305
let output_client = if let Some(handle) = &self.ui_sender {
306306
TaskOutput::UI(handle.task(info.to_string()))

crates/turborepo-task-hash/src/lib.rs

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ pub enum Error {
9595
#[derive(Debug, Default)]
9696
pub struct PackageInputsHashes {
9797
hashes: HashMap<TaskId<'static>, String>,
98-
expanded_hashes: HashMap<TaskId<'static>, FileHashes>,
98+
expanded_hashes: HashMap<TaskId<'static>, Arc<FileHashes>>,
9999
}
100100

101101
impl PackageInputsHashes {
@@ -267,9 +267,7 @@ impl PackageInputsHashes {
267267
let hash = file_hashes.as_ref().hash();
268268

269269
hashes.insert(info.task_id.clone(), hash);
270-
// Clone the Arc'd FileHashes for tasks sharing the same inputs.
271-
// This is a reference count bump, not a deep clone.
272-
expanded_hashes.insert(info.task_id, FileHashes(file_hashes.0.clone()));
270+
expanded_hashes.insert(info.task_id, Arc::clone(file_hashes));
273271
}
274272

275273
Ok(PackageInputsHashes {
@@ -296,7 +294,7 @@ pub struct TaskHashTrackerState {
296294
#[serde(skip)]
297295
package_task_cache: HashMap<TaskId<'static>, CacheHitMetadata>,
298296
#[serde(skip)]
299-
package_task_inputs_expanded_hashes: HashMap<TaskId<'static>, FileHashes>,
297+
package_task_inputs_expanded_hashes: HashMap<TaskId<'static>, Arc<FileHashes>>,
300298
}
301299

302300
/// Caches package-inputs hashes, and package-task hashes.
@@ -477,19 +475,25 @@ impl<'a, R: RunOptsHashInfo> TaskHasher<'a, R> {
477475
&self,
478476
dependency_set: HashSet<&TaskNode>,
479477
) -> Result<Vec<String>, Error> {
480-
let mut dependency_hash_set = HashSet::new();
478+
let state = self
479+
.task_hash_tracker
480+
.state
481+
.lock()
482+
.expect("hash tracker mutex poisoned");
481483

484+
let mut dependency_hash_set = HashSet::new();
482485
for dependency_task in dependency_set {
483486
let TaskNode::Task(dependency_task_id) = dependency_task else {
484487
continue;
485488
};
486489

487-
let dependency_hash = self
488-
.task_hash_tracker
489-
.hash(dependency_task_id)
490+
let dependency_hash = state
491+
.package_task_hashes
492+
.get(dependency_task_id)
490493
.ok_or_else(|| Error::MissingDependencyTaskHash(dependency_task.to_string()))?;
491494
dependency_hash_set.insert(dependency_hash.clone());
492495
}
496+
drop(state);
493497

494498
let mut dependency_hash_list = dependency_hash_set.into_iter().collect::<Vec<_>>();
495499
dependency_hash_list.sort_unstable();
@@ -656,7 +660,7 @@ pub fn get_internal_deps_hash(
656660
}
657661

658662
impl TaskHashTracker {
659-
pub fn new(input_expanded_hashes: HashMap<TaskId<'static>, FileHashes>) -> Self {
663+
pub fn new(input_expanded_hashes: HashMap<TaskId<'static>, Arc<FileHashes>>) -> Self {
660664
Self {
661665
state: Arc::new(Mutex::new(TaskHashTrackerState {
662666
package_task_inputs_expanded_hashes: input_expanded_hashes,
@@ -723,7 +727,7 @@ impl TaskHashTracker {
723727
state.package_task_cache.insert(task_id, cache_status);
724728
}
725729

726-
pub fn get_expanded_inputs(&self, task_id: &TaskId) -> Option<FileHashes> {
730+
pub fn get_expanded_inputs(&self, task_id: &TaskId) -> Option<Arc<FileHashes>> {
727731
let state = self.state.lock().expect("hash tracker mutex poisoned");
728732
state
729733
.package_task_inputs_expanded_hashes
@@ -773,7 +777,7 @@ impl HashTrackerInfo for TaskHashTracker {
773777
&self,
774778
task_id: &TaskId,
775779
) -> Option<std::collections::HashMap<RelativeUnixPathBuf, String>> {
776-
TaskHashTracker::get_expanded_inputs(self, task_id).map(|file_hashes| file_hashes.0)
780+
TaskHashTracker::get_expanded_inputs(self, task_id).map(|file_hashes| file_hashes.0.clone())
777781
}
778782
}
779783

0 commit comments

Comments
 (0)