Skip to content

Commit c06149c

Browse files
committed
perf: Use Arc<str> for task dependency hashes to avoid heap clones
Store task hashes as `Arc<str>` in `TaskHashTrackerState` instead of `String`. In `calculate_dependency_hashes`, cloning an `Arc<str>` is only a reference-count increment rather than a heap allocation. For the api monorepo (1687 tasks, ~3 deps each), this eliminates ~5000 `String` heap allocations per run.
1 parent 0739ca5 commit c06149c

File tree

2 files changed

+48
-18
lines changed

2 files changed

+48
-18
lines changed

crates/turborepo-hash/src/lib.rs

Lines changed: 36 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
77
mod traits;
88

9-
use std::collections::HashMap;
9+
use std::{collections::HashMap, sync::Arc};
1010

1111
use capnp::message::{Builder, HeapAllocator};
1212
pub use traits::TurboHash;
@@ -58,7 +58,7 @@ pub type EnvironmentVariablePairs = Vec<String>;
5858
pub struct TaskHashable<'a> {
5959
// hashes
6060
pub global_hash: &'a str,
61-
pub task_dependency_hashes: Vec<String>,
61+
pub task_dependency_hashes: Vec<Arc<str>>,
6262
pub hash_of_files: &'a str,
6363
pub external_deps_hash: Option<&'a str>,
6464

@@ -338,7 +338,7 @@ impl From<TaskHashable<'_>> for Builder<HeapAllocator> {
338338
.reborrow()
339339
.init_task_dependency_hashes(task_hashable.task_dependency_hashes.len() as u32);
340340
for (i, hash) in task_hashable.task_dependency_hashes.iter().enumerate() {
341-
task_dependency_hashes_builder.set(i as u32, hash);
341+
task_dependency_hashes_builder.set(i as u32, &**hash);
342342
}
343343
}
344344

@@ -497,6 +497,8 @@ impl From<GlobalHashable<'_>> for Builder<HeapAllocator> {
497497

498498
#[cfg(test)]
499499
mod test {
500+
use std::sync::Arc;
501+
500502
use test_case::test_case;
501503
use turborepo_lockfiles::Package;
502504
use turborepo_types::{EnvMode, TaskOutputs};
@@ -509,7 +511,7 @@ mod test {
509511
fn task_hashable() {
510512
let task_hashable = TaskHashable {
511513
global_hash: "global_hash",
512-
task_dependency_hashes: vec!["task_dependency_hash".to_string()],
514+
task_dependency_hashes: vec![Arc::from("task_dependency_hash")],
513515
package_dir: Some(turbopath::RelativeUnixPathBuf::new("package_dir").unwrap()),
514516
hash_of_files: "hash_of_files",
515517
external_deps_hash: Some("external_deps_hash"),
@@ -528,6 +530,36 @@ mod test {
528530
assert_eq!(task_hashable.hash(), "1f8b13161f57fca1");
529531
}
530532

533+
#[test]
534+
fn task_hashable_multiple_dependency_hashes() {
535+
let task_hashable = TaskHashable {
536+
global_hash: "global_hash",
537+
task_dependency_hashes: vec![
538+
Arc::from("dep_hash_a"),
539+
Arc::from("dep_hash_b"),
540+
Arc::from("dep_hash_c"),
541+
],
542+
package_dir: Some(turbopath::RelativeUnixPathBuf::new("package_dir").unwrap()),
543+
hash_of_files: "hash_of_files",
544+
external_deps_hash: Some("external_deps_hash"),
545+
task: "task",
546+
outputs: TaskOutputs {
547+
inclusions: vec!["inclusions".to_string()],
548+
exclusions: vec!["exclusions".to_string()],
549+
},
550+
pass_through_args: &["pass_thru_args".to_string()],
551+
env: &["env".to_string()],
552+
resolved_env_vars: vec![],
553+
pass_through_env: &["pass_thru_env".to_string()],
554+
env_mode: EnvMode::Strict,
555+
};
556+
557+
let hash = task_hashable.hash();
558+
assert!(!hash.is_empty());
559+
// Pin the hash so any serialization change is caught
560+
assert_eq!(hash, "7676d7bb7c86d257");
561+
}
562+
531563
#[test]
532564
fn global_hashable() {
533565
let global_file_hash_map = vec![(

crates/turborepo-task-hash/src/lib.rs

Lines changed: 12 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -230,7 +230,7 @@ pub struct TaskHashTracker {
230230
pub struct TaskHashTrackerState {
231231
#[serde(skip)]
232232
package_task_env_vars: HashMap<TaskId<'static>, DetailedMap>,
233-
package_task_hashes: HashMap<TaskId<'static>, String>,
233+
package_task_hashes: HashMap<TaskId<'static>, Arc<str>>,
234234
#[serde(skip)]
235235
package_task_framework: HashMap<TaskId<'static>, FrameworkSlug>,
236236
#[serde(skip)]
@@ -431,10 +431,11 @@ impl<'a, R: RunOptsHashInfo> TaskHasher<'a, R> {
431431

432432
let task_hash = task_hashable.calculate_task_hash();
433433

434+
let task_hash_arc: Arc<str> = Arc::from(task_hash.as_str());
434435
self.task_hash_tracker.insert_hash(
435436
task_id.clone(),
436437
env_vars,
437-
task_hash.clone(),
438+
task_hash_arc,
438439
framework_slug,
439440
);
440441

@@ -453,17 +454,14 @@ impl<'a, R: RunOptsHashInfo> TaskHasher<'a, R> {
453454
fn calculate_dependency_hashes(
454455
&self,
455456
dependency_set: HashSet<&TaskNode>,
456-
) -> Result<Vec<String>, Error> {
457+
) -> Result<Vec<Arc<str>>, Error> {
457458
let state = self
458459
.task_hash_tracker
459460
.state
460461
.read()
461462
.expect("hash tracker rwlock poisoned");
462463

463-
// Collect owned strings directly to avoid borrow lifetime issues with
464-
// the RwLock guard. We sort + dedup instead of using a HashSet to avoid
465-
// the overhead of hashing the hash strings.
466-
let mut dependency_hash_list: Vec<String> = Vec::with_capacity(dependency_set.len());
464+
let mut dependency_hash_list: Vec<Arc<str>> = Vec::with_capacity(dependency_set.len());
467465
for dependency_task in &dependency_set {
468466
let TaskNode::Task(dependency_task_id) = dependency_task else {
469467
continue;
@@ -473,7 +471,7 @@ impl<'a, R: RunOptsHashInfo> TaskHasher<'a, R> {
473471
.package_task_hashes
474472
.get(dependency_task_id)
475473
.ok_or_else(|| Error::MissingDependencyTaskHash(dependency_task.to_string()))?;
476-
dependency_hash_list.push(dependency_hash.clone());
474+
dependency_hash_list.push(Arc::clone(dependency_hash));
477475
}
478476
drop(state);
479477

@@ -591,7 +589,7 @@ impl TaskHashTracker {
591589
}
592590
}
593591

594-
pub fn hash(&self, task_id: &TaskId) -> Option<String> {
592+
pub fn hash(&self, task_id: &TaskId) -> Option<Arc<str>> {
595593
let state = self.state.read().expect("hash tracker rwlock poisoned");
596594
state.package_task_hashes.get(task_id).cloned()
597595
}
@@ -600,7 +598,7 @@ impl TaskHashTracker {
600598
&self,
601599
task_id: TaskId<'static>,
602600
env_vars: DetailedMap,
603-
hash: String,
601+
hash: Arc<str>,
604602
framework_slug: Option<FrameworkSlug>,
605603
) {
606604
let mut state = self.state.write().expect("hash tracker rwlock poisoned");
@@ -664,7 +662,7 @@ impl TaskHashTracker {
664662
// proper dependency direction (task-hash doesn't depend on run-summary).
665663
impl HashTrackerInfo for TaskHashTracker {
666664
fn hash(&self, task_id: &TaskId) -> Option<String> {
667-
TaskHashTracker::hash(self, task_id)
665+
TaskHashTracker::hash(self, task_id).map(|arc| arc.to_string())
668666
}
669667

670668
fn env_vars(&self, task_id: &TaskId) -> Option<HashTrackerDetailedMap> {
@@ -738,7 +736,7 @@ mod test {
738736
tracker.insert_hash(
739737
task_id.clone(),
740738
DetailedMap::default(),
741-
"abc123".to_string(),
739+
Arc::from("abc123"),
742740
None,
743741
);
744742

@@ -777,7 +775,7 @@ mod test {
777775
tracker.insert_hash(
778776
task_id.clone(),
779777
DetailedMap::default(),
780-
format!("hash-{i}"),
778+
Arc::from(format!("hash-{i}").as_str()),
781779
None,
782780
);
783781
}
@@ -990,7 +988,7 @@ mod test {
990988
tracker.insert_hash(
991989
task_id.clone(),
992990
DetailedMap::default(),
993-
format!("hash-{i}"),
991+
Arc::from(format!("hash-{i}").as_str()),
994992
None,
995993
);
996994
assert_eq!(

0 commit comments

Comments
 (0)