Skip to content

Commit 53de07f

Browse files
committed
perf: Optimize hotpath memory management and reduce allocations
- Replace hex::encode(to_be_bytes()) with direct format!("{:016x}") in TurboHash::hash(), eliminating the intermediate byte array and the hex crate dependency entirely
- Pre-size the capnp first segment for FileHashes, LockFilePackages, and LockFilePackagesRef based on entry count, avoiding repeated segment growth allocations during message construction
- Use as_str() instead of to_string() for package_dir in TaskHashable serialization, avoiding a per-task-hash String allocation
- Replace HashSet with sort_unstable+dedup in calculate_dependency_hashes, removing HashSet hashing overhead and reducing allocations when dependency fan-out is large
- Pre-allocate HashMap capacity in RepoGitIndex::get_package_hashes using the exact count (full-repo) or the BTreeMap range size_hint (per-package)
- Use String::new() instead of "".into() for zero-allocation empty string returns in get_external_deps_hash and get_internal_deps_hash

https://claude.ai/code/session_01TXs2FAMUjxXr25hLQwTMzF
1 parent 6ef1582 commit 53de07f

File tree

6 files changed

+59
-33
lines changed

6 files changed

+59
-33
lines changed

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/turborepo-hash/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ workspace = true
1010

1111
[dependencies]
1212
capnp = "0.24"
13-
hex = "0.4.3"
1413
turbopath = { workspace = true }
1514
turborepo-lockfiles = { workspace = true }
1615
turborepo-types = { workspace = true }

crates/turborepo-hash/src/lib.rs

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -153,10 +153,13 @@ impl From<HashableTaskOutputs> for Builder<HeapAllocator> {
153153

154154
impl From<LockFilePackages> for Builder<HeapAllocator> {
155155
fn from(LockFilePackages(packages): LockFilePackages) -> Self {
156+
// Pre-size: each package has key + version strings (~12 words each).
157+
let estimated_words = packages.len() * 12 + 8;
158+
let allocator = HeapAllocator::default().first_segment_words(estimated_words as u32);
156159
let mut message = ::capnp::message::TypedBuilder::<
157160
proto_capnp::lock_file_packages::Owned,
158161
HeapAllocator,
159-
>::new_default();
162+
>::new(Builder::new(allocator));
160163
let mut builder = message.init_root();
161164

162165
{
@@ -189,10 +192,12 @@ impl From<LockFilePackages> for Builder<HeapAllocator> {
189192

190193
impl<'a> From<LockFilePackagesRef<'a>> for Builder<HeapAllocator> {
191194
fn from(LockFilePackagesRef(packages): LockFilePackagesRef<'a>) -> Self {
195+
let estimated_words = packages.len() * 12 + 8;
196+
let allocator = HeapAllocator::default().first_segment_words(estimated_words as u32);
192197
let mut message = ::capnp::message::TypedBuilder::<
193198
proto_capnp::lock_file_packages::Owned,
194199
HeapAllocator,
195-
>::new_default();
200+
>::new(Builder::new(allocator));
196201
let mut builder = message.init_root();
197202

198203
{
@@ -225,10 +230,16 @@ impl<'a> From<LockFilePackagesRef<'a>> for Builder<HeapAllocator> {
225230

226231
impl From<FileHashes> for Builder<HeapAllocator> {
227232
fn from(FileHashes(file_hashes): FileHashes) -> Self {
233+
// Pre-size the first segment to avoid repeated growth allocations.
234+
// Each file-hash entry needs roughly 16 capnp words (struct pointer +
235+
// two text fields with their data), plus fixed overhead for the message
236+
// root and list header.
237+
let estimated_words = file_hashes.len() * 16 + 8;
238+
let allocator = HeapAllocator::default().first_segment_words(estimated_words as u32);
228239
let mut message = ::capnp::message::TypedBuilder::<
229240
proto_capnp::file_hashes::Owned,
230241
HeapAllocator,
231-
>::new_default();
242+
>::new(Builder::new(allocator));
232243
let mut builder = message.init_root();
233244

234245
{
@@ -268,10 +279,12 @@ impl From<FileHashes> for Builder<HeapAllocator> {
268279

269280
impl From<&FileHashes> for Builder<HeapAllocator> {
270281
fn from(FileHashes(file_hashes): &FileHashes) -> Self {
282+
let estimated_words = file_hashes.len() * 16 + 8;
283+
let allocator = HeapAllocator::default().first_segment_words(estimated_words as u32);
271284
let mut message = ::capnp::message::TypedBuilder::<
272285
proto_capnp::file_hashes::Owned,
273286
HeapAllocator,
274-
>::new_default();
287+
>::new(Builder::new(allocator));
275288
let mut builder = message.init_root();
276289

277290
{
@@ -313,8 +326,8 @@ impl From<TaskHashable<'_>> for Builder<HeapAllocator> {
313326
let mut builder = message.init_root();
314327

315328
builder.set_global_hash(task_hashable.global_hash);
316-
if let Some(package_dir) = task_hashable.package_dir {
317-
builder.set_package_dir(package_dir.to_string());
329+
if let Some(ref package_dir) = task_hashable.package_dir {
330+
builder.set_package_dir(package_dir.as_str());
318331
}
319332

320333
builder.set_hash_of_files(task_hashable.hash_of_files);

crates/turborepo-hash/src/traits.rs

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::fmt::Write;
2+
13
use capnp::message::{Allocator, Builder};
24

35
pub trait Sealed<A> {}
@@ -20,17 +22,17 @@ where
2022
{
2123
fn hash(self) -> String {
2224
let message = self.into();
25+
let segments = message.get_segments_for_output();
2326

24-
debug_assert_eq!(
25-
message.get_segments_for_output().len(),
26-
1,
27-
"message is not canonical"
28-
);
29-
30-
let buf = message.get_segments_for_output()[0];
27+
debug_assert_eq!(segments.len(), 1, "message is not canonical");
3128

32-
let out = xxhash_rust::xxh64::xxh64(buf, 0);
29+
let out = xxhash_rust::xxh64::xxh64(segments[0], 0);
3330

34-
hex::encode(out.to_be_bytes())
31+
// Format u64 directly as 16-char zero-padded lowercase hex.
32+
// Avoids the intermediate to_be_bytes() + hex::encode() roundtrip
33+
// which creates a temporary byte array and an extra Vec allocation.
34+
let mut s = String::with_capacity(16);
35+
write!(s, "{out:016x}").unwrap();
36+
s
3537
}
3638
}

crates/turborepo-scm/src/repo_index.rs

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,20 +59,28 @@ impl RepoGitIndex {
5959
let prefix_str = pkg_prefix.as_str();
6060
let prefix_is_empty = prefix_str.is_empty();
6161

62-
let mut hashes = GitHashes::new();
63-
if prefix_is_empty {
62+
let mut hashes = if prefix_is_empty {
63+
// Full-repo case: pre-allocate to the exact ls-tree count.
64+
let mut h = GitHashes::with_capacity(self.ls_tree_hashes.len());
6465
for (path, hash) in &self.ls_tree_hashes {
65-
hashes.insert(path.clone(), hash.clone());
66+
h.insert(path.clone(), hash.clone());
6667
}
68+
h
6769
} else {
6870
let range_start = RelativeUnixPathBuf::new(format!("{}/", prefix_str)).unwrap();
6971
let range_end = RelativeUnixPathBuf::new(format!("{}0", prefix_str)).unwrap();
70-
for (path, hash) in self.ls_tree_hashes.range(range_start..range_end) {
72+
let range = self.ls_tree_hashes.range(range_start..range_end);
73+
// Use size_hint to pre-allocate; BTreeMap ranges provide a lower
74+
// bound based on the tree structure.
75+
let (lower, _) = range.size_hint();
76+
let mut h = GitHashes::with_capacity(lower);
77+
for (path, hash) in range {
7178
if let Ok(stripped) = path.strip_prefix(pkg_prefix) {
72-
hashes.insert(stripped, hash.clone());
79+
h.insert(stripped, hash.clone());
7380
}
7481
}
75-
}
82+
h
83+
};
7684

7785
let mut to_hash = Vec::new();
7886
for entry in &self.status_entries {

crates/turborepo-task-hash/src/lib.rs

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,11 @@ impl<'a, R: RunOptsHashInfo> TaskHasher<'a, R> {
430430
.lock()
431431
.expect("hash tracker mutex poisoned");
432432

433-
let mut dependency_hash_set = HashSet::with_capacity(dependency_set.len());
433+
// Collect borrowed hash strings, then sort+dedup instead of using an
434+
// intermediate HashSet. This avoids the HashSet's per-element hashing
435+
// overhead and its allocation, which matters when the dependency fan-out
436+
// is large.
437+
let mut dependency_hashes: Vec<&str> = Vec::with_capacity(dependency_set.len());
434438
for dependency_task in dependency_set {
435439
let TaskNode::Task(dependency_task_id) = dependency_task else {
436440
continue;
@@ -440,17 +444,18 @@ impl<'a, R: RunOptsHashInfo> TaskHasher<'a, R> {
440444
.package_task_hashes
441445
.get(dependency_task_id)
442446
.ok_or_else(|| Error::MissingDependencyTaskHash(dependency_task.to_string()))?;
443-
dependency_hash_set.insert(dependency_hash.as_str());
447+
dependency_hashes.push(dependency_hash.as_str());
444448
}
445449

446-
let mut dependency_hash_list: Vec<String> = dependency_hash_set
447-
.into_iter()
448-
.map(|s| s.to_owned())
449-
.collect();
450+
dependency_hashes.sort_unstable();
451+
dependency_hashes.dedup();
452+
453+
// Convert to owned Strings only after deduplication to minimize
454+
// allocations.
455+
let result: Vec<String> = dependency_hashes.into_iter().map(str::to_owned).collect();
450456
drop(state);
451-
dependency_hash_list.sort_unstable();
452457

453-
Ok(dependency_hash_list)
458+
Ok(result)
454459
}
455460

456461
pub fn into_task_hash_tracker_state(self) -> TaskHashTrackerState {
@@ -497,7 +502,7 @@ pub fn get_external_deps_hash(
497502
transitive_dependencies: &Option<HashSet<turborepo_lockfiles::Package>>,
498503
) -> String {
499504
let Some(transitive_dependencies) = transitive_dependencies else {
500-
return "".into();
505+
return String::new();
501506
};
502507

503508
// Collect references instead of cloning each Package (which has two Strings).
@@ -519,7 +524,7 @@ pub fn get_internal_deps_hash(
519524
pre_built_index: Option<&RepoGitIndex>,
520525
) -> Result<String, Error> {
521526
if package_dirs.is_empty() {
522-
return Ok("".into());
527+
return Ok(String::new());
523528
}
524529

525530
let owned_index;

0 commit comments

Comments
 (0)