Skip to content

Commit 68397b6

Browse files
committed
perf: Optimize hotpath memory management and reduce heap allocations
- Stream file hashing in manual.rs: use fixed 8KiB stack buffer instead of reading entire files into heap-allocated Vecs, reducing per-file memory from O(file_size) to O(1)
- Stack-allocated hex encoding: replace hex::encode() with inline hex encoding into stack buffers in both turborepo-hash (xxh64 → 16B) and turborepo-scm (SHA1 → 40B), eliminating intermediate Vec allocs
- Pre-sized Oid formatting in hash_object.rs: use String::with_capacity(40) + write!() instead of to_string() to avoid reallocation for every file hashed in parallel via rayon
- Pre-allocate parse buffers in ls_tree.rs and status.rs: initialize read buffers with 256B capacity instead of zero to avoid repeated growth during git output parsing
- Optimize repo_index.rs: pre-size HashMap for root package, reuse a single String buffer for BTreeMap range boundary construction instead of two separate format!() allocations per package lookup
- Remove hex crate dependency from turborepo-hash and turborepo-scm

https://claude.ai/code/session_0117j5aMJ8CCBs1uimoSxDaC
1 parent 6ef1582 commit 68397b6

File tree

9 files changed

+91
-27
lines changed

9 files changed

+91
-27
lines changed

Cargo.lock

Lines changed: 0 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/turborepo-hash/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ workspace = true
1010

1111
[dependencies]
1212
capnp = "0.24"
13-
hex = "0.4.3"
1413
turbopath = { workspace = true }
1514
turborepo-lockfiles = { workspace = true }
1615
turborepo-types = { workspace = true }

crates/turborepo-hash/src/traits.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,20 @@ where
1313
{
1414
}
1515

16+
/// Hex-encodes `value` as 16 lowercase hex digits (big-endian, zero-padded)
/// into the caller-provided `buf` and returns them as a `&str` borrowed from
/// that buffer.
///
/// The function itself performs no heap allocation; every byte of `buf` is
/// overwritten, so its previous contents do not matter.
#[inline]
fn hex_encode_u64(value: u64, buf: &mut [u8; 16]) -> &str {
    const HEX_CHARS: &[u8; 16] = b"0123456789abcdef";
    // Each input byte becomes exactly two output digits, so pair every
    // 2-byte output chunk with its source byte instead of indexing by hand.
    for (pair, byte) in buf.chunks_exact_mut(2).zip(value.to_be_bytes()) {
        pair[0] = HEX_CHARS[usize::from(byte >> 4)];
        pair[1] = HEX_CHARS[usize::from(byte & 0x0f)];
    }
    // Checked conversion: validating 16 ASCII bytes is negligible and keeps
    // this helper free of `unsafe`.
    std::str::from_utf8(&buf[..]).expect("hex digits are always valid ASCII")
}
29+
1630
impl<T, A> TurboHash<A> for T
1731
where
1832
T: Into<Builder<A>>,
@@ -31,6 +45,9 @@ where
3145

3246
let out = xxhash_rust::xxh64::xxh64(buf, 0);
3347

34-
hex::encode(out.to_be_bytes())
48+
// Encode into a stack buffer and create the String from that, avoiding
49+
// the intermediate Vec allocation that hex::encode performs.
50+
let mut hex_buf = [0u8; 16];
51+
hex_encode_u64(out, &mut hex_buf).to_owned()
3552
}
3653
}

crates/turborepo-scm/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ workspace = true
1313
bstr = "1.4.0"
1414
git2 = { workspace = true, default-features = false, optional = true }
1515
globwalk = { path = "../turborepo-globwalk" }
16-
hex = { workspace = true }
1716
ignore = "0.4.20"
1817

1918
nom = "7.1.3"

crates/turborepo-scm/src/hash_object.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
#![cfg(feature = "git2")]
2+
use std::fmt::Write;
3+
24
use rayon::prelude::*;
35
use tracing::debug;
46
use turbopath::{AbsoluteSystemPath, AnchoredSystemPathBuf, RelativeUnixPath, RelativeUnixPathBuf};
@@ -63,7 +65,11 @@ pub(crate) fn hash_objects(
6365
AnchoredSystemPathBuf::relative_path_between(pkg_path, &full_file_path)
6466
.to_unix()
6567
});
66-
Ok(Some((package_relative_path, hash.to_string())))
68+
// Format the OID hex directly into a pre-sized String to
69+
// avoid the intermediate allocations of Display + to_string().
70+
let mut hex = String::with_capacity(40);
71+
write!(hex, "{hash}").expect("writing to String cannot fail");
72+
Ok(Some((package_relative_path, hex)))
6773
}
6874
Err(e) => {
6975
if e.class() == git2::ErrorClass::Os

crates/turborepo-scm/src/ls_tree.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,9 @@ impl GitRepo {
7272

7373
fn read_ls_tree<R: Read>(reader: R, hashes: &mut GitHashes) -> Result<(), Error> {
7474
let mut reader = BufReader::with_capacity(64 * 1024, reader);
75-
let mut buffer = Vec::new();
75+
// Typical ls-tree entries are ~80-120 bytes; pre-allocate to avoid
76+
// repeated growth from zero.
77+
let mut buffer = Vec::with_capacity(256);
7678
while reader.read_until(b'\0', &mut buffer)? != 0 {
7779
let entry = parse_ls_tree(&buffer)?;
7880
let hash = std::str::from_utf8(entry.hash)
@@ -88,7 +90,7 @@ fn read_ls_tree<R: Read>(reader: R, hashes: &mut GitHashes) -> Result<(), Error>
8890

8991
fn read_ls_tree_sorted<R: Read>(reader: R, hashes: &mut SortedGitHashes) -> Result<(), Error> {
9092
let mut reader = BufReader::with_capacity(64 * 1024, reader);
91-
let mut buffer = Vec::new();
93+
let mut buffer = Vec::with_capacity(256);
9294
while reader.read_until(b'\0', &mut buffer)? != 0 {
9395
let entry = parse_ls_tree(&buffer)?;
9496
let hash = std::str::from_utf8(entry.hash)

crates/turborepo-scm/src/manual.rs

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,35 +4,61 @@
44
use std::io::{ErrorKind, Read};
55

66
use globwalk::fix_glob_pattern;
7-
use hex::ToHex;
87
use ignore::WalkBuilder;
98
use sha1::{Digest, Sha1};
109
use turbopath::{AbsoluteSystemPath, AnchoredSystemPath, IntoUnix};
1110
use wax::{Glob, Program, any};
1211

1312
use crate::{Error, GitHashes};
1413

14+
/// Hex-encodes a 20-byte SHA-1 digest as 40 lowercase ASCII hex digits.
///
/// The digits are returned by value as a `[u8; 40]` (not as a `&str`), so the
/// encoding step itself performs no heap allocation. Turning the result into
/// an owned `String` is left to the caller.
#[inline]
fn hex_encode_sha1(digest: &[u8; 20]) -> [u8; 40] {
    const HEX: &[u8; 16] = b"0123456789abcdef";
    let mut buf = [0u8; 40];
    // Each digest byte becomes exactly two output digits, so pair every
    // 2-byte output chunk with its source byte instead of indexing by hand.
    for (pair, &byte) in buf.chunks_exact_mut(2).zip(digest) {
        pair[0] = HEX[usize::from(byte >> 4)];
        pair[1] = HEX[usize::from(byte & 0x0f)];
    }
    buf
}
26+
1527
fn git_like_hash_file(path: &AbsoluteSystemPath) -> Result<String, Error> {
1628
let mut hasher = Sha1::new();
1729
let mut f = path.open()?;
18-
// Pre-allocate the buffer based on file metadata to avoid repeated
19-
// reallocations during read_to_end. The +1 accounts for read_to_end's
20-
// probe read that confirms EOF.
21-
let estimated_size = f.metadata().map(|m| m.len() as usize + 1).unwrap_or(0);
22-
let mut buffer = Vec::with_capacity(estimated_size);
23-
// Note that read_to_end reads the target if f is a symlink. Currently, this can
30+
let metadata = f.metadata()?;
31+
let file_size = metadata.len();
32+
33+
// Write the git blob header: "blob <size>\0"
34+
// This must use the exact byte count that will follow.
35+
hasher.update(b"blob ");
36+
hasher.update(file_size.to_string().as_bytes());
37+
hasher.update([b'\0']);
38+
39+
// Stream file contents through the hasher using a fixed stack buffer
40+
// instead of reading the entire file into a heap-allocated Vec. This
41+
// reduces per-file memory from O(file_size) to O(1) (8 KiB).
42+
//
43+
// Note that read reads the target if f is a symlink. Currently, this can
2444
// happen when we are hashing a specific set of files, which in turn only
2545
// happens for handling dotEnv files. It is likely that in the future we
2646
// will want to ensure that the target is better accounted for in the set of
2747
// inputs to the task. Manual hashing, as well as global deps and other
2848
// places that support globs all ignore symlinks.
29-
let size = f.read_to_end(&mut buffer)?;
30-
hasher.update("blob ".as_bytes());
31-
hasher.update(size.to_string().as_bytes());
32-
hasher.update([b'\0']);
33-
hasher.update(buffer.as_slice());
34-
let result = hasher.finalize();
35-
Ok(result.encode_hex::<String>())
49+
let mut buf = [0u8; 8192];
50+
loop {
51+
let n = f.read(&mut buf)?;
52+
if n == 0 {
53+
break;
54+
}
55+
hasher.update(&buf[..n]);
56+
}
57+
58+
let digest: [u8; 20] = hasher.finalize().into();
59+
let hex_buf = hex_encode_sha1(&digest);
60+
// SAFETY: hex_buf contains only ASCII hex characters.
61+
Ok(unsafe { String::from_utf8_unchecked(hex_buf.to_vec()) })
3662
}
3763

3864
fn to_glob(input: &str) -> Result<Glob<'_>, Error> {

crates/turborepo-scm/src/repo_index.rs

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,29 @@ impl RepoGitIndex {
5959
let prefix_str = pkg_prefix.as_str();
6060
let prefix_is_empty = prefix_str.is_empty();
6161

62-
let mut hashes = GitHashes::new();
62+
let mut hashes = if prefix_is_empty {
63+
GitHashes::with_capacity(self.ls_tree_hashes.len())
64+
} else {
65+
GitHashes::new()
66+
};
6367
if prefix_is_empty {
6468
for (path, hash) in &self.ls_tree_hashes {
6569
hashes.insert(path.clone(), hash.clone());
6670
}
6771
} else {
68-
let range_start = RelativeUnixPathBuf::new(format!("{}/", prefix_str)).unwrap();
69-
let range_end = RelativeUnixPathBuf::new(format!("{}0", prefix_str)).unwrap();
72+
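// Build both range boundary strings from a single scratch buffer. The '/'
73+
// and '0' characters are adjacent in ASCII, so `prefix/`..`prefix0`
74+
// captures exactly the entries under this package prefix.
75+
// NOTE(review): each owned RelativeUnixPathBuf is built from a borrowed `&key_buf`, so it presumably still copies the string; confirm this really saves allocations over the two format!() calls.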
76+
let mut key_buf = String::with_capacity(prefix_str.len() + 1);
77+
key_buf.push_str(prefix_str);
78+
key_buf.push('/');
79+
let range_start = RelativeUnixPathBuf::new(&key_buf).unwrap();
80+
// Replace trailing '/' with '0' (the next ASCII char after '/')
81+
// to form the exclusive upper bound.
82+
key_buf.pop();
83+
key_buf.push('0');
84+
let range_end = RelativeUnixPathBuf::new(&key_buf).unwrap();
7085
for (path, hash) in self.ls_tree_hashes.range(range_start..range_end) {
7186
if let Ok(stripped) = path.strip_prefix(pkg_prefix) {
7287
hashes.insert(stripped, hash.clone());

crates/turborepo-scm/src/status.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,9 @@ fn read_status<R: Read>(
8484
) -> Result<Vec<RelativeUnixPathBuf>, Error> {
8585
let mut to_hash = Vec::new();
8686
let mut reader = BufReader::with_capacity(64 * 1024, reader);
87-
let mut buffer = Vec::new();
87+
// Status entries are typically ~6 bytes overhead + filename; pre-allocate
88+
// to avoid repeated growth from zero.
89+
let mut buffer = Vec::with_capacity(256);
8890
while reader.read_until(b'\0', &mut buffer)? != 0 {
8991
let entry = parse_status(&buffer)?;
9092
let filename = std::str::from_utf8(entry.filename)
@@ -109,7 +111,7 @@ fn read_status<R: Read>(
109111
fn read_status_raw<R: Read>(reader: R) -> Result<Vec<RepoStatusEntry>, Error> {
110112
let mut entries = Vec::new();
111113
let mut reader = BufReader::with_capacity(64 * 1024, reader);
112-
let mut buffer = Vec::new();
114+
let mut buffer = Vec::with_capacity(256);
113115
while reader.read_until(b'\0', &mut buffer)? != 0 {
114116
let entry = parse_status(&buffer)?;
115117
let filename = std::str::from_utf8(entry.filename)

0 commit comments

Comments
 (0)