Skip to content

Commit 9c1fedd

Browse files
committed
perf: Reduce allocations in SCM hashing, glob preprocessing, and cache lookups
Eliminate unnecessary heap allocations and reduce syscalls across the hot paths of turbo run:

- ls_tree/status: Replace double allocation (to_vec + String::from_utf8) with single allocation (str::from_utf8 + to_owned). Increase BufReader buffer from 8KB to 64KB to reduce read syscalls.
- hash_object: Pre-allocate HashMap capacity from known input size.
- package_deps: Avoid cloning every input glob to String — iterate &str references and reuse a single buffer for path joining. Skip re-hashing files already known from the git index in the inputs+index path.
- task-hash: Collect &str refs under mutex for dependency hash dedup instead of cloning each String. Add HashSet capacity hint.
- globwalk: Add Vec capacity hints for include/exclude paths. Use Cow::into_owned instead of .to_string() to avoid allocation when collapse_path returns a borrowed reference.
- cache: Reuse a single String buffer across the 3 path suffixes in FSCache::exists instead of 3 format!() + join_component() calls.
1 parent fb5ef0e commit 9c1fedd

File tree

7 files changed

+155
-102
lines changed

7 files changed

+155
-102
lines changed

crates/turborepo-cache/src/fs.rs

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -125,19 +125,29 @@ impl FSCache {
125125

126126
#[tracing::instrument(skip_all)]
127127
pub(crate) fn exists(&self, hash: &str) -> Result<Option<CacheHitMetadata>, CacheError> {
128-
let uncompressed_cache_path = self.cache_directory.join_component(&format!("{hash}.tar"));
129-
let compressed_cache_path = self
130-
.cache_directory
131-
.join_component(&format!("{hash}.tar.zst"));
128+
let cache_dir = self.cache_directory.as_str();
129+
let mut buf = String::with_capacity(cache_dir.len() + 1 + hash.len() + "-meta.json".len());
130+
buf.push_str(cache_dir);
131+
buf.push(std::path::MAIN_SEPARATOR);
132+
buf.push_str(hash);
133+
let prefix_len = buf.len();
134+
135+
buf.push_str(".tar");
136+
let uncompressed_exists = std::path::Path::new(&buf).exists();
132137

133-
if !uncompressed_cache_path.exists() && !compressed_cache_path.exists() {
138+
buf.push_str(".zst");
139+
let compressed_exists = std::path::Path::new(&buf).exists();
140+
141+
if !uncompressed_exists && !compressed_exists {
134142
return Ok(None);
135143
}
136144

145+
buf.truncate(prefix_len);
146+
buf.push_str("-meta.json");
147+
137148
let duration = CacheMetadata::read(
138-
&self
139-
.cache_directory
140-
.join_component(&format!("{hash}-meta.json")),
149+
&AbsoluteSystemPathBuf::try_from(buf.as_str())
150+
.map_err(|_| CacheError::ConfigCacheInvalidBase)?,
141151
)
142152
.map(|meta| meta.duration)
143153
.unwrap_or(0);

crates/turborepo-globwalk/src/lib.rs

Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -80,25 +80,22 @@ fn preprocess_paths_and_globs<S: AsRef<str>>(
8080
.ok_or(WalkError::InvalidPath)?;
8181
let base_path_slash = escape_glob_literals(&raw_slash);
8282

83-
let (include_paths, lowest_segment) = include
84-
.iter()
85-
.map(|s| fix_glob_pattern(s.as_ref()).into_owned())
86-
.map(|mut s| {
83+
let (include_paths, lowest_segment) = {
84+
let mut paths = Vec::with_capacity(include.len());
85+
let mut lowest = usize::MAX;
86+
for s in include {
87+
let mut fixed = fix_glob_pattern(s.as_ref()).into_owned();
8788
// We need to check inclusion globs before the join
8889
// as to_slash doesn't preserve Windows drive names.
89-
add_doublestar_to_dir(base_path, &mut s);
90-
s
91-
})
92-
.map(|s| join_unix_like_paths(&base_path_slash, &s))
93-
.filter_map(|s| collapse_path(&s).map(|(s, v)| (s.to_string(), v)))
94-
.fold(
95-
(vec![], usize::MAX),
96-
|(mut vec, lowest_segment), (path, lowest_segment_next)| {
97-
let lowest_segment = std::cmp::min(lowest_segment, lowest_segment_next);
98-
vec.push(path); // we stringify here due to lifetime issues
99-
(vec, lowest_segment)
100-
},
101-
);
90+
add_doublestar_to_dir(base_path, &mut fixed);
91+
let joined = join_unix_like_paths(&base_path_slash, &fixed);
92+
if let Some((collapsed, segment)) = collapse_path(&joined) {
93+
lowest = std::cmp::min(lowest, segment);
94+
paths.push(collapsed.into_owned());
95+
}
96+
}
97+
(paths, lowest)
98+
};
10299

103100
let base_path = base_path
104101
.components()
@@ -108,16 +105,13 @@ fn preprocess_paths_and_globs<S: AsRef<str>>(
108105
)
109106
.collect::<PathBuf>();
110107

111-
let mut exclude_paths = vec![];
112-
for split in exclude
113-
.iter()
114-
.map(|s| fix_glob_pattern(s.as_ref()))
115-
.map(|s| join_unix_like_paths(&base_path_slash, s.as_ref()))
116-
.filter_map(|g| collapse_path(&g).map(|(s, _)| s.to_string()))
117-
{
118-
// if the glob ends with a slash, then we need to add a double star,
119-
// unless it already ends with a double star
120-
add_trailing_double_star(&mut exclude_paths, &split);
108+
let mut exclude_paths = Vec::with_capacity(exclude.len() * 2);
109+
for s in exclude {
110+
let fixed = fix_glob_pattern(s.as_ref());
111+
let joined = join_unix_like_paths(&base_path_slash, fixed.as_ref());
112+
if let Some((collapsed, _)) = collapse_path(&joined) {
113+
add_trailing_double_star(&mut exclude_paths, &collapsed);
114+
}
121115
}
122116

123117
Ok((base_path, include_paths, exclude_paths))

crates/turborepo-scm/src/hash_object.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ pub(crate) fn hash_objects(
4545
) -> Result<(), Error> {
4646
let pkg_prefix = git_root.anchor(pkg_path).ok().map(|a| a.to_unix());
4747

48+
hashes.reserve(to_hash.len());
4849
let results: Vec<Result<Option<(RelativeUnixPathBuf, String)>, Error>> = to_hash
4950
.into_par_iter()
5051
.map(|filename| {

crates/turborepo-scm/src/ls_tree.rs

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -71,26 +71,32 @@ impl GitRepo {
7171
}
7272

7373
fn read_ls_tree<R: Read>(reader: R, hashes: &mut GitHashes) -> Result<(), Error> {
74-
let mut reader = BufReader::new(reader);
74+
let mut reader = BufReader::with_capacity(64 * 1024, reader);
7575
let mut buffer = Vec::new();
7676
while reader.read_until(b'\0', &mut buffer)? != 0 {
7777
let entry = parse_ls_tree(&buffer)?;
78-
let hash = String::from_utf8(entry.hash.to_vec())?;
79-
let path = RelativeUnixPathBuf::new(String::from_utf8(entry.filename.to_vec())?)?;
80-
hashes.insert(path, hash);
78+
let hash = std::str::from_utf8(entry.hash)
79+
.map_err(|e| Error::git_error(format!("invalid utf8 in ls-tree hash: {e}")))?;
80+
let filename = std::str::from_utf8(entry.filename)
81+
.map_err(|e| Error::git_error(format!("invalid utf8 in ls-tree filename: {e}")))?;
82+
let path = RelativeUnixPathBuf::new(filename)?;
83+
hashes.insert(path, hash.to_owned());
8184
buffer.clear();
8285
}
8386
Ok(())
8487
}
8588

8689
fn read_ls_tree_sorted<R: Read>(reader: R, hashes: &mut SortedGitHashes) -> Result<(), Error> {
87-
let mut reader = BufReader::new(reader);
90+
let mut reader = BufReader::with_capacity(64 * 1024, reader);
8891
let mut buffer = Vec::new();
8992
while reader.read_until(b'\0', &mut buffer)? != 0 {
9093
let entry = parse_ls_tree(&buffer)?;
91-
let hash = String::from_utf8(entry.hash.to_vec())?;
92-
let path = RelativeUnixPathBuf::new(String::from_utf8(entry.filename.to_vec())?)?;
93-
hashes.insert(path, hash);
94+
let hash = std::str::from_utf8(entry.hash)
95+
.map_err(|e| Error::git_error(format!("invalid utf8 in ls-tree hash: {e}")))?;
96+
let filename = std::str::from_utf8(entry.filename)
97+
.map_err(|e| Error::git_error(format!("invalid utf8 in ls-tree filename: {e}")))?;
98+
let path = RelativeUnixPathBuf::new(filename)?;
99+
hashes.insert(path, hash.to_owned());
94100
buffer.clear();
95101
}
96102
Ok(())

crates/turborepo-scm/src/package_deps.rs

Lines changed: 86 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -200,45 +200,33 @@ impl GitRepo {
200200
let package_unix_path_buf = package_path.to_unix();
201201
let package_unix_path = package_unix_path_buf.as_str();
202202

203-
let mut inputs = inputs
203+
static CONFIG_FILES: &[&str] = &["package.json", "turbo.json", "turbo.jsonc"];
204+
let extra_inputs = if include_configs { CONFIG_FILES } else { &[] };
205+
let total_inputs = inputs.len() + extra_inputs.len();
206+
207+
// Build glob lists directly from &str references — no need to clone
208+
// every input into an owned String. We only allocate for the joined
209+
// "package_path/glob" strings that globwalk requires.
210+
let mut inclusions = Vec::with_capacity(total_inputs);
211+
let mut exclusions = Vec::with_capacity(total_inputs);
212+
let mut glob_buf = String::with_capacity(package_unix_path.len() + 1 + 64);
213+
214+
let all_inputs = inputs
204215
.iter()
205-
.map(|s| s.as_ref().to_string())
206-
.collect::<Vec<String>>();
207-
208-
if include_configs {
209-
// Add in package.json and turbo.json to input patterns. Both file paths are
210-
// relative to pkgPath
211-
//
212-
// - package.json is an input because if the `scripts` in the package.json
213-
// change (i.e. the tasks that turbo executes), we want a cache miss, since
214-
// any existing cache could be invalid.
215-
// - turbo.json because it's the definition of the tasks themselves. The root
216-
// turbo.json is similarly included in the global hash. This file may not
217-
// exist in the workspace, but that is ok, because it will get ignored
218-
// downstream.
219-
inputs.push("package.json".to_string());
220-
inputs.push("turbo.json".to_string());
221-
inputs.push("turbo.jsonc".to_string());
222-
}
223-
224-
// The input patterns are relative to the package.
225-
// However, we need to change the globbing to be relative to the repo root.
226-
// Prepend the package path to each of the input patterns.
227-
//
228-
// FIXME: we don't yet error on absolute unix paths being passed in as inputs,
229-
// and instead tack them on as if they were relative paths. This should be an
230-
// error further upstream, but since we haven't pulled the switch yet,
231-
// we need to mimic the Go behavior here and trim leading `/`
232-
// characters.
233-
let mut inclusions = vec![];
234-
let mut exclusions = vec![];
235-
for raw_glob in inputs {
216+
.map(|s| s.as_ref())
217+
.chain(extra_inputs.iter().copied());
218+
for raw_glob in all_inputs {
219+
glob_buf.clear();
236220
if let Some(exclusion) = raw_glob.strip_prefix('!') {
237-
let glob_str = [package_unix_path, exclusion.trim_start_matches('/')].join("/");
238-
exclusions.push(ValidatedGlob::from_str(&glob_str)?);
221+
glob_buf.push_str(package_unix_path);
222+
glob_buf.push('/');
223+
glob_buf.push_str(exclusion.trim_start_matches('/'));
224+
exclusions.push(ValidatedGlob::from_str(&glob_buf)?);
239225
} else {
240-
let glob_str = [package_unix_path, raw_glob.trim_start_matches('/')].join("/");
241-
inclusions.push(ValidatedGlob::from_str(&glob_str)?);
226+
glob_buf.push_str(package_unix_path);
227+
glob_buf.push('/');
228+
glob_buf.push_str(raw_glob.trim_start_matches('/'));
229+
inclusions.push(ValidatedGlob::from_str(&glob_buf)?);
242230
}
243231
}
244232
let files = globwalk::globwalk(
@@ -247,14 +235,11 @@ impl GitRepo {
247235
&exclusions,
248236
globwalk::WalkType::Files,
249237
)?;
250-
let to_hash = files
251-
.iter()
252-
.map(|entry| {
253-
let path = self.root.anchor(entry)?.to_unix();
254-
Ok(path)
255-
})
256-
.collect::<Result<Vec<_>, Error>>()?;
257-
let mut hashes = GitHashes::new();
238+
let mut to_hash = Vec::with_capacity(files.len());
239+
for entry in &files {
240+
to_hash.push(self.root.anchor(entry)?.to_unix());
241+
}
242+
let mut hashes = GitHashes::with_capacity(files.len());
258243
hash_objects(&self.root, &full_pkg_path, to_hash, &mut hashes)?;
259244
Ok(hashes)
260245
}
@@ -283,16 +268,66 @@ impl GitRepo {
283268
}
284269

285270
// Include globs can find files not in the git index (e.g. gitignored files
286-
// that a user explicitly wants to track). We still need globwalk for these
287-
// but can skip re-hashing files already known from the index.
271+
// that a user explicitly wants to track). Walk the filesystem for these
272+
// files but skip re-hashing any already known from the index.
288273
if !includes.is_empty() {
289-
let include_hashes = self.get_package_file_hashes_from_inputs(
274+
let full_pkg_path = turbo_root.resolve(package_path);
275+
let package_unix_path_buf = package_path.to_unix();
276+
let package_unix_path = package_unix_path_buf.as_str();
277+
278+
static CONFIG_FILES: &[&str] = &["package.json", "turbo.json", "turbo.jsonc"];
279+
let mut inclusions = Vec::with_capacity(includes.len() + CONFIG_FILES.len());
280+
let mut exclusions = Vec::new();
281+
let mut glob_buf = String::with_capacity(package_unix_path.len() + 1 + 64);
282+
283+
let all = includes.iter().copied().chain(CONFIG_FILES.iter().copied());
284+
for raw_glob in all {
285+
glob_buf.clear();
286+
if let Some(exclusion) = raw_glob.strip_prefix('!') {
287+
glob_buf.push_str(package_unix_path);
288+
glob_buf.push('/');
289+
glob_buf.push_str(exclusion.trim_start_matches('/'));
290+
exclusions.push(ValidatedGlob::from_str(&glob_buf)?);
291+
} else {
292+
glob_buf.push_str(package_unix_path);
293+
glob_buf.push('/');
294+
glob_buf.push_str(raw_glob.trim_start_matches('/'));
295+
inclusions.push(ValidatedGlob::from_str(&glob_buf)?);
296+
}
297+
}
298+
299+
let files = globwalk::globwalk(
290300
turbo_root,
291-
package_path,
292-
&includes,
293-
true,
301+
&inclusions,
302+
&exclusions,
303+
globwalk::WalkType::Files,
294304
)?;
295-
hashes.extend(include_hashes);
305+
306+
// Only hash files not already present from the git index
307+
let mut to_hash = Vec::new();
308+
for entry in &files {
309+
let git_relative = self.root.anchor(entry)?.to_unix();
310+
let pkg_relative = turbopath::RelativeUnixPath::strip_prefix(
311+
&git_relative,
312+
&package_unix_path_buf,
313+
)
314+
.ok()
315+
.map(|s| s.to_owned());
316+
317+
let already_known = pkg_relative
318+
.as_ref()
319+
.is_some_and(|rel| hashes.contains_key(rel));
320+
321+
if !already_known {
322+
to_hash.push(git_relative);
323+
}
324+
}
325+
326+
if !to_hash.is_empty() {
327+
let mut new_hashes = GitHashes::with_capacity(to_hash.len());
328+
hash_objects(&self.root, &full_pkg_path, to_hash, &mut new_hashes)?;
329+
hashes.extend(new_hashes);
330+
}
296331
}
297332

298333
// Apply excludes via in-memory matching — no filesystem walk needed since

crates/turborepo-scm/src/status.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,13 @@ fn read_status<R: Read>(
8383
hashes: &mut GitHashes,
8484
) -> Result<Vec<RelativeUnixPathBuf>, Error> {
8585
let mut to_hash = Vec::new();
86-
let mut reader = BufReader::new(reader);
86+
let mut reader = BufReader::with_capacity(64 * 1024, reader);
8787
let mut buffer = Vec::new();
8888
while reader.read_until(b'\0', &mut buffer)? != 0 {
8989
let entry = parse_status(&buffer)?;
90-
let path = RelativeUnixPathBuf::new(String::from_utf8(entry.filename.to_owned())?)?;
90+
let filename = std::str::from_utf8(entry.filename)
91+
.map_err(|e| Error::git_error(format!("invalid utf8 in git status: {e}")))?;
92+
let path = RelativeUnixPathBuf::new(filename)?;
9193
if entry.is_delete {
9294
let path = path.strip_prefix(pkg_prefix).map_err(|_| {
9395
Error::git_error(format!(
@@ -106,11 +108,13 @@ fn read_status<R: Read>(
106108

107109
fn read_status_raw<R: Read>(reader: R) -> Result<Vec<RepoStatusEntry>, Error> {
108110
let mut entries = Vec::new();
109-
let mut reader = BufReader::new(reader);
111+
let mut reader = BufReader::with_capacity(64 * 1024, reader);
110112
let mut buffer = Vec::new();
111113
while reader.read_until(b'\0', &mut buffer)? != 0 {
112114
let entry = parse_status(&buffer)?;
113-
let path = RelativeUnixPathBuf::new(String::from_utf8(entry.filename.to_owned())?)?;
115+
let filename = std::str::from_utf8(entry.filename)
116+
.map_err(|e| Error::git_error(format!("invalid utf8 in git status: {e}")))?;
117+
let path = RelativeUnixPathBuf::new(filename)?;
114118
entries.push(RepoStatusEntry {
115119
path,
116120
is_delete: entry.is_delete,

crates/turborepo-task-hash/src/lib.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ impl<'a, R: RunOptsHashInfo> TaskHasher<'a, R> {
419419
.lock()
420420
.expect("hash tracker mutex poisoned");
421421

422-
let mut dependency_hash_set = HashSet::new();
422+
let mut dependency_hash_set = HashSet::with_capacity(dependency_set.len());
423423
for dependency_task in dependency_set {
424424
let TaskNode::Task(dependency_task_id) = dependency_task else {
425425
continue;
@@ -429,11 +429,14 @@ impl<'a, R: RunOptsHashInfo> TaskHasher<'a, R> {
429429
.package_task_hashes
430430
.get(dependency_task_id)
431431
.ok_or_else(|| Error::MissingDependencyTaskHash(dependency_task.to_string()))?;
432-
dependency_hash_set.insert(dependency_hash.clone());
432+
dependency_hash_set.insert(dependency_hash.as_str());
433433
}
434-
drop(state);
435434

436-
let mut dependency_hash_list = dependency_hash_set.into_iter().collect::<Vec<_>>();
435+
let mut dependency_hash_list: Vec<String> = dependency_hash_set
436+
.into_iter()
437+
.map(|s| s.to_owned())
438+
.collect();
439+
drop(state);
437440
dependency_hash_list.sort_unstable();
438441

439442
Ok(dependency_hash_list)

0 commit comments

Comments (0)