Skip to content

Commit b0048b0

Browse files
authored
perf: Resolve literal input paths via stat instead of glob walk (#11955)
## Summary

When tasks use `$TURBO_DEFAULT$` with additional literal inputs (like `$TURBO_ROOT$/tsconfig.json`), `get_package_file_hashes_from_inputs_and_index` was running every include pattern through `compile_globs` (wax regex compilation) + `walk_glob` (full directory traversal) — even for literal file paths that resolve to exactly one file.

This separates literal paths from actual glob patterns. Literals get a single `symlink_metadata` syscall. Only patterns with metacharacters (`*`, `?`, `[`, `{`) go through the expensive compile + walk path.

## Impact

On a 630-package monorepo where the `typecheck` task includes `$TURBO_ROOT$/tsconfig.json`, this eliminates ~990 out of 995 glob compilations and directory walks.

**Profile data** (`turbo run typecheck --skip-infer --dry --profile`):

| Function | Before | After |
|----------|--------|-------|
| `walk_glob` | 51.5% (721ms) | 5.2% (68ms) |
| `compile_globs` | 28.5% (399ms) | <1% |
| Total spans | 18,148 | 15,164 |

**Hyperfine** (30 runs, `--warmup 10`, `typecheck --skip-infer --dry`):

| Repo | Optimized | Mainline | Ratio |
|------|-----------|----------|-------|
| Large (630 pkgs) | 1.623s ± 0.159s | 1.654s ± 0.168s | 1.02x |
| Medium (120 pkgs) | 821ms ± 66ms | 878ms ± 95ms | 1.07x |
| Small (5 pkgs) | 584ms ± 44ms | 589ms ± 108ms | 1.01x |

**Caveat**: Wall-clock improvement is within system noise on most runs. The CPU time reduction is real and consistent (User time on the large repo drops from ~1.96s to ~1.48s, a 25% reduction), but I/O and system-time variance prevent it from reliably surfacing in hyperfine on my machine.
1 parent ef22b25 commit b0048b0

File tree

2 files changed

+81
-31
lines changed

2 files changed

+81
-31
lines changed

crates/turborepo-globwalk/src/lib.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,24 @@ fn needs_path_cleaning(s: &str) -> bool {
440440
false
441441
}
442442

443+
/// Reports whether `pattern` contains at least one unescaped glob
/// metacharacter (`*`, `?`, `[` or `{`).
///
/// A backslash escapes the character that follows it, so an escaped
/// metacharacter such as `\*` does not count. Patterns without any unescaped
/// metacharacter (including the empty string) are literal paths and yield
/// `false`.
pub fn is_glob_pattern(pattern: &str) -> bool {
    let mut chars = pattern.chars();
    while let Some(c) = chars.next() {
        match c {
            // Consume the escaped character without inspecting it. A trailing
            // backslash has nothing to escape and simply ends the scan.
            '\\' => {
                chars.next();
            }
            '*' | '?' | '[' | '{' => return true,
            _ => {}
        }
    }
    false
}
460+
443461
pub fn globwalk_with_settings(
444462
base_path: &AbsoluteSystemPath,
445463
include: &[ValidatedGlob],

crates/turborepo-scm/src/package_deps.rs

Lines changed: 63 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -267,15 +267,22 @@ impl GitRepo {
267267
// Include globs can find files not in the git index (e.g. gitignored files
268268
// that a user explicitly wants to track). Walk the filesystem for these
269269
// files but skip re-hashing any already known from the index.
270+
//
271+
// Optimization: separate literal file paths from actual glob patterns.
272+
// Literal paths (e.g. "$TURBO_ROOT$/tsconfig.json") are resolved with a
273+
// single stat syscall instead of compiling a glob regex and walking a
274+
// directory tree.
270275
let pkg_prefix = package_path.to_unix();
271276

272277
if !includes.is_empty() {
273278
let full_pkg_path = turbo_root.resolve(package_path);
274279
let package_unix_path = pkg_prefix.as_str();
275280

276281
static CONFIG_FILES: &[&str] = &["package.json", "turbo.json", "turbo.jsonc"];
277-
let mut inclusions = Vec::with_capacity(includes.len() + CONFIG_FILES.len());
278-
let mut exclusions = Vec::new();
282+
283+
let mut glob_inclusions = Vec::new();
284+
let mut glob_exclusions = Vec::new();
285+
let mut literal_to_hash = Vec::new();
279286
let mut glob_buf = String::with_capacity(package_unix_path.len() + 1 + 64);
280287

281288
let all = includes.iter().copied().chain(CONFIG_FILES.iter().copied());
@@ -285,44 +292,69 @@ impl GitRepo {
285292
glob_buf.push_str(package_unix_path);
286293
glob_buf.push('/');
287294
glob_buf.push_str(exclusion.trim_start_matches('/'));
288-
exclusions.push(ValidatedGlob::from_str(&glob_buf)?);
295+
glob_exclusions.push(ValidatedGlob::from_str(&glob_buf)?);
296+
} else if !globwalk::is_glob_pattern(raw_glob) {
297+
// Literal file path — resolve directly via stat instead of
298+
// compiling a glob and walking directories.
299+
let resolved =
300+
full_pkg_path.join_unix_path(turbopath::RelativeUnixPath::new(raw_glob)?);
301+
if resolved.symlink_metadata().is_ok() {
302+
let git_relative = self.root.anchor(&resolved)?.to_unix();
303+
let pkg_relative =
304+
turbopath::RelativeUnixPath::strip_prefix(&git_relative, &pkg_prefix)
305+
.ok()
306+
.map(|s| s.to_owned());
307+
let already_known = pkg_relative
308+
.as_ref()
309+
.is_some_and(|rel| hashes.contains_key(rel));
310+
if !already_known {
311+
literal_to_hash.push(git_relative);
312+
}
313+
}
289314
} else {
290315
glob_buf.push_str(package_unix_path);
291316
glob_buf.push('/');
292317
glob_buf.push_str(raw_glob.trim_start_matches('/'));
293-
inclusions.push(ValidatedGlob::from_str(&glob_buf)?);
318+
glob_inclusions.push(ValidatedGlob::from_str(&glob_buf)?);
294319
}
295320
}
296321

297-
let files = globwalk::globwalk(
298-
turbo_root,
299-
&inclusions,
300-
&exclusions,
301-
globwalk::WalkType::Files,
302-
)?;
303-
304-
// Only hash files not already present from the git index
305-
let mut to_hash = Vec::new();
306-
for entry in &files {
307-
let git_relative = self.root.anchor(entry)?.to_unix();
308-
let pkg_relative =
309-
turbopath::RelativeUnixPath::strip_prefix(&git_relative, &pkg_prefix)
310-
.ok()
311-
.map(|s| s.to_owned());
312-
313-
let already_known = pkg_relative
314-
.as_ref()
315-
.is_some_and(|rel| hashes.contains_key(rel));
316-
317-
if !already_known {
318-
to_hash.push(git_relative);
319-
}
322+
// Hash any literal files discovered via direct stat.
323+
if !literal_to_hash.is_empty() {
324+
let mut new_hashes = GitHashes::with_capacity(literal_to_hash.len());
325+
hash_objects(&self.root, &full_pkg_path, literal_to_hash, &mut new_hashes)?;
326+
hashes.extend(new_hashes);
320327
}
321328

322-
if !to_hash.is_empty() {
323-
let mut new_hashes = GitHashes::with_capacity(to_hash.len());
324-
hash_objects(&self.root, &full_pkg_path, to_hash, &mut new_hashes)?;
325-
hashes.extend(new_hashes);
329+
// Only do the expensive glob walk for patterns that are actual globs.
330+
if !glob_inclusions.is_empty() {
331+
let files = globwalk::globwalk(
332+
turbo_root,
333+
&glob_inclusions,
334+
&glob_exclusions,
335+
globwalk::WalkType::Files,
336+
)?;
337+
338+
let mut to_hash = Vec::new();
339+
for entry in &files {
340+
let git_relative = self.root.anchor(entry)?.to_unix();
341+
let pkg_relative =
342+
turbopath::RelativeUnixPath::strip_prefix(&git_relative, &pkg_prefix)
343+
.ok()
344+
.map(|s| s.to_owned());
345+
let already_known = pkg_relative
346+
.as_ref()
347+
.is_some_and(|rel| hashes.contains_key(rel));
348+
if !already_known {
349+
to_hash.push(git_relative);
350+
}
351+
}
352+
353+
if !to_hash.is_empty() {
354+
let mut new_hashes = GitHashes::with_capacity(to_hash.len());
355+
hash_objects(&self.root, &full_pkg_path, to_hash, &mut new_hashes)?;
356+
hashes.extend(new_hashes);
357+
}
326358
}
327359
}
328360

0 commit comments

Comments
 (0)