Skip to content

Commit 2b40c2e

Browse files
committed
perf: Replace git subprocesses with libgit2 library calls
Eliminate fork+exec overhead for the three hottest git subprocess calls in turbo run startup: WorktreeInfo::detect, git ls-tree, and git status. Replace with in-process libgit2 equivalents (Repository::discover, tree.walk, repo.statuses). Also skip expensive TaskSummary construction and SCMState::get when neither --dry nor --summarize is set, and use sorted Vec instead of BTreeMap for ls-tree results for better cache locality.
1 parent 8adae7e commit 2b40c2e

File tree

10 files changed

+550
-86
lines changed

10 files changed

+550
-86
lines changed

crates/turborepo-config/src/lib.rs

Lines changed: 77 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -360,8 +360,8 @@ impl ConfigurationOptions {
360360
self.env_mode.unwrap_or_default()
361361
}
362362

363-
/// Returns the default cache directory path (relative to repo root).
364-
const DEFAULT_CACHE_DIR: &'static str = if cfg!(windows) {
363+
/// The default cache directory path (relative to repo root).
364+
pub const DEFAULT_CACHE_DIR: &'static str = if cfg!(windows) {
365365
".turbo\\cache"
366366
} else {
367367
".turbo/cache"
@@ -391,7 +391,6 @@ impl ConfigurationOptions {
391391
/// - `path`: The resolved cache directory path
392392
/// - `is_shared_worktree`: True if using shared cache from main worktree
393393
pub fn resolve_cache_dir(&self, repo_root: &AbsoluteSystemPath) -> CacheDirResult {
394-
// If explicit cacheDir is configured, always use it (no worktree sharing)
395394
if let Some(explicit_cache_dir) = &self.cache_dir {
396395
return CacheDirResult {
397396
path: explicit_cache_dir.clone(),
@@ -400,9 +399,28 @@ impl ConfigurationOptions {
400399
};
401400
}
402401

403-
// Try to detect worktree configuration
404-
match WorktreeInfo::detect(repo_root) {
405-
Ok(worktree_info) => {
402+
let worktree_info = WorktreeInfo::detect(repo_root).ok();
403+
self.resolve_cache_dir_with_worktree_info(worktree_info.as_ref())
404+
}
405+
406+
/// Resolve cache directory using pre-computed worktree info.
407+
///
408+
/// This variant performs no worktree detection of its own, which allows the caller
409+
/// to run worktree detection on a background thread and pass the result in.
410+
pub fn resolve_cache_dir_with_worktree_info(
411+
&self,
412+
worktree_info: Option<&WorktreeInfo>,
413+
) -> CacheDirResult {
414+
if let Some(explicit_cache_dir) = &self.cache_dir {
415+
return CacheDirResult {
416+
path: explicit_cache_dir.clone(),
417+
is_shared_worktree: false,
418+
git_root: None,
419+
};
420+
}
421+
422+
match worktree_info {
423+
Some(worktree_info) => {
406424
debug!(
407425
"Worktree detection: current={}, main={}, is_linked={}",
408426
worktree_info.worktree_root,
@@ -411,8 +429,6 @@ impl ConfigurationOptions {
411429
);
412430
let git_root = Some(worktree_info.git_root.clone());
413431
if worktree_info.is_linked_worktree() {
414-
// We're in a linked worktree - use the main worktree's cache
415-
// Use turbopath's join_component to ensure consistent path separators
416432
let main_cache_path = worktree_info
417433
.main_worktree_root
418434
.join_component(".turbo")
@@ -425,7 +441,6 @@ impl ConfigurationOptions {
425441
debug!("Using shared worktree cache at: {}", result.path);
426442
result
427443
} else {
428-
// We're in the main worktree - use local cache
429444
debug!(
430445
"Using local cache (main worktree): {}",
431446
Self::DEFAULT_CACHE_DIR
@@ -437,12 +452,10 @@ impl ConfigurationOptions {
437452
}
438453
}
439454
}
440-
Err(e) => {
441-
// Detection failed - silently fall back to local cache
442-
// This is expected for non-git directories, so we don't warn
455+
None => {
443456
debug!(
444-
"Could not detect Git worktree configuration, using local cache: {}",
445-
e
457+
"No worktree info available, using local cache: {}",
458+
Self::DEFAULT_CACHE_DIR
446459
);
447460
CacheDirResult {
448461
path: Utf8PathBuf::from(Self::DEFAULT_CACHE_DIR),
@@ -878,6 +891,56 @@ mod test {
878891
);
879892
}
880893

894+
#[test]
895+
fn test_resolve_cache_dir_captures_git_root() {
896+
let tmp_dir = TempDir::new().unwrap();
897+
let repo_root = AbsoluteSystemPathBuf::try_from(tmp_dir.path())
898+
.unwrap()
899+
.to_realpath()
900+
.unwrap();
901+
902+
std::process::Command::new("git")
903+
.args(["init", "."])
904+
.current_dir(&repo_root)
905+
.output()
906+
.expect("git init failed");
907+
908+
let config = ConfigurationOptions::default();
909+
let result = config.resolve_cache_dir(&repo_root);
910+
911+
// git_root should be captured from worktree detection so SCM::new
912+
// can skip its own git rev-parse subprocess
913+
assert!(
914+
result.git_root.is_some(),
915+
"git_root should be captured when worktree detection succeeds"
916+
);
917+
assert_eq!(
918+
result.git_root.unwrap(),
919+
repo_root,
920+
"git_root should match repo root in a non-worktree repo"
921+
);
922+
}
923+
924+
#[test]
925+
fn test_resolve_cache_dir_explicit_skips_git_root() {
926+
let tmp_dir = TempDir::new().unwrap();
927+
let repo_root = AbsoluteSystemPath::from_std_path(tmp_dir.path()).unwrap();
928+
929+
let config = ConfigurationOptions {
930+
cache_dir: Some(camino::Utf8PathBuf::from("/my/cache")),
931+
..Default::default()
932+
};
933+
934+
let result = config.resolve_cache_dir(repo_root);
935+
936+
// When explicit cache_dir is set, no worktree detection runs,
937+
// so git_root is not available
938+
assert!(
939+
result.git_root.is_none(),
940+
"git_root should be None when explicit cache_dir bypasses detection"
941+
);
942+
}
943+
881944
/// Integration test that verifies linked worktree returns absolute path to
882945
/// main cache
883946
#[test]

crates/turborepo-run-summary/src/execution.rs

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,12 @@ pub struct TaskState {
187187
pub execution: Option<TaskExecutionSummary>,
188188
}
189189

190+
impl TaskSummaryInfo for TaskState {
191+
fn task_id(&self) -> &TaskId<'static> {
192+
&self.task_id
193+
}
194+
}
195+
190196
impl SummaryState {
191197
fn handle_event(&mut self, event: Event) {
192198
match event {
@@ -534,4 +540,87 @@ mod test {
534540
fn test_serialization(value: impl serde::Serialize, expected: serde_json::Value) {
535541
assert_eq!(serde_json::to_value(value).unwrap(), expected);
536542
}
543+
544+
// Verifies that failed tasks can be identified directly from TaskState,
545+
// without needing the full TaskSummary machinery. This is the data path
546+
// the optimized (non-summary) finish will use.
547+
#[tokio::test]
548+
async fn test_failed_tasks_identifiable_from_task_state() {
549+
let summary = ExecutionTracker::new();
550+
let success_task = TaskId::new("app", "build");
551+
let fail_task = TaskId::new("lib", "build");
552+
let cached_task = TaskId::new("utils", "build");
553+
554+
let mut handles = Vec::new();
555+
{
556+
let tracker = summary.task_tracker(success_task.clone());
557+
handles.push(tokio::spawn(async move {
558+
tracker.start().await.build_succeeded(0).await;
559+
}));
560+
}
561+
{
562+
let tracker = summary.task_tracker(fail_task.clone());
563+
handles.push(tokio::spawn(async move {
564+
tracker.start().await.build_failed(Some(1), "uh oh").await;
565+
}));
566+
}
567+
{
568+
let tracker = summary.task_tracker(cached_task.clone());
569+
handles.push(tokio::spawn(async move {
570+
tracker.start().await.cached().await;
571+
}));
572+
}
573+
for h in handles {
574+
h.await.unwrap();
575+
}
576+
577+
let state = summary.finish().await.unwrap();
578+
579+
// TaskState.execution carries enough info to identify failures
580+
let failed: Vec<&TaskState> = state
581+
.tasks
582+
.iter()
583+
.filter(|t| t.execution.as_ref().is_some_and(|e| e.is_failure()))
584+
.collect();
585+
assert_eq!(failed.len(), 1);
586+
assert_eq!(failed[0].task_id, fail_task);
587+
588+
// Counts are correct for ExecutionSummary construction
589+
assert_eq!(state.attempted, 3);
590+
assert_eq!(state.failed, 1);
591+
assert_eq!(state.success, 1);
592+
assert_eq!(state.cached, 1);
593+
}
594+
595+
// Verifies ExecutionSummary computes successful() correctly from SummaryState
596+
#[test]
597+
fn test_execution_summary_stats_from_state() {
598+
use turbopath::AnchoredSystemPath;
599+
600+
let state = SummaryState {
601+
attempted: 10,
602+
failed: 2,
603+
cached: 5,
604+
success: 3,
605+
tasks: vec![],
606+
};
607+
608+
let start = Local::now() - Duration::seconds(5);
609+
let end = Local::now();
610+
let summary = ExecutionSummary::new(
611+
"turbo run build".to_string(),
612+
state,
613+
Some(AnchoredSystemPath::empty()),
614+
1,
615+
start,
616+
end,
617+
);
618+
619+
// successful = success + cached
620+
assert_eq!(summary.successful(), 8);
621+
assert_eq!(summary.attempted, 10);
622+
assert_eq!(summary.failed, 2);
623+
assert_eq!(summary.cached, 5);
624+
assert_eq!(summary.exit_code, 1);
625+
}
537626
}

crates/turborepo-run-summary/src/task_factory.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ where
159159

160160
Ok(SharedTaskSummary {
161161
hash,
162-
inputs: expanded_inputs.into_iter().collect(),
162+
inputs: expanded_inputs,
163163
hash_of_external_dependencies,
164164
cache: cache_summary,
165165
command,

crates/turborepo-run-summary/src/tracker.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,40 @@ impl RunTracker {
222222
{
223223
let end_time = Local::now();
224224

225+
// For the common case (no --dry, no --summarize), skip the expensive
226+
// TaskSummary construction, SCMState::get (2 git subprocesses), and
227+
// full RunSummary assembly. We only need execution stats and failed
228+
// task identification for terminal output.
229+
if run_opts.dry_run().is_none() && run_opts.summarize().is_none() {
230+
let summary_state = self.execution_tracker.finish().await?;
231+
232+
if !is_watch {
233+
// Extract failed tasks before moving summary_state into
234+
// ExecutionSummary. SummaryState derives Clone, but cloning only the
235+
// failed tasks avoids copying the entire state.
236+
let failed_tasks: Vec<TaskState> = summary_state
237+
.tasks
238+
.iter()
239+
.filter(|t| t.execution.as_ref().is_some_and(|e| e.is_failure()))
240+
.cloned()
241+
.collect();
242+
243+
let execution = ExecutionSummary::new(
244+
self.synthesized_command.clone(),
245+
summary_state,
246+
package_inference_root,
247+
exit_code,
248+
self.started_at,
249+
end_time,
250+
);
251+
252+
let path = repo_root.join_components(&[".turbo", "runs", "dummy.json"]);
253+
execution.print(ui, path, failed_tasks.iter().collect());
254+
}
255+
256+
return Ok(());
257+
}
258+
225259
let task_factory = TaskSummaryFactory::new(
226260
pkg_dep_graph,
227261
engine,

0 commit comments

Comments
 (0)