diff --git a/crates/turborepo-config/src/lib.rs b/crates/turborepo-config/src/lib.rs index 27fa2450f583d..1082ea6b547c6 100644 --- a/crates/turborepo-config/src/lib.rs +++ b/crates/turborepo-config/src/lib.rs @@ -360,8 +360,8 @@ impl ConfigurationOptions { self.env_mode.unwrap_or_default() } - /// Returns the default cache directory path (relative to repo root). - const DEFAULT_CACHE_DIR: &'static str = if cfg!(windows) { + /// The default cache directory path (relative to repo root). + pub const DEFAULT_CACHE_DIR: &'static str = if cfg!(windows) { ".turbo\\cache" } else { ".turbo/cache" @@ -391,7 +391,6 @@ impl ConfigurationOptions { /// - `path`: The resolved cache directory path /// - `is_shared_worktree`: True if using shared cache from main worktree pub fn resolve_cache_dir(&self, repo_root: &AbsoluteSystemPath) -> CacheDirResult { - // If explicit cacheDir is configured, always use it (no worktree sharing) if let Some(explicit_cache_dir) = &self.cache_dir { return CacheDirResult { path: explicit_cache_dir.clone(), @@ -400,9 +399,28 @@ impl ConfigurationOptions { }; } - // Try to detect worktree configuration - match WorktreeInfo::detect(repo_root) { - Ok(worktree_info) => { + let worktree_info = WorktreeInfo::detect(repo_root).ok(); + self.resolve_cache_dir_with_worktree_info(worktree_info.as_ref()) + } + + /// Resolve cache directory using pre-computed worktree info. + /// + /// This variant avoids spawning a git subprocess, which allows the caller + /// to run worktree detection on a background thread and pass the result in. 
+ pub fn resolve_cache_dir_with_worktree_info( + &self, + worktree_info: Option<&WorktreeInfo>, + ) -> CacheDirResult { + if let Some(explicit_cache_dir) = &self.cache_dir { + return CacheDirResult { + path: explicit_cache_dir.clone(), + is_shared_worktree: false, + git_root: None, + }; + } + + match worktree_info { + Some(worktree_info) => { debug!( "Worktree detection: current={}, main={}, is_linked={}", worktree_info.worktree_root, @@ -411,8 +429,6 @@ impl ConfigurationOptions { ); let git_root = Some(worktree_info.git_root.clone()); if worktree_info.is_linked_worktree() { - // We're in a linked worktree - use the main worktree's cache - // Use turbopath's join_component to ensure consistent path separators let main_cache_path = worktree_info .main_worktree_root .join_component(".turbo") @@ -425,7 +441,6 @@ impl ConfigurationOptions { debug!("Using shared worktree cache at: {}", result.path); result } else { - // We're in the main worktree - use local cache debug!( "Using local cache (main worktree): {}", Self::DEFAULT_CACHE_DIR @@ -437,12 +452,10 @@ impl ConfigurationOptions { } } } - Err(e) => { - // Detection failed - silently fall back to local cache - // This is expected for non-git directories, so we don't warn + None => { debug!( - "Could not detect Git worktree configuration, using local cache: {}", - e + "No worktree info available, using local cache: {}", + Self::DEFAULT_CACHE_DIR ); CacheDirResult { path: Utf8PathBuf::from(Self::DEFAULT_CACHE_DIR), @@ -858,7 +871,10 @@ mod test { #[test] fn test_resolve_cache_dir_default_returns_relative_path() { let tmp_dir = TempDir::new().unwrap(); - let repo_root = AbsoluteSystemPathBuf::try_from(tmp_dir.path()).unwrap(); + let repo_root = AbsoluteSystemPathBuf::try_from(tmp_dir.path()) + .unwrap() + .to_realpath() + .unwrap(); // Initialize git repo std::process::Command::new("git") @@ -878,6 +894,56 @@ mod test { ); } + #[test] + fn test_resolve_cache_dir_captures_git_root() { + let tmp_dir = 
TempDir::new().unwrap(); + let repo_root = AbsoluteSystemPathBuf::try_from(tmp_dir.path()) + .unwrap() + .to_realpath() + .unwrap(); + + std::process::Command::new("git") + .args(["init", "."]) + .current_dir(&repo_root) + .output() + .expect("git init failed"); + + let config = ConfigurationOptions::default(); + let result = config.resolve_cache_dir(&repo_root); + + // git_root should be captured from worktree detection so SCM::new + // can skip its own git rev-parse subprocess + assert!( + result.git_root.is_some(), + "git_root should be captured when worktree detection succeeds" + ); + assert_eq!( + result.git_root.unwrap(), + repo_root, + "git_root should match repo root in a non-worktree repo" + ); + } + + #[test] + fn test_resolve_cache_dir_explicit_skips_git_root() { + let tmp_dir = TempDir::new().unwrap(); + let repo_root = AbsoluteSystemPath::from_std_path(tmp_dir.path()).unwrap(); + + let config = ConfigurationOptions { + cache_dir: Some(camino::Utf8PathBuf::from("/my/cache")), + ..Default::default() + }; + + let result = config.resolve_cache_dir(repo_root); + + // When explicit cache_dir is set, no worktree detection runs, + // so git_root is not available + assert!( + result.git_root.is_none(), + "git_root should be None when explicit cache_dir bypasses detection" + ); + } + /// Integration test that verifies linked worktree returns absolute path to /// main cache #[test] diff --git a/crates/turborepo-run-summary/src/execution.rs b/crates/turborepo-run-summary/src/execution.rs index 4f4873f09194b..4065bf12a80fc 100644 --- a/crates/turborepo-run-summary/src/execution.rs +++ b/crates/turborepo-run-summary/src/execution.rs @@ -187,6 +187,12 @@ pub struct TaskState { pub execution: Option, } +impl TaskSummaryInfo for TaskState { + fn task_id(&self) -> &TaskId<'static> { + &self.task_id + } +} + impl SummaryState { fn handle_event(&mut self, event: Event) { match event { @@ -534,4 +540,87 @@ mod test { fn test_serialization(value: impl serde::Serialize, 
expected: serde_json::Value) { assert_eq!(serde_json::to_value(value).unwrap(), expected); } + + // Verifies that failed tasks can be identified directly from TaskState, + // without needing the full TaskSummary machinery. This is the data path + // the optimized (non-summary) finish will use. + #[tokio::test] + async fn test_failed_tasks_identifiable_from_task_state() { + let summary = ExecutionTracker::new(); + let success_task = TaskId::new("app", "build"); + let fail_task = TaskId::new("lib", "build"); + let cached_task = TaskId::new("utils", "build"); + + let mut handles = Vec::new(); + { + let tracker = summary.task_tracker(success_task.clone()); + handles.push(tokio::spawn(async move { + tracker.start().await.build_succeeded(0).await; + })); + } + { + let tracker = summary.task_tracker(fail_task.clone()); + handles.push(tokio::spawn(async move { + tracker.start().await.build_failed(Some(1), "uh oh").await; + })); + } + { + let tracker = summary.task_tracker(cached_task.clone()); + handles.push(tokio::spawn(async move { + tracker.start().await.cached().await; + })); + } + for h in handles { + h.await.unwrap(); + } + + let state = summary.finish().await.unwrap(); + + // TaskState.execution carries enough info to identify failures + let failed: Vec<&TaskState> = state + .tasks + .iter() + .filter(|t| t.execution.as_ref().is_some_and(|e| e.is_failure())) + .collect(); + assert_eq!(failed.len(), 1); + assert_eq!(failed[0].task_id, fail_task); + + // Counts are correct for ExecutionSummary construction + assert_eq!(state.attempted, 3); + assert_eq!(state.failed, 1); + assert_eq!(state.success, 1); + assert_eq!(state.cached, 1); + } + + // Verifies ExecutionSummary computes successful() correctly from SummaryState + #[test] + fn test_execution_summary_stats_from_state() { + use turbopath::AnchoredSystemPath; + + let state = SummaryState { + attempted: 10, + failed: 2, + cached: 5, + success: 3, + tasks: vec![], + }; + + let start = Local::now() - 
Duration::seconds(5); + let end = Local::now(); + let summary = ExecutionSummary::new( + "turbo run build".to_string(), + state, + Some(AnchoredSystemPath::empty()), + 1, + start, + end, + ); + + // successful = success + cached + assert_eq!(summary.successful(), 8); + assert_eq!(summary.attempted, 10); + assert_eq!(summary.failed, 2); + assert_eq!(summary.cached, 5); + assert_eq!(summary.exit_code, 1); + } } diff --git a/crates/turborepo-run-summary/src/task_factory.rs b/crates/turborepo-run-summary/src/task_factory.rs index 2474e9010e689..966a6b2781850 100644 --- a/crates/turborepo-run-summary/src/task_factory.rs +++ b/crates/turborepo-run-summary/src/task_factory.rs @@ -159,7 +159,7 @@ where Ok(SharedTaskSummary { hash, - inputs: expanded_inputs.into_iter().collect(), + inputs: expanded_inputs, hash_of_external_dependencies, cache: cache_summary, command, diff --git a/crates/turborepo-run-summary/src/tracker.rs b/crates/turborepo-run-summary/src/tracker.rs index 19bcd25b26e98..dd20f3fdbad5c 100644 --- a/crates/turborepo-run-summary/src/tracker.rs +++ b/crates/turborepo-run-summary/src/tracker.rs @@ -222,6 +222,40 @@ impl RunTracker { { let end_time = Local::now(); + // For the common case (no --dry, no --summarize), skip the expensive + // TaskSummary construction, SCMState::get (2 git subprocesses), and + // full RunSummary assembly. We only need execution stats and failed + // task identification for terminal output. + if run_opts.dry_run().is_none() && run_opts.summarize().is_none() { + let summary_state = self.execution_tracker.finish().await?; + + if !is_watch { + // Extract failed tasks before moving summary_state into + // ExecutionSummary. SummaryState derives Clone, but we only + // need the task list for failure identification. 
+ let failed_tasks: Vec = summary_state + .tasks + .iter() + .filter(|t| t.execution.as_ref().is_some_and(|e| e.is_failure())) + .cloned() + .collect(); + + let execution = ExecutionSummary::new( + self.synthesized_command.clone(), + summary_state, + package_inference_root, + exit_code, + self.started_at, + end_time, + ); + + let path = repo_root.join_components(&[".turbo", "runs", "dummy.json"]); + execution.print(ui, path, failed_tasks.iter().collect()); + } + + return Ok(()); + } + let task_factory = TaskSummaryFactory::new( pkg_dep_graph, engine, diff --git a/crates/turborepo-scm/src/ls_tree.rs b/crates/turborepo-scm/src/ls_tree.rs index 9bd58b5a82c5c..45ef1e7f87196 100644 --- a/crates/turborepo-scm/src/ls_tree.rs +++ b/crates/turborepo-scm/src/ls_tree.rs @@ -1,5 +1,4 @@ use std::{ - collections::BTreeMap, io::{BufRead, BufReader, Read}, process::{Command, Stdio}, }; @@ -9,7 +8,11 @@ use turbopath::{AbsoluteSystemPathBuf, RelativeUnixPathBuf}; use crate::{Error, GitHashes, GitRepo, wait_for_success}; -pub(crate) type SortedGitHashes = BTreeMap; +/// Sorted list of (path, hash) pairs from `git ls-tree`. Uses a `Vec` instead +/// of `BTreeMap` because git output is already sorted by pathname, giving us +/// free insertion order with better cache locality for the `partition_point` +/// range lookups performed in `RepoGitIndex::get_package_hashes`. +pub(crate) type SortedGitHashes = Vec<(RelativeUnixPathBuf, String)>; impl GitRepo { #[tracing::instrument(skip(self))] @@ -37,28 +40,48 @@ impl GitRepo { } /// Run `git ls-tree` once at the git repo root, returning all committed - /// file hashes in a sorted `BTreeMap` for efficient prefix-range lookups. + /// file hashes in a sorted Vec for efficient prefix-range lookups. + /// + /// Uses libgit2 to walk the HEAD tree in-process, avoiding the overhead + /// of spawning a git subprocess. 
+ #[cfg(feature = "git2")] #[tracing::instrument(skip(self))] pub fn git_ls_tree_repo_root_sorted(&self) -> Result { - let mut hashes = BTreeMap::new(); - let mut git = Command::new(self.bin.as_std_path()) - .args(["ls-tree", "-r", "-z", "HEAD"]) - .env("GIT_OPTIONAL_LOCKS", "0") - .current_dir(&self.root) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn()?; + let repo = git2::Repository::open(self.root.as_std_path()) + .map_err(|e| Error::git2_error_context(e, "opening repo for ls-tree".into()))?; + let head = repo + .head() + .map_err(|e| Error::git2_error_context(e, "resolving HEAD".into()))?; + let tree = head + .peel_to_tree() + .map_err(|e| Error::git2_error_context(e, "peeling HEAD to tree".into()))?; + + let mut hashes = Vec::new(); + tree.walk(git2::TreeWalkMode::PreOrder, |dir, entry| { + // Only collect blob entries (files), skip trees (directories) + if entry.kind() == Some(git2::ObjectType::Blob) { + let name = match entry.name() { + Some(n) => n, + None => return git2::TreeWalkResult::Ok, + }; + let path_str = if dir.is_empty() { + name.to_string() + } else { + format!("{dir}{name}") + }; + if let Ok(path) = RelativeUnixPathBuf::new(path_str) { + hashes.push((path, entry.id().to_string())); + } + } + git2::TreeWalkResult::Ok + }) + .map_err(|e| Error::git2_error_context(e, "walking tree".into()))?; + + // git2 tree walk is in pre-order which is lexicographic within each + // directory level, but the flattened paths may not be globally sorted + // (e.g. "a/b" vs "a.txt"). Sort to maintain the binary-search invariant. 
+        hashes.sort_by(|(a, _), (b, _)| a.cmp(b));
-        let stdout = git
-            .stdout
-            .as_mut()
-            .ok_or_else(|| Error::git_error("failed to get stdout for git ls-tree"))?;
-        let mut stderr = git
-            .stderr
-            .take()
-            .ok_or_else(|| Error::git_error("failed to get stderr for git ls-tree"))?;
-        let parse_result = read_ls_tree_sorted(stdout, &mut hashes);
-        wait_for_success(git, &mut stderr, "git ls-tree", &self.root, parse_result)?;
         Ok(hashes)
     }
@@ -86,6 +109,7 @@ fn read_ls_tree<R: Read>(reader: R, hashes: &mut GitHashes) -> Result<(), Error>
     Ok(())
 }
+#[cfg(test)]
 fn read_ls_tree_sorted<R: Read>(reader: R, hashes: &mut SortedGitHashes) -> Result<(), Error> {
     let mut reader = BufReader::with_capacity(64 * 1024, reader);
     let mut buffer = Vec::new();
@@ -96,9 +120,13 @@ fn read_ls_tree_sorted<R: Read>(reader: R, hashes: &mut SortedGitHashes) -> Resu
         let filename = std::str::from_utf8(entry.filename)
             .map_err(|e| Error::git_error(format!("invalid utf8 in ls-tree filename: {e}")))?;
         let path = RelativeUnixPathBuf::new(filename)?;
-        hashes.insert(path, hash.to_owned());
+        hashes.push((path, hash.to_owned()));
         buffer.clear();
     }
+    debug_assert!(
+        hashes.windows(2).all(|w| w[0].0 < w[1].0),
+        "git ls-tree output should be sorted by pathname"
+    );
     Ok(())
 }
@@ -147,6 +175,54 @@ mod tests {
     )
 }
+    fn to_sorted_hashes(pairs: &[(&str, &str)]) -> super::SortedGitHashes {
+        pairs
+            .iter()
+            .map(|(path, hash)| (RelativeUnixPathBuf::new(*path).unwrap(), hash.to_string()))
+            .collect()
+    }
+
+    // Verifies that read_ls_tree_sorted produces correct sorted Vec entries
+    // from git ls-tree output.
+ #[test] + fn test_ls_tree_sorted() { + let input = "100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391\tpackage.json\x00100644 \ + blob 5b999efa470b056e329b4c23a73904e0794bdc2f\tsrc/index.ts\x00100644 blob \ + f44f57fff95196c5f7139dfa0b96875f1e9650a9\tsrc/utils.ts\0"; + + let expected = to_sorted_hashes(&[ + ("package.json", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), + ("src/index.ts", "5b999efa470b056e329b4c23a73904e0794bdc2f"), + ("src/utils.ts", "f44f57fff95196c5f7139dfa0b96875f1e9650a9"), + ]); + + let mut hashes = super::SortedGitHashes::new(); + super::read_ls_tree_sorted(input.as_bytes(), &mut hashes).unwrap(); + assert_eq!(hashes, expected); + + // Verify entries are sorted (invariant needed for binary search) + assert!( + hashes.windows(2).all(|w| w[0].0 < w[1].0), + "sorted Vec should maintain sorted order" + ); + } + + // Verifies read_ls_tree_sorted handles all the edge cases that read_ls_tree + // handles. Both parsers share the same `parse_ls_tree` function. + #[test] + fn test_ls_tree_sorted_edge_cases() { + // Single entry without trailing NUL + let input = "100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391\tpackage.json"; + let mut hashes = super::SortedGitHashes::new(); + super::read_ls_tree_sorted(input.as_bytes(), &mut hashes).unwrap(); + assert_eq!(hashes.len(), 1); + + // Empty input + let mut hashes = super::SortedGitHashes::new(); + super::read_ls_tree_sorted("".as_bytes(), &mut hashes).unwrap(); + assert_eq!(hashes.len(), 0); + } + #[test] fn test_ls_tree() { let tests: &[(&str, &[(&str, &str)])] = &[ diff --git a/crates/turborepo-scm/src/repo_index.rs b/crates/turborepo-scm/src/repo_index.rs index e407663304845..fa993af9aee9f 100644 --- a/crates/turborepo-scm/src/repo_index.rs +++ b/crates/turborepo-scm/src/repo_index.rs @@ -9,8 +9,10 @@ use crate::{Error, GitHashes, GitRepo, ls_tree::SortedGitHashes, status::RepoSta /// and `git status` so they can be filtered per-package without spawning /// additional subprocesses. 
 ///
-/// Uses a `BTreeMap` for the ls-tree data so that per-package lookups can
-/// use `range()` on the sorted keys instead of scanning every entry.
+/// Uses a sorted `Vec` for the ls-tree data so that per-package lookups can
+/// use `partition_point` (binary search) for range queries. This gives the
+/// same O(log n) asymptotic cost as a `BTreeMap` but with better cache
+/// locality on the contiguous memory.
 pub struct RepoGitIndex {
     ls_tree_hashes: SortedGitHashes,
     status_entries: Vec<RepoStatusEntry>,
 }
@@ -60,21 +62,23 @@
         let prefix_is_empty = prefix_str.is_empty();
         let mut hashes = if prefix_is_empty {
-            // Pre-allocate with exact capacity when copying the entire tree
             let mut h = GitHashes::with_capacity(self.ls_tree_hashes.len());
             for (path, hash) in &self.ls_tree_hashes {
                 h.insert(path.clone(), hash.clone());
             }
             h
         } else {
-            // Use stack-allocated format strings to avoid heap allocations
-            // for the range bounds. '/' is one char after '.' in ASCII,
-            // and '0' is one char after '/' — so the range covers exactly
-            // paths starting with "{prefix}/".
+            // Binary search for the range of paths starting with "{prefix}/".
+            // '0' is one codepoint after '/' in ASCII, so the range covers
+            // exactly paths starting with the prefix followed by '/'.
let range_start = RelativeUnixPathBuf::new(format!("{}/", prefix_str)).unwrap(); let range_end = RelativeUnixPathBuf::new(format!("{}0", prefix_str)).unwrap(); + let lo = self + .ls_tree_hashes + .partition_point(|(k, _)| *k < range_start); + let hi = self.ls_tree_hashes.partition_point(|(k, _)| *k < range_end); let mut h = GitHashes::new(); - for (path, hash) in self.ls_tree_hashes.range(range_start..range_end) { + for (path, hash) in &self.ls_tree_hashes[lo..hi] { if let Ok(stripped) = path.strip_prefix(pkg_prefix) { h.insert(stripped, hash.clone()); } @@ -130,10 +134,11 @@ mod tests { } fn make_index(ls_tree: Vec<(&str, &str)>, status: Vec<(&str, bool)>) -> RepoGitIndex { - let ls_tree_hashes: SortedGitHashes = ls_tree + let mut ls_tree_hashes: SortedGitHashes = ls_tree .into_iter() .map(|(p, h)| (path(p), h.to_string())) - .collect::>(); + .collect::>(); + ls_tree_hashes.sort_by(|(a, _), (b, _)| a.cmp(b)); let status_entries = status .into_iter() .map(|(p, is_delete)| RepoStatusEntry { @@ -245,4 +250,92 @@ mod tests { // new.ts is untracked/modified assert_eq!(to_hash, vec![path("new.ts")]); } + + // Verifies that BTreeMap range queries produce correct results for + // prefix-based package filtering. This captures the exact behavior that + // must be preserved when switching to a sorted Vec with partition_point. + #[test] + fn test_range_query_equivalence_with_binary_search() { + let ls_tree_data = vec![ + ("apps/docs/README.md", "aaa"), + ("apps/docs/package.json", "bbb"), + ("apps/web-admin/index.ts", "ccc"), + ("apps/web/package.json", "ddd"), + ("apps/web/src/index.ts", "eee"), + ("apps/web/src/utils.ts", "fff"), + ("packages/ui/button.tsx", "ggg"), + ("packages/ui/package.json", "hhh"), + ("root.json", "iii"), + ]; + + let index = make_index(ls_tree_data.clone(), vec![]); + + // Verify the BTreeMap range query results for various prefixes. + // These exact results must be preserved after the Vec migration. 
+ + // "apps/web" should match apps/web/* but NOT apps/web-admin/* + let (hashes, _) = index.get_package_hashes(&path("apps/web")).unwrap(); + assert_eq!(hashes.len(), 3); + assert!(hashes.contains_key(&path("package.json"))); + assert!(hashes.contains_key(&path("src/index.ts"))); + assert!(hashes.contains_key(&path("src/utils.ts"))); + + // "apps/docs" should match exactly 2 files + let (hashes, _) = index.get_package_hashes(&path("apps/docs")).unwrap(); + assert_eq!(hashes.len(), 2); + + // "packages/ui" should match exactly 2 files + let (hashes, _) = index.get_package_hashes(&path("packages/ui")).unwrap(); + assert_eq!(hashes.len(), 2); + + // A prefix that matches nothing + let (hashes, _) = index.get_package_hashes(&path("nonexistent")).unwrap(); + assert_eq!(hashes.len(), 0); + + // Also verify via sorted Vec + binary search to confirm equivalence + let sorted_vec: Vec<(RelativeUnixPathBuf, String)> = ls_tree_data + .iter() + .map(|(p, h)| (path(p), h.to_string())) + .collect(); + // Data is already in sorted order from git ls-tree + assert!( + sorted_vec.windows(2).all(|w| w[0].0 < w[1].0), + "test data must be sorted to simulate git ls-tree output" + ); + + let prefix = "apps/web"; + let range_start = path(&format!("{prefix}/")); + let range_end = path(&format!("{prefix}0")); + let lo = sorted_vec.partition_point(|(k, _)| *k < range_start); + let hi = sorted_vec.partition_point(|(k, _)| *k < range_end); + let vec_results: Vec<_> = sorted_vec[lo..hi] + .iter() + .map(|(p, h)| (p.clone(), h.clone())) + .collect(); + + // BTreeMap range and Vec partition_point must yield same entries + let btree: BTreeMap = ls_tree_data + .iter() + .map(|(p, h)| (path(p), h.to_string())) + .collect(); + let btree_results: Vec<_> = btree + .range(range_start..range_end) + .map(|(p, h)| (p.clone(), h.clone())) + .collect(); + assert_eq!(vec_results, btree_results); + } + + // Verifies that the full-copy path (empty prefix) correctly copies all + // entries. 
Important because the Vec migration changes iteration syntax. + #[test] + fn test_full_copy_preserves_all_entries() { + let ls_tree_data = vec![("a.ts", "111"), ("b/c.ts", "222"), ("d/e/f.ts", "333")]; + let index = make_index(ls_tree_data, vec![]); + let (hashes, to_hash) = index.get_package_hashes(&path("")).unwrap(); + assert_eq!(hashes.len(), 3); + assert_eq!(hashes.get(&path("a.ts")).unwrap(), "111"); + assert_eq!(hashes.get(&path("b/c.ts")).unwrap(), "222"); + assert_eq!(hashes.get(&path("d/e/f.ts")).unwrap(), "333"); + assert!(to_hash.is_empty()); + } } diff --git a/crates/turborepo-scm/src/status.rs b/crates/turborepo-scm/src/status.rs index 7b2fa4ae47b50..fc5cd5c7cbf27 100644 --- a/crates/turborepo-scm/src/status.rs +++ b/crates/turborepo-scm/src/status.rs @@ -53,26 +53,38 @@ impl GitRepo { /// Run `git status` once at the git repo root, returning all status entries /// with git-root-relative paths. + /// + /// Uses libgit2 to compute status in-process, avoiding the overhead of + /// spawning a git subprocess. 
     #[tracing::instrument(skip(self))]
     pub(crate) fn git_status_repo_root(&self) -> Result<Vec<RepoStatusEntry>, Error> {
-        let mut git = Command::new(self.bin.as_std_path())
-            .args(["status", "--untracked-files", "--no-renames", "-z"])
-            .env("GIT_OPTIONAL_LOCKS", "0")
-            .current_dir(&self.root)
-            .stdout(Stdio::piped())
-            .stderr(Stdio::piped())
-            .spawn()?;
+        let repo = git2::Repository::open(self.root.as_std_path())
+            .map_err(|e| Error::git2_error_context(e, "opening repo for status".into()))?;
+
+        let mut opts = git2::StatusOptions::new();
+        opts.include_untracked(true)
+            .recurse_untracked_dirs(true)
+            .renames_head_to_index(false)
+            .renames_index_to_workdir(false);
+
+        let statuses = repo
+            .statuses(Some(&mut opts))
+            .map_err(|e| Error::git2_error_context(e, "computing status".into()))?;
+
+        let mut entries = Vec::with_capacity(statuses.len());
+        for entry in statuses.iter() {
+            let path_str = match entry.path() {
+                Some(p) => p,
+                None => continue,
+            };
+            let path = RelativeUnixPathBuf::new(path_str)?;
+            let status = entry.status();
+            let is_delete =
+                status.intersects(git2::Status::INDEX_DELETED | git2::Status::WT_DELETED);
+            entries.push(RepoStatusEntry { path, is_delete });
+        }
-        let stdout = git
-            .stdout
-            .as_mut()
-            .ok_or_else(|| Error::git_error("failed to get stdout for git status"))?;
-        let mut stderr = git
-            .stderr
-            .take()
-            .ok_or_else(|| Error::git_error("failed to get stderr for git status"))?;
-        let parse_result = read_status_raw(stdout);
-        wait_for_success(git, &mut stderr, "git status", &self.root, parse_result)
+        Ok(entries)
     }
 }
@@ -106,6 +118,7 @@ fn read_status(
     Ok(to_hash)
 }
+#[allow(dead_code)]
 fn read_status_raw<R: Read>(reader: R) -> Result<Vec<RepoStatusEntry>, Error> {
     let mut entries = Vec::new();
     let mut reader = BufReader::with_capacity(64 * 1024, reader);
diff --git a/crates/turborepo-scm/src/worktree.rs b/crates/turborepo-scm/src/worktree.rs
index bd4dcddc9b63c..4582732e4dbc2 100644
--- a/crates/turborepo-scm/src/worktree.rs
+++ b/crates/turborepo-scm/src/worktree.rs
@@ 
-5,8 +5,6 @@ //! This enables linked worktrees to share the local cache with the main //! worktree. -use std::process::Command; - use turbopath::{AbsoluteSystemPath, AbsoluteSystemPathBuf}; use crate::Error; @@ -31,22 +29,47 @@ impl WorktreeInfo { /// Detect worktree configuration from a path within a Git repository. /// - /// Uses Git commands to determine: - /// - The current worktree root (`git rev-parse --show-toplevel`) - /// - The shared git directory (`git rev-parse --git-common-dir`) - /// - The main worktree root (derived from the git common directory) - /// - /// # Errors - /// - /// Returns an error if: - /// - The path is not within a Git repository - /// - Git commands fail to execute - /// - The worktree structure cannot be determined + /// When the `git2` feature is enabled, uses libgit2 to resolve worktree + /// info in-process (avoiding subprocess overhead). Otherwise falls back + /// to spawning `git rev-parse`. #[tracing::instrument] pub fn detect(path: &AbsoluteSystemPath) -> Result { - // Single git subprocess for all three queries. --show-cdup is included - // so that SCM::new can reuse the git root without spawning another - // subprocess later. 
+        #[cfg(feature = "git2")]
+        {
+            Self::detect_git2(path)
+        }
+        #[cfg(not(feature = "git2"))]
+        {
+            Self::detect_subprocess(path)
+        }
+    }
+
+    #[cfg(feature = "git2")]
+    fn detect_git2(path: &AbsoluteSystemPath) -> Result<Self, Error> {
+        let repo = git2::Repository::discover(path.as_std_path())
+            .map_err(|e| Error::git_error(format!("git2 repository discovery failed: {e}")))?;
+
+        let worktree_root = repo
+            .workdir()
+            .ok_or_else(|| Error::git_error("bare repository has no workdir"))?;
+        let worktree_root = AbsoluteSystemPathBuf::try_from(worktree_root)?.to_realpath()?;
+
+        let git_common_dir = repo.commondir().to_string_lossy().to_string();
+
+        let main_worktree_root = resolve_main_worktree_root(path, &git_common_dir)?;
+        let git_root = main_worktree_root.clone();
+
+        Ok(Self {
+            worktree_root,
+            main_worktree_root,
+            git_root,
+        })
+    }
+
+    #[cfg(not(feature = "git2"))]
+    fn detect_subprocess(path: &AbsoluteSystemPath) -> Result<Self, Error> {
+        use std::process::Command;
+
         let output = Command::new("git")
             .args([
                 "rev-parse",
@@ -82,7 +105,6 @@
                 .ok_or_else(|| Error::git_error("git rev-parse --show-cdup produced no output"))?
.trim(); let git_root = if show_cdup.is_empty() { - // Empty --show-cdup means we're already at the git root path.to_owned() } else { let resolved = path.as_std_path().join(show_cdup); diff --git a/crates/turborepo-task-hash/src/lib.rs b/crates/turborepo-task-hash/src/lib.rs index d05636b707e7b..be1f9da3e790f 100644 --- a/crates/turborepo-task-hash/src/lib.rs +++ b/crates/turborepo-task-hash/src/lib.rs @@ -666,8 +666,14 @@ impl HashTrackerInfo for TaskHashTracker { fn expanded_inputs( &self, task_id: &TaskId, - ) -> Option> { - TaskHashTracker::get_expanded_inputs(self, task_id).map(|file_hashes| file_hashes.0.clone()) + ) -> Option> { + TaskHashTracker::get_expanded_inputs(self, task_id).map(|file_hashes| { + file_hashes + .0 + .iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect() + }) } } @@ -767,6 +773,65 @@ mod test { }); } + #[test] + fn test_expanded_inputs_returns_cloned_data() { + use turborepo_types::HashTrackerInfo; + + let task_id: TaskId<'static> = TaskId::new("pkg", "build"); + let file_hashes = FileHashes(HashMap::from([ + ( + RelativeUnixPathBuf::new("src/index.ts").unwrap(), + "abc123".to_string(), + ), + ( + RelativeUnixPathBuf::new("package.json").unwrap(), + "def456".to_string(), + ), + ( + RelativeUnixPathBuf::new("src/utils/helper.ts").unwrap(), + "ghi789".to_string(), + ), + ])); + + let mut input_hashes = HashMap::new(); + input_hashes.insert(task_id.clone(), Arc::new(file_hashes)); + let tracker = TaskHashTracker::new(input_hashes); + + // Via concrete method + let arc_result = tracker.get_expanded_inputs(&task_id); + assert!(arc_result.is_some()); + let arc_hashes = arc_result.unwrap(); + assert_eq!(arc_hashes.0.len(), 3); + assert_eq!( + arc_hashes + .0 + .get(&RelativeUnixPathBuf::new("src/index.ts").unwrap()), + Some(&"abc123".to_string()) + ); + + // Via trait method — returns BTreeMap (sorted, no intermediate HashMap clone) + let trait_result: Option> = + HashTrackerInfo::expanded_inputs(&tracker, &task_id); + 
assert!(trait_result.is_some());
+        let trait_hashes = trait_result.unwrap();
+        assert_eq!(trait_hashes.len(), 3);
+        assert_eq!(
+            trait_hashes.get(&RelativeUnixPathBuf::new("package.json").unwrap()),
+            Some(&"def456".to_string())
+        );
+        // BTreeMap should be sorted by key
+        let keys: Vec<_> = trait_hashes.keys().collect();
+        assert!(
+            keys.windows(2).all(|w| w[0] < w[1]),
+            "expanded_inputs should return sorted keys"
+        );
+
+        // Missing task returns None
+        let missing = TaskId::new("other", "test");
+        assert!(tracker.get_expanded_inputs(&missing).is_none());
+        assert!(HashTrackerInfo::expanded_inputs(&tracker, &missing).is_none());
+    }
+
     // Validates that sort+dedup produces the same result as the previous
     // HashSet→Vec→sort approach for dependency hash deduplication.
     #[test]
diff --git a/crates/turborepo-types/src/lib.rs b/crates/turborepo-types/src/lib.rs
index fa824688a9a51..3f71bdf13f967 100644
--- a/crates/turborepo-types/src/lib.rs
+++ b/crates/turborepo-types/src/lib.rs
@@ -15,7 +15,11 @@
 //! - [`GlobalHashInputs`]: Provides access to global hash inputs
 pub mod secret;
-use std::{collections::HashMap, fmt, str::FromStr};
+use std::{
+    collections::{BTreeMap, HashMap},
+    fmt,
+    str::FromStr,
+};
 use biome_deserialize_macros::Deserializable;
 use clap::ValueEnum;
@@ -1033,8 +1037,8 @@ pub trait HashTrackerInfo {
     fn expanded_outputs(&self, task_id: &TaskId) -> Option>;
     /// Returns the detected framework for a task
     fn framework(&self, task_id: &TaskId) -> Option;
-    /// Returns the expanded input file hashes for a task
-    fn expanded_inputs(&self, task_id: &TaskId) -> Option<HashMap<RelativeUnixPathBuf, String>>;
+    /// Returns the expanded input file hashes for a task, sorted by path.
+    fn expanded_inputs(&self, task_id: &TaskId) -> Option<BTreeMap<RelativeUnixPathBuf, String>>;
 }
 /// Detailed environment variable map for hash tracking.