Skip to content

Commit 8f083cf

Browse files
committed
test: Add regression tests for hotpath performance optimizations
1 parent af1a1b0 commit 8f083cf

File tree

4 files changed

+331
-6
lines changed

4 files changed

+331
-6
lines changed

crates/turborepo-run-cache/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ turbopath = { workspace = true }
1919
turborepo-cache = { workspace = true }
2020
turborepo-hash = { workspace = true }
2121
turborepo-repository = { path = "../turborepo-repository" }
22-
turborepo-scm = { workspace = true }
22+
turborepo-scm = { workspace = true, features = ["git2"] }
2323
turborepo-task-id = { workspace = true }
2424
turborepo-telemetry = { path = "../turborepo-telemetry" }
2525
turborepo-types = { workspace = true }

crates/turborepo-scm/src/manual.rs

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,4 +464,104 @@ mod tests {
464464

465465
assert_eq!(hashes, expected);
466466
}
467+
468+
#[test]
469+
fn test_include_default_files_deduplicates_with_explicit_includes() {
470+
// When include_default_files=true AND explicit includes are provided,
471+
// the first walk collects files matching the includes, and the second
472+
// walk collects gitignore-respecting defaults. Files appearing in both
473+
// walks should not be hashed twice — verify that the result is correct
474+
// and contains each file exactly once.
475+
let (_tmp, turbo_root) = tmp_dir();
476+
let pkg_path = AnchoredSystemPathBuf::from_raw("my-pkg").unwrap();
477+
let pkg_dir = turbo_root.resolve(&pkg_path);
478+
pkg_dir.create_dir_all().unwrap();
479+
480+
// Create files: one matched by the explicit include, one only in defaults
481+
let shared_file = pkg_dir.join_component("shared.ts");
482+
shared_file.create_with_contents("shared content").unwrap();
483+
484+
let default_only = pkg_dir.join_component("default-only.ts");
485+
default_only
486+
.create_with_contents("default only content")
487+
.unwrap();
488+
489+
let package_json = pkg_dir.join_component("package.json");
490+
package_json.create_with_contents("{}").unwrap();
491+
492+
let turbo_json = pkg_dir.join_component("turbo.json");
493+
turbo_json.create_with_contents("{}").unwrap();
494+
495+
// "*.ts" matches both shared.ts and default-only.ts in the first walk
496+
// (since git_ignore=false for explicit inputs). The second walk with
497+
// git_ignore=true should not re-hash files already found.
498+
let hashes = get_package_file_hashes_without_git(
499+
&turbo_root,
500+
&pkg_path,
501+
&["*.ts"],
502+
true,
503+
)
504+
.unwrap();
505+
506+
// All four files should appear exactly once
507+
assert!(
508+
hashes.contains_key(&RelativeUnixPathBuf::new("shared.ts").unwrap()),
509+
"shared.ts should be present"
510+
);
511+
assert!(
512+
hashes.contains_key(&RelativeUnixPathBuf::new("default-only.ts").unwrap()),
513+
"default-only.ts should be present"
514+
);
515+
assert!(
516+
hashes.contains_key(&RelativeUnixPathBuf::new("package.json").unwrap()),
517+
"package.json should be present (added by include pattern augmentation)"
518+
);
519+
assert!(
520+
hashes.contains_key(&RelativeUnixPathBuf::new("turbo.json").unwrap()),
521+
"turbo.json should be present (added by include pattern augmentation)"
522+
);
523+
524+
// Verify the hash values are deterministic (same content = same hash)
525+
let hashes2 = get_package_file_hashes_without_git(
526+
&turbo_root,
527+
&pkg_path,
528+
&["*.ts"],
529+
true,
530+
)
531+
.unwrap();
532+
assert_eq!(hashes, hashes2, "hashes should be deterministic");
533+
}
534+
535+
#[test]
536+
fn test_include_default_files_with_exclusion() {
537+
// Verify that exclusions work correctly when include_default_files=true:
538+
// excluded files should not appear even if the default walk finds them.
539+
let (_tmp, turbo_root) = tmp_dir();
540+
let pkg_path = AnchoredSystemPathBuf::from_raw("lib").unwrap();
541+
let pkg_dir = turbo_root.resolve(&pkg_path);
542+
pkg_dir.create_dir_all().unwrap();
543+
544+
let keep = pkg_dir.join_component("keep.ts");
545+
keep.create_with_contents("keep").unwrap();
546+
547+
let excluded = pkg_dir.join_component("excluded.ts");
548+
excluded.create_with_contents("excluded").unwrap();
549+
550+
let hashes = get_package_file_hashes_without_git(
551+
&turbo_root,
552+
&pkg_path,
553+
&["*.ts", "!excluded.ts"],
554+
true,
555+
)
556+
.unwrap();
557+
558+
assert!(
559+
hashes.contains_key(&RelativeUnixPathBuf::new("keep.ts").unwrap()),
560+
"keep.ts should be present"
561+
);
562+
assert!(
563+
!hashes.contains_key(&RelativeUnixPathBuf::new("excluded.ts").unwrap()),
564+
"excluded.ts should NOT be present"
565+
);
566+
}
467567
}

crates/turborepo-scm/src/repo_index.rs

Lines changed: 131 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use tracing::{debug, trace};
44
use turbopath::RelativeUnixPathBuf;
55

6-
use crate::{Error, GitHashes, GitRepo, ls_tree::SortedGitHashes, status::RepoStatusEntry};
6+
use crate::{ls_tree::SortedGitHashes, status::RepoStatusEntry, Error, GitHashes, GitRepo};
77

88
/// Pre-computed repo-wide git index that caches the results of `git ls-tree`
99
/// and `git status` so they can be filtered per-package without spawning
@@ -116,3 +116,133 @@ impl RepoGitIndex {
116116
Ok((hashes, to_hash))
117117
}
118118
}
119+
120+
#[cfg(test)]
121+
mod tests {
122+
use std::collections::BTreeMap;
123+
124+
use turbopath::RelativeUnixPathBuf;
125+
126+
use super::*;
127+
128+
fn path(s: &str) -> RelativeUnixPathBuf {
129+
RelativeUnixPathBuf::new(s).unwrap()
130+
}
131+
132+
fn make_index(ls_tree: Vec<(&str, &str)>, status: Vec<(&str, bool)>) -> RepoGitIndex {
133+
let ls_tree_hashes: SortedGitHashes = ls_tree
134+
.into_iter()
135+
.map(|(p, h)| (path(p), h.to_string()))
136+
.collect::<BTreeMap<_, _>>();
137+
let status_entries = status
138+
.into_iter()
139+
.map(|(p, is_delete)| RepoStatusEntry {
140+
path: path(p),
141+
is_delete,
142+
})
143+
.collect();
144+
RepoGitIndex {
145+
ls_tree_hashes,
146+
status_entries,
147+
}
148+
}
149+
150+
#[test]
151+
fn test_empty_prefix_returns_all_files() {
152+
let index = make_index(
153+
vec![
154+
("apps/web/src/index.ts", "aaa"),
155+
("packages/ui/button.tsx", "bbb"),
156+
("root-file.json", "ccc"),
157+
],
158+
vec![],
159+
);
160+
let (hashes, to_hash) = index.get_package_hashes(&path("")).unwrap();
161+
assert_eq!(hashes.len(), 3);
162+
assert!(hashes.contains_key(&path("apps/web/src/index.ts")));
163+
assert!(hashes.contains_key(&path("packages/ui/button.tsx")));
164+
assert!(hashes.contains_key(&path("root-file.json")));
165+
assert!(to_hash.is_empty());
166+
}
167+
168+
#[test]
169+
fn test_prefix_filters_to_package_and_strips_prefix() {
170+
let index = make_index(
171+
vec![
172+
("apps/web/src/index.ts", "aaa"),
173+
("apps/web/package.json", "bbb"),
174+
("apps/docs/README.md", "ccc"),
175+
("packages/ui/button.tsx", "ddd"),
176+
],
177+
vec![],
178+
);
179+
let (hashes, to_hash) = index.get_package_hashes(&path("apps/web")).unwrap();
180+
assert_eq!(hashes.len(), 2);
181+
assert_eq!(hashes.get(&path("src/index.ts")).unwrap(), "aaa");
182+
assert_eq!(hashes.get(&path("package.json")).unwrap(), "bbb");
183+
assert!(to_hash.is_empty());
184+
}
185+
186+
#[test]
187+
fn test_prefix_does_not_match_sibling_with_shared_prefix() {
188+
// "apps/web-admin" should NOT match when filtering for "apps/web"
189+
let index = make_index(
190+
vec![
191+
("apps/web/index.ts", "aaa"),
192+
("apps/web-admin/index.ts", "bbb"),
193+
],
194+
vec![],
195+
);
196+
let (hashes, _) = index.get_package_hashes(&path("apps/web")).unwrap();
197+
assert_eq!(hashes.len(), 1);
198+
assert!(hashes.contains_key(&path("index.ts")));
199+
}
200+
201+
#[test]
202+
fn test_status_modified_file_added_to_to_hash() {
203+
let index = make_index(
204+
vec![("my-pkg/file.ts", "aaa")],
205+
vec![("my-pkg/new-file.ts", false)],
206+
);
207+
let (hashes, to_hash) = index.get_package_hashes(&path("my-pkg")).unwrap();
208+
assert_eq!(hashes.len(), 1);
209+
assert_eq!(to_hash, vec![path("my-pkg/new-file.ts")]);
210+
}
211+
212+
#[test]
213+
fn test_status_deleted_file_removed_from_hashes() {
214+
let index = make_index(
215+
vec![("my-pkg/keep.ts", "aaa"), ("my-pkg/deleted.ts", "bbb")],
216+
vec![("my-pkg/deleted.ts", true)],
217+
);
218+
let (hashes, to_hash) = index.get_package_hashes(&path("my-pkg")).unwrap();
219+
assert_eq!(hashes.len(), 1);
220+
assert!(hashes.contains_key(&path("keep.ts")));
221+
assert!(!hashes.contains_key(&path("deleted.ts")));
222+
assert!(to_hash.is_empty());
223+
}
224+
225+
#[test]
226+
fn test_status_entries_for_other_packages_ignored() {
227+
let index = make_index(
228+
vec![("pkg-a/file.ts", "aaa")],
229+
vec![("pkg-b/new.ts", false), ("pkg-b/gone.ts", true)],
230+
);
231+
let (hashes, to_hash) = index.get_package_hashes(&path("pkg-a")).unwrap();
232+
assert_eq!(hashes.len(), 1);
233+
assert!(to_hash.is_empty());
234+
}
235+
236+
#[test]
237+
fn test_empty_prefix_with_status() {
238+
let index = make_index(
239+
vec![("file.ts", "aaa")],
240+
vec![("new.ts", false), ("file.ts", true)],
241+
);
242+
let (hashes, to_hash) = index.get_package_hashes(&path("")).unwrap();
243+
// file.ts was deleted via status
244+
assert!(hashes.is_empty());
245+
// new.ts is untracked/modified
246+
assert_eq!(to_hash, vec![path("new.ts")]);
247+
}
248+
}

crates/turborepo-task-hash/src/lib.rs

Lines changed: 99 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@ use turborepo_cache::CacheHitMetadata;
2323
// Re-export turborepo_engine::TaskNode for convenience
2424
pub use turborepo_engine::TaskNode;
2525
use turborepo_env::{
26-
BUILTIN_PASS_THROUGH_ENV, BySource, CompiledWildcards, DetailedMap, EnvironmentVariableMap,
26+
BySource, CompiledWildcards, DetailedMap, EnvironmentVariableMap, BUILTIN_PASS_THROUGH_ENV,
2727
};
28-
use turborepo_frameworks::{Slug as FrameworkSlug, infer_framework};
28+
use turborepo_frameworks::{infer_framework, Slug as FrameworkSlug};
2929
use turborepo_hash::{FileHashes, LockFilePackagesRef, TaskHashable, TurboHash};
3030
use turborepo_repository::package_graph::{PackageInfo, PackageName};
3131
use turborepo_scm::{RepoGitIndex, SCM};
@@ -433,8 +433,7 @@ impl<'a, R: RunOptsHashInfo> TaskHasher<'a, R> {
433433
// Collect owned strings directly to avoid borrow lifetime issues with
434434
// the RwLock guard. We sort + dedup instead of using a HashSet to avoid
435435
// the overhead of hashing the hash strings.
436-
let mut dependency_hash_list: Vec<String> =
437-
Vec::with_capacity(dependency_set.len());
436+
let mut dependency_hash_list: Vec<String> = Vec::with_capacity(dependency_set.len());
438437
for dependency_task in &dependency_set {
439438
let TaskNode::Task(dependency_task_id) = dependency_task else {
440439
continue;
@@ -701,4 +700,100 @@ mod test {
701700
assert_send::<TaskHashTracker>();
702701
assert_sync::<TaskHashTracker>();
703702
}
703+
704+
#[test]
705+
fn test_hash_tracker_concurrent_reads() {
706+
let tracker = TaskHashTracker::new(HashMap::new());
707+
let task_id: TaskId<'static> = TaskId::new("pkg", "build");
708+
tracker.insert_hash(
709+
task_id.clone(),
710+
DetailedMap::default(),
711+
"abc123".to_string(),
712+
None,
713+
);
714+
715+
// Multiple concurrent reads should not deadlock or panic with RwLock
716+
std::thread::scope(|s| {
717+
for _ in 0..8 {
718+
let tracker = &tracker;
719+
let task_id = &task_id;
720+
s.spawn(move || {
721+
for _ in 0..100 {
722+
let h = tracker.hash(task_id);
723+
assert_eq!(h.as_deref(), Some("abc123"));
724+
}
725+
});
726+
}
727+
});
728+
}
729+
730+
#[test]
731+
fn test_hash_tracker_concurrent_read_write() {
732+
let tracker = TaskHashTracker::new(HashMap::new());
733+
734+
// Pre-create owned task IDs to avoid lifetime issues with TaskId borrows
735+
let task_ids: Vec<TaskId<'static>> = (0..50)
736+
.map(|i| TaskId::new("pkg", &format!("task-{i}")).into_owned())
737+
.collect();
738+
739+
// One writer, many readers — verifies RwLock allows concurrent reads
740+
// while writes are exclusive, without deadlock.
741+
std::thread::scope(|s| {
742+
let tracker = &tracker;
743+
let task_ids = &task_ids;
744+
745+
s.spawn(move || {
746+
for (i, task_id) in task_ids.iter().enumerate() {
747+
tracker.insert_hash(
748+
task_id.clone(),
749+
DetailedMap::default(),
750+
format!("hash-{i}"),
751+
None,
752+
);
753+
}
754+
});
755+
756+
for _ in 0..4 {
757+
s.spawn(move || {
758+
for task_id in task_ids {
759+
// May or may not find the hash depending on timing — that's fine,
760+
// we're testing for absence of panics/deadlocks.
761+
let _ = tracker.hash(task_id);
762+
let _ = tracker.env_vars(task_id);
763+
let _ = tracker.cache_status(task_id);
764+
}
765+
});
766+
}
767+
});
768+
}
769+
770+
// Validates that sort+dedup produces the same result as the previous
771+
// HashSet→Vec→sort approach for dependency hash deduplication.
772+
#[test]
773+
fn test_sort_dedup_matches_hashset_behavior() {
774+
let inputs: Vec<Vec<&str>> = vec![
775+
vec!["abc", "def", "abc", "ghi", "def"],
776+
vec!["zzz", "aaa", "mmm"],
777+
vec!["same", "same", "same"],
778+
vec![],
779+
vec!["only-one"],
780+
];
781+
782+
for input in inputs {
783+
// New approach: sort + dedup
784+
let mut sort_dedup: Vec<String> = input.iter().map(|s| s.to_string()).collect();
785+
sort_dedup.sort_unstable();
786+
sort_dedup.dedup();
787+
788+
// Old approach: HashSet → Vec → sort
789+
let hash_set: HashSet<String> = input.iter().map(|s| s.to_string()).collect();
790+
let mut hashset_sorted: Vec<String> = hash_set.into_iter().collect();
791+
hashset_sorted.sort();
792+
793+
assert_eq!(
794+
sort_dedup, hashset_sorted,
795+
"sort+dedup and hashset+sort should produce identical results for: {input:?}"
796+
);
797+
}
798+
}
704799
}

0 commit comments

Comments
 (0)