Skip to content

Commit 92f39d4

Browse files
authored
perf: Parallelize package.json loading and reduce builder allocations (#11918)
## Summary

Parallelize the package.json loading phase and reduce allocations in the package graph builder.

### Benchmarks

`--dry` runs, `--skip-infer`, 10 runs each, 5 warmup.

| Repo | Packages | Tasks | Before | After | Delta |
|------|----------|-------|--------|-------|-------|
| Large | ~1000 | 1690 | 2.107s ± 0.033s | 2.128s ± 0.203s | ~neutral (noisy) |
| Medium | ~120 | ~200 | 1.292s ± 0.071s | 1.230s ± 0.078s | **1.05x faster** |
| Small | ~5 | ~5 | 821.1ms ± 18.0ms | 812.7ms ± 15.6ms | **1.01x faster** |

The medium repo shows the clearest improvement — it has enough packages for rayon to help but isn't dominated by git subprocess overhead like the large repo.

### Changes

**Parallel package.json loading**: `parse_package_jsons` previously loaded and parsed each package.json file sequentially in a loop. Each `PackageJson::load` call does disk I/O (`read_to_string`) and CPU-bound JSON parsing (biome). These are independent per-package, so the loop is replaced with rayon's `into_par_iter` to parallelize across all available cores. The sequential `add_json` processing that mutates the builder is unchanged.

**Entry API in `add_json`**: The old code called `self.workspaces.insert(name.clone(), entry)` unconditionally, cloning the `PackageName` on every call. On the error path (duplicate workspace), it then did a second `get` + `clone` to retrieve the path. The new code uses `HashMap::entry()`, so the success path clones the name only once (for `add_node`) and the error path skips the redundant lookup.

**Capacity pre-allocation**: `workspaces` and `node_lookup` HashMaps are now pre-allocated with `reserve(package_jsons.len())` before the `add_json` loop, avoiding rehashing as entries are inserted.
1 parent 6ac8c28 commit 92f39d4

File tree

3 files changed

+38
-18
lines changed

3 files changed

+38
-18
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/turborepo-repository/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ lazy-regex = "2.5.0"
2323
miette = { workspace = true }
2424
node-semver = "2.2.0"
2525
petgraph = { workspace = true }
26+
rayon = "1"
2627
regex = { workspace = true }
2728
rust-ini = "0.20.0"
2829
serde = { workspace = true, features = ["derive"] }

crates/turborepo-repository/src/package_graph/builder.rs

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -266,21 +266,23 @@ impl<'a, T: PackageDiscovery> BuildState<'a, ResolvedPackageManager, T> {
266266
package_json_path: relative_json_path,
267267
..Default::default()
268268
};
269-
if let Some(existing) = self.workspaces.insert(name.clone(), entry) {
270-
let path = self
271-
.workspaces
272-
.get(&name)
273-
.expect("just inserted entry to be present")
274-
.package_json_path
275-
.clone();
276-
return Err(Error::DuplicateWorkspace {
277-
name: name.to_string(),
278-
path: path.to_string(),
279-
existing_path: existing.package_json_path.to_string(),
280-
});
269+
match self.workspaces.entry(name) {
270+
std::collections::hash_map::Entry::Vacant(vacant) => {
271+
let name = vacant.key().clone();
272+
vacant.insert(entry);
273+
self.add_node(PackageNode::Workspace(name));
274+
Ok(())
275+
}
276+
std::collections::hash_map::Entry::Occupied(occupied) => {
277+
let existing_path = occupied.get().package_json_path.to_string();
278+
let name = occupied.key().to_string();
279+
Err(Error::DuplicateWorkspace {
280+
name,
281+
path: entry.package_json_path.to_string(),
282+
existing_path,
283+
})
284+
}
281285
}
282-
self.add_node(PackageNode::Workspace(name));
283-
Ok(())
284286
}
285287

286288
// need our own type
@@ -293,15 +295,31 @@ impl<'a, T: PackageDiscovery> BuildState<'a, ResolvedPackageManager, T> {
293295
let package_jsons = match self.package_jsons.take() {
294296
Some(jsons) => Ok(jsons),
295297
None => {
296-
let mut jsons = HashMap::new();
297-
for path in self.package_discovery.discover_packages().await?.workspaces {
298-
let json = PackageJson::load(&path.package_json)?;
299-
jsons.insert(path.package_json, json);
298+
let workspace_paths: Vec<_> =
299+
self.package_discovery.discover_packages().await?.workspaces;
300+
301+
let results: Vec<_> = {
302+
use rayon::prelude::*;
303+
workspace_paths
304+
.into_par_iter()
305+
.map(|path| {
306+
let json = PackageJson::load(&path.package_json)?;
307+
Ok((path.package_json, json))
308+
})
309+
.collect::<Result<Vec<_>, Error>>()?
310+
};
311+
312+
let mut jsons = HashMap::with_capacity(results.len());
313+
for (path, json) in results {
314+
jsons.insert(path, json);
300315
}
301316
Ok::<_, Error>(jsons)
302317
}
303318
}?;
304319

320+
self.workspaces.reserve(package_jsons.len());
321+
self.node_lookup.reserve(package_jsons.len());
322+
305323
for (path, json) in package_jsons {
306324
match self.add_json(path, json) {
307325
Ok(()) => {}

0 commit comments

Comments
 (0)