Skip to content

Commit f2b116c

Browse files
committed
perf: Parallelize package.json loading and reduce builder allocations
- Parallelize package.json file I/O and JSON parsing with rayon. Each file is read and parsed independently, so this is embarrassingly parallel. The sequential add_json processing loop is unchanged.
- Use the HashMap entry API in add_json to avoid cloning PackageName on every insert. The old code cloned unconditionally for the insert key, then cloned again on the error path for the get. Now we only clone once in the success path for add_node.
- Pre-allocate the workspaces and node_lookup HashMaps with the known package count before the add_json loop.
1 parent d16f081 commit f2b116c

File tree

3 files changed

+38
-18
lines changed

3 files changed

+38
-18
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/turborepo-repository/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ lazy-regex = "2.5.0"
2323
miette = { workspace = true }
2424
node-semver = "2.2.0"
2525
petgraph = { workspace = true }
26+
rayon = "1"
2627
regex = { workspace = true }
2728
rust-ini = "0.20.0"
2829
serde = { workspace = true, features = ["derive"] }

crates/turborepo-repository/src/package_graph/builder.rs

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -266,21 +266,23 @@ impl<'a, T: PackageDiscovery> BuildState<'a, ResolvedPackageManager, T> {
266266
package_json_path: relative_json_path,
267267
..Default::default()
268268
};
269-
if let Some(existing) = self.workspaces.insert(name.clone(), entry) {
270-
let path = self
271-
.workspaces
272-
.get(&name)
273-
.expect("just inserted entry to be present")
274-
.package_json_path
275-
.clone();
276-
return Err(Error::DuplicateWorkspace {
277-
name: name.to_string(),
278-
path: path.to_string(),
279-
existing_path: existing.package_json_path.to_string(),
280-
});
269+
match self.workspaces.entry(name) {
270+
std::collections::hash_map::Entry::Vacant(vacant) => {
271+
let name = vacant.key().clone();
272+
vacant.insert(entry);
273+
self.add_node(PackageNode::Workspace(name));
274+
Ok(())
275+
}
276+
std::collections::hash_map::Entry::Occupied(occupied) => {
277+
let existing_path = occupied.get().package_json_path.to_string();
278+
let name = occupied.key().to_string();
279+
Err(Error::DuplicateWorkspace {
280+
name,
281+
path: entry.package_json_path.to_string(),
282+
existing_path,
283+
})
284+
}
281285
}
282-
self.add_node(PackageNode::Workspace(name));
283-
Ok(())
284286
}
285287

286288
// need our own type
@@ -293,15 +295,31 @@ impl<'a, T: PackageDiscovery> BuildState<'a, ResolvedPackageManager, T> {
293295
let package_jsons = match self.package_jsons.take() {
294296
Some(jsons) => Ok(jsons),
295297
None => {
296-
let mut jsons = HashMap::new();
297-
for path in self.package_discovery.discover_packages().await?.workspaces {
298-
let json = PackageJson::load(&path.package_json)?;
299-
jsons.insert(path.package_json, json);
298+
let workspace_paths: Vec<_> =
299+
self.package_discovery.discover_packages().await?.workspaces;
300+
301+
let results: Vec<_> = {
302+
use rayon::prelude::*;
303+
workspace_paths
304+
.into_par_iter()
305+
.map(|path| {
306+
let json = PackageJson::load(&path.package_json)?;
307+
Ok((path.package_json, json))
308+
})
309+
.collect::<Result<Vec<_>, Error>>()?
310+
};
311+
312+
let mut jsons = HashMap::with_capacity(results.len());
313+
for (path, json) in results {
314+
jsons.insert(path, json);
300315
}
301316
Ok::<_, Error>(jsons)
302317
}
303318
}?;
304319

320+
self.workspaces.reserve(package_jsons.len());
321+
self.node_lookup.reserve(package_jsons.len());
322+
305323
for (path, json) in package_jsons {
306324
match self.add_json(path, json) {
307325
Ok(()) => {}

0 commit comments

Comments
 (0)