diff --git a/cli/npm.rs b/cli/npm.rs index b5b22f931b696c..0e871bd6336cb5 100644 --- a/cli/npm.rs +++ b/cli/npm.rs @@ -1,13 +1,17 @@ // Copyright 2018-2026 the Deno authors. MIT license. use std::borrow::Cow; +use std::collections::HashMap; use std::collections::HashSet; +use std::ffi::OsString; +use std::num::NonZeroUsize; use std::path::PathBuf; use std::rc::Rc; use std::sync::Arc; use dashmap::DashMap; use deno_core::error::AnyError; +use deno_core::futures::StreamExt; use deno_core::serde_json; use deno_core::url::Url; use deno_error::JsErrorBox; @@ -27,6 +31,7 @@ use deno_npm_installer::lifecycle_scripts::LIFECYCLE_SCRIPTS_RUNNING_ENV_VAR; use deno_npm_installer::lifecycle_scripts::LifecycleScriptsExecutor; use deno_npm_installer::lifecycle_scripts::LifecycleScriptsExecutorOptions; use deno_npm_installer::lifecycle_scripts::PackageWithScript; +use deno_npm_installer::lifecycle_scripts::compute_lifecycle_script_layers; use deno_npm_installer::lifecycle_scripts::is_broken_default_install_script; use deno_resolver::npm::ByonmNpmResolverCreateOptions; use deno_resolver::npm::ManagedNpmResolverRc; @@ -349,6 +354,11 @@ pub struct DenoTaskLifeCycleScriptsExecutor { system_info: deno_npm::NpmSystemInfo, } +struct PackageScriptResult<'a> { + package: &'a NpmResolutionPackage, + failed: Option<&'a PackageNv>, +} + #[async_trait::async_trait(?Send)] impl LifecycleScriptsExecutor for DenoTaskLifeCycleScriptsExecutor { async fn execute( @@ -378,105 +388,48 @@ impl LifecycleScriptsExecutor for DenoTaskLifeCycleScriptsExecutor { // so the subprocess can detect that it is running as part of a lifecycle script, // and avoid trying to set up node_modules again env_vars.insert(LIFECYCLE_SCRIPTS_RUNNING_ENV_VAR.into(), "1".into()); - // we want to pass the current state of npm resolution down to the deno subprocess - // (that may be running as part of the script). we do this with an inherited temp file - // - // SAFETY: we are sharing a single temp file across all of the scripts. the file position - // will be shared among these, which is okay since we run only one script at a time. - // However, if we concurrently run scripts in the future we will - // have to have multiple temp files. - let temp_file_fd = deno_runtime::deno_process::npm_process_state_tempfile( - options.process_state.as_bytes(), - ) - .map_err(DenoTaskLifecycleScriptsError::CreateNpmProcessState)?; - // SAFETY: fd/handle is valid - let _temp_file = unsafe { std::fs::File::from_raw_io_handle(temp_file_fd) }; // make sure the file gets closed - env_vars.insert( - deno_runtime::deno_process::NPM_RESOLUTION_STATE_FD_ENV_VAR_NAME.into(), - (temp_file_fd as usize).to_string().into(), + + let concurrency = std::thread::available_parallelism() + .ok() + .and_then(|n| NonZeroUsize::new(n.get().saturating_sub(1))) + .unwrap_or_else(|| NonZeroUsize::new(2).unwrap()) + .get(); + + let layers = compute_lifecycle_script_layers( + options.packages_with_scripts, + options.snapshot, ); - for PackageWithScript { - package, - scripts, - package_folder, - } in options.packages_with_scripts - { - // add custom commands for binaries from the package's dependencies. this will take precedence over the - // baseline commands, so if the package relies on a bin that conflicts with one higher in the dependency tree, the - // correct bin will be used. - let custom_commands = self - .resolve_custom_commands_from_deps( - options.extra_info_provider, - base.clone(), - package, - options.snapshot, - ) - .await; - for script_name in ["preinstall", "install", "postinstall"] { - if let Some(script) = scripts.get(script_name) { - if script_name == "install" - && is_broken_default_install_script(&sys, script, package_folder) - { - continue; - } - let _guard = self.progress_bar.update_with_prompt( - ProgressMessagePrompt::Initialize, - &format!("{}: running '{script_name}' script", package.id.nv), - ); - let crate::task_runner::TaskResult { - exit_code, - stderr, - stdout, - } = - crate::task_runner::run_task(crate::task_runner::RunTaskOptions { - task_name: script_name, - script, - cwd: package_folder.clone(), - env_vars: env_vars.clone(), - custom_commands: custom_commands.clone(), - init_cwd: options.init_cwd, - argv: &[], - root_node_modules_dir: Some(options.root_node_modules_dir_path), - stdio: Some(crate::task_runner::TaskIo { - stderr: TaskStdio::piped(), - stdout: TaskStdio::piped(), - }), - kill_signal: kill_signal.clone(), - }) - .await - .map_err(DenoTaskLifecycleScriptsError::Task)?; - let stdout = stdout.unwrap(); - let stderr = stderr.unwrap(); - if exit_code != 0 { - log::warn!( - "error: script '{}' in '{}' failed with exit code {}{}{}", - script_name, - package.id.nv, - exit_code, - if !stdout.trim_ascii().is_empty() { - format!( - "\nstdout:\n{}\n", - String::from_utf8_lossy(&stdout).trim() - ) - } else { - String::new() - }, - if !stderr.trim_ascii().is_empty() { - format!( - "\nstderr:\n{}\n", - String::from_utf8_lossy(&stderr).trim() - ) - } else { - String::new() - }, - ); - failed_packages.push(&package.id.nv); - // assume if earlier script fails, later ones will fail too - break; - } + + for layer in &layers { + log::debug!( + "Running lifecycle scripts layer: {}", + layer + .iter() + .map(|l| l.package.id.as_serialized()) + .collect::>() + .join(", ") + ); + + let mut results = + deno_core::futures::stream::iter(layer.iter().map(|pkg| { + self.run_single_package_scripts( + pkg, + &env_vars, + &base, + &options, + &kill_signal, + &sys, + ) + })) + .buffer_unordered(concurrency); + + while let Some(result) = results.next().await { + let result = result?; + if let Some(nv) = result.failed { + failed_packages.push(nv); } + (options.on_ran_pkg_scripts)(result.package)?; } - (options.on_ran_pkg_scripts)(package)?; } // re-set up bin entries for the packages which we've run scripts for. @@ -524,6 +477,118 @@ impl DenoTaskLifeCycleScriptsExecutor { } } + /// Runs lifecycle scripts for a single package (preinstall, install, + /// postinstall in order). Each package gets its own temp file for + /// npm process state so concurrent execution is safe. + async fn run_single_package_scripts<'a>( + &self, + pkg: &'a PackageWithScript<'a>, + env_vars: &HashMap, + base_custom_commands: &crate::task_runner::TaskCustomCommands, + options: &LifecycleScriptsExecutorOptions<'a>, + kill_signal: &KillSignal, + sys: &CliSys, + ) -> Result, AnyError> { + let PackageWithScript { + package, + scripts, + package_folder, + } = pkg; + + // each concurrent package gets its own temp file to avoid fd races + let temp_file_fd = deno_runtime::deno_process::npm_process_state_tempfile( + options.process_state.as_bytes(), + ) + .map_err(DenoTaskLifecycleScriptsError::CreateNpmProcessState)?; + // SAFETY: fd/handle is valid + let _temp_file = unsafe { std::fs::File::from_raw_io_handle(temp_file_fd) }; + let mut env_vars = env_vars.clone(); + env_vars.insert( + deno_runtime::deno_process::NPM_RESOLUTION_STATE_FD_ENV_VAR_NAME.into(), + (temp_file_fd as usize).to_string().into(), + ); + + // add custom commands for binaries from the package's dependencies. + // this will take precedence over the baseline commands, so if the + // package relies on a bin that conflicts with one higher in the + // dependency tree, the correct bin will be used. + let custom_commands = self + .resolve_custom_commands_from_deps( + options.extra_info_provider, + base_custom_commands.clone(), + package, + options.snapshot, + ) + .await; + + let mut failed = None; + for script_name in ["preinstall", "install", "postinstall"] { + if let Some(script) = scripts.get(script_name) { + if script_name == "install" + && is_broken_default_install_script(sys, script, package_folder) + { + continue; + } + let _guard = self.progress_bar.update_with_prompt( + ProgressMessagePrompt::Initialize, + &format!("{}: running '{script_name}' script", package.id.nv), + ); + let crate::task_runner::TaskResult { + exit_code, + stderr, + stdout, + } = crate::task_runner::run_task(crate::task_runner::RunTaskOptions { + task_name: script_name, + script, + cwd: package_folder.clone(), + env_vars: env_vars.clone(), + custom_commands: custom_commands.clone(), + init_cwd: options.init_cwd, + argv: &[], + root_node_modules_dir: Some(options.root_node_modules_dir_path), + stdio: Some(crate::task_runner::TaskIo { + stderr: TaskStdio::piped(), + stdout: TaskStdio::piped(), + }), + kill_signal: kill_signal.clone(), + }) + .await + .map_err(DenoTaskLifecycleScriptsError::Task)?; + let stdout = stdout.unwrap(); + let stderr = stderr.unwrap(); + if exit_code != 0 { + log::warn!( + "error: script '{}' in '{}' failed with exit code {}{}{}", + script_name, + package.id.nv, + exit_code, + if !stdout.trim_ascii().is_empty() { + format!( + "\nstdout:\n{}\n", + String::from_utf8_lossy(&stdout).trim() + ) + } else { + String::new() + }, + if !stderr.trim_ascii().is_empty() { + format!( + "\nstderr:\n{}\n", + String::from_utf8_lossy(&stderr).trim() + ) + } else { + String::new() + }, + ); + failed = Some(&package.id.nv); + // assume if earlier script fails, later ones will fail too + break; + } + } + } + + Ok(PackageScriptResult { package, failed }) + } + // take in all (non copy) packages from snapshot, // and resolve the set of available binaries to create // custom commands available to the task runner diff --git a/libs/npm_installer/lifecycle_scripts.rs b/libs/npm_installer/lifecycle_scripts.rs index bfde7ef525bba8..41484f42119b74 100644 --- a/libs/npm_installer/lifecycle_scripts.rs +++ b/libs/npm_installer/lifecycle_scripts.rs @@ -2,12 +2,15 @@ use std::borrow::Cow; use std::collections::HashMap; +use std::collections::HashSet; +use std::collections::VecDeque; use std::path::Path; use std::path::PathBuf; use anyhow::Error as AnyError; use deno_error::JsErrorBox; use deno_npm::NpmPackageExtraInfo; +use deno_npm::NpmPackageId; use deno_npm::NpmResolutionPackage; use deno_npm::resolution::NpmResolutionSnapshot; use deno_semver::SmallStackString; @@ -20,6 +23,7 @@ use crate::CachedNpmPackageExtraInfoProvider; use crate::LifecycleScriptsConfig; use crate::PackagesAllowedScripts; +#[derive(Debug)] pub struct PackageWithScript<'a> { pub package: &'a NpmResolutionPackage, pub scripts: HashMap, @@ -256,3 +260,292 @@ pub static LIFECYCLE_SCRIPTS_RUNNING_ENV_VAR: &str = pub fn is_running_lifecycle_script(sys: &impl sys_traits::EnvVar) -> bool { sys.env_var(LIFECYCLE_SCRIPTS_RUNNING_ENV_VAR).is_ok() } + +/// Groups packages with lifecycle scripts into topological layers using +/// Kahn's algorithm. Packages in the same layer have no inter-dependencies +/// (considering only packages that have lifecycle scripts), so they can +/// run in parallel. Later layers depend on earlier ones. +/// +/// This considers transitive dependencies through the full snapshot, not +/// just direct dependencies. For example, if A depends on B (no scripts) +/// which depends on C (has scripts), A will be placed in a later layer +/// than C. +pub fn compute_lifecycle_script_layers<'a>( + packages: &'a [PackageWithScript<'a>], + snapshot: &NpmResolutionSnapshot, +) -> Vec>> { + if packages.len() <= 1 { + return vec![packages.iter().collect()]; + } + + let start = std::time::Instant::now(); + let script_pkg_ids: HashSet<&NpmPackageId> = + packages.iter().map(|p| &p.package.id).collect(); + let pkg_by_id: HashMap<&NpmPackageId, &PackageWithScript> = + packages.iter().map(|p| (&p.package.id, p)).collect(); + + // for each package, find transitive deps that have lifecycle scripts + // (walking through intermediate packages that don't have scripts) + let mut in_degree: HashMap<&NpmPackageId, usize> = HashMap::new(); + let mut dependents: HashMap<&NpmPackageId, Vec<&NpmPackageId>> = + HashMap::new(); + for pkg in packages { + let transitive_script_deps = + find_transitive_script_deps(pkg.package, &script_pkg_ids, snapshot); + in_degree.insert(&pkg.package.id, transitive_script_deps.len()); + for dep_id in transitive_script_deps { + dependents.entry(dep_id).or_default().push(&pkg.package.id); + } + } + + // if no package has any script deps, everything is one layer + if in_degree.values().all(|°| deg == 0) { + return vec![packages.iter().collect()]; + } + + // peel off layers using Kahn's algorithm + let mut layers = Vec::new(); + let mut queue: VecDeque<&NpmPackageId> = in_degree + .iter() + .filter(|(_, deg)| **deg == 0) + .map(|(&id, _)| id) + .collect(); + + while !queue.is_empty() { + let layer: Vec<&PackageWithScript> = + queue.iter().map(|id| pkg_by_id[id]).collect(); + let mut next_queue = VecDeque::new(); + for id in queue.drain(..) { + if let Some(deps) = dependents.get(id) { + for &dep_id in deps { + let deg = in_degree.get_mut(dep_id).unwrap(); + *deg -= 1; + if *deg == 0 { + next_queue.push_back(dep_id); + } + } + } + } + layers.push(layer); + queue = next_queue; + } + + log::debug!( + "Computed lifecycle script layers in {}ms.", + start.elapsed().as_millis() + ); + + layers +} + +/// Finds all transitive dependency package IDs that have lifecycle scripts, +/// walking through intermediate packages that may not have scripts themselves. +fn find_transitive_script_deps<'a>( + package: &'a NpmResolutionPackage, + script_pkg_ids: &HashSet<&'a NpmPackageId>, + snapshot: &'a NpmResolutionSnapshot, +) -> HashSet<&'a NpmPackageId> { + let mut result = HashSet::new(); + let mut visited = HashSet::new(); + let mut stack: Vec<&NpmPackageId> = package.dependencies.values().collect(); + + while let Some(dep_id) = stack.pop() { + if !visited.insert(dep_id) { + continue; + } + if script_pkg_ids.contains(dep_id) { + result.insert(dep_id); + // don't walk further — this script package forms a layer boundary + continue; + } + // walk through non-script packages to find transitive script deps + if let Some(dep_pkg) = snapshot.package_from_id(dep_id) { + stack.extend(dep_pkg.dependencies.values()); + } + } + + result +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + use std::path::PathBuf; + + use deno_npm::NpmPackageId; + use deno_npm::resolution::NpmResolutionSnapshot; + use deno_npm::resolution::SerializedNpmResolutionSnapshot; + use deno_npm::resolution::SerializedNpmResolutionSnapshotPackage; + use deno_semver::StackString; + use deno_semver::package::PackageReq; + + use super::PackageWithScript; + use super::compute_lifecycle_script_layers; + + fn pkg_id(s: &str) -> NpmPackageId { + NpmPackageId::from_serialized(s).unwrap() + } + + fn deps(pairs: &[(&str, &str)]) -> HashMap { + pairs + .iter() + .map(|(k, v)| (StackString::from(*k), pkg_id(v))) + .collect() + } + + fn pkg( + id: &str, + dependencies: &[(&str, &str)], + ) -> SerializedNpmResolutionSnapshotPackage { + SerializedNpmResolutionSnapshotPackage { + id: pkg_id(id), + system: Default::default(), + dist: None, + dependencies: deps(dependencies), + optional_dependencies: Default::default(), + optional_peer_dependencies: Default::default(), + extra: None, + is_deprecated: false, + has_bin: false, + has_scripts: false, + } + } + + fn make_snapshot( + root: &[(&str, &str)], + packages: Vec, + ) -> NpmResolutionSnapshot { + let serialized = SerializedNpmResolutionSnapshot { + root_packages: root + .iter() + .map(|(k, v)| (PackageReq::from_str(k).unwrap(), pkg_id(v))) + .collect(), + packages, + }; + NpmResolutionSnapshot::new(serialized.into_valid().unwrap()) + } + + fn make_pkg_with_script<'a>( + id: &str, + snapshot: &'a NpmResolutionSnapshot, + ) -> PackageWithScript<'a> { + PackageWithScript { + package: snapshot.package_from_id(&pkg_id(id)).unwrap(), + scripts: HashMap::from([("install".into(), "node-gyp rebuild".into())]), + package_folder: PathBuf::from(format!("/tmp/{id}")), + } + } + + /// extracts sorted package names from each layer for easy assertion + fn layer_names(layers: &[Vec<&PackageWithScript>]) -> Vec> { + layers + .iter() + .map(|layer| { + let mut names: Vec = layer + .iter() + .map(|p| p.package.id.nv.name.to_string()) + .collect(); + names.sort(); + names + }) + .collect() + } + + #[test] + fn single_package() { + let snapshot = + make_snapshot(&[("a@1", "a@1.0.0")], vec![pkg("a@1.0.0", &[])]); + let pkgs = vec![make_pkg_with_script("a@1.0.0", &snapshot)]; + let layers = compute_lifecycle_script_layers(&pkgs, &snapshot); + assert_eq!(layers.len(), 1); + assert_eq!(layer_names(&layers), vec![vec!["a"]]); + } + + #[test] + fn no_interdependencies() { + // a and b have scripts but don't depend on each other + let snapshot = make_snapshot( + &[("a@1", "a@1.0.0"), ("b@1", "b@1.0.0")], + vec![pkg("a@1.0.0", &[]), pkg("b@1.0.0", &[])], + ); + let pkgs = vec![ + make_pkg_with_script("a@1.0.0", &snapshot), + make_pkg_with_script("b@1.0.0", &snapshot), + ]; + let layers = compute_lifecycle_script_layers(&pkgs, &snapshot); + assert_eq!(layers.len(), 1); + assert_eq!(layer_names(&layers), vec![vec!["a", "b"]]); + } + + #[test] + fn direct_dependency_chain() { + // a depends on b, both have scripts => two layers + let snapshot = make_snapshot( + &[("a@1", "a@1.0.0")], + vec![pkg("a@1.0.0", &[("b", "b@1.0.0")]), pkg("b@1.0.0", &[])], + ); + let pkgs = vec![ + make_pkg_with_script("a@1.0.0", &snapshot), + make_pkg_with_script("b@1.0.0", &snapshot), + ]; + let layers = compute_lifecycle_script_layers(&pkgs, &snapshot); + assert_eq!(layer_names(&layers), vec![vec!["b"], vec!["a"]]); + } + + #[test] + fn transitive_through_non_script_package() { + // a -> b (no scripts) -> c (has scripts) + // a and c should be in different layers + let snapshot = make_snapshot( + &[("a@1", "a@1.0.0")], + vec![ + pkg("a@1.0.0", &[("b", "b@1.0.0")]), + pkg("b@1.0.0", &[("c", "c@1.0.0")]), + pkg("c@1.0.0", &[]), + ], + ); + // only a and c have scripts, b does not + let pkgs = vec![ + make_pkg_with_script("a@1.0.0", &snapshot), + make_pkg_with_script("c@1.0.0", &snapshot), + ]; + let layers = compute_lifecycle_script_layers(&pkgs, &snapshot); + assert_eq!(layer_names(&layers), vec![vec!["c"], vec!["a"]]); + } + + #[test] + fn diamond_dependency() { + // a -> b, a -> c, b -> d, c -> d + // all have scripts + // layer 0: d, layer 1: b + c, layer 2: a + let snapshot = make_snapshot( + &[("a@1", "a@1.0.0")], + vec![ + pkg("a@1.0.0", &[("b", "b@1.0.0"), ("c", "c@1.0.0")]), + pkg("b@1.0.0", &[("d", "d@1.0.0")]), + pkg("c@1.0.0", &[("d", "d@1.0.0")]), + pkg("d@1.0.0", &[]), + ], + ); + let pkgs = vec![ + make_pkg_with_script("a@1.0.0", &snapshot), + make_pkg_with_script("b@1.0.0", &snapshot), + make_pkg_with_script("c@1.0.0", &snapshot), + make_pkg_with_script("d@1.0.0", &snapshot), + ]; + let layers = compute_lifecycle_script_layers(&pkgs, &snapshot); + assert_eq!( + layer_names(&layers), + vec![vec!["d"], vec!["b", "c"], vec!["a"]] + ); + } + + #[test] + fn empty_packages() { + let snapshot = make_snapshot(&[], vec![]); + let pkgs: Vec = vec![]; + let layers = compute_lifecycle_script_layers(&pkgs, &snapshot); + assert_eq!(layers.len(), 1); + assert!(layers[0].is_empty()); + } +}