From 7071a094fb96e438721b05f096d7a1da9e12f141 Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 01:49:02 +0800 Subject: [PATCH 01/21] perf(pm): add resolver manifest provider boundary --- crates/ruborist/src/service/mod.rs | 2 + crates/ruborist/src/service/provider.rs | 82 +++++++++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 crates/ruborist/src/service/provider.rs diff --git a/crates/ruborist/src/service/mod.rs b/crates/ruborist/src/service/mod.rs index 14723d5ed..58f41f9a9 100644 --- a/crates/ruborist/src/service/mod.rs +++ b/crates/ruborist/src/service/mod.rs @@ -49,6 +49,7 @@ pub(crate) mod fetch; mod fs; pub(crate) mod http; pub(crate) mod manifest; +mod provider; mod registry; mod store; @@ -64,5 +65,6 @@ pub use manifest::{ FetchVersionManifestOptions, MetadataFormat, fetch_full_manifest, fetch_full_manifest_bytes, fetch_full_manifest_fresh, fetch_version_manifest, fetch_version_manifest_bytes, }; +pub use provider::{ManifestFullData, ManifestJob, ManifestJobDone, ManifestProvider}; pub use registry::UnifiedRegistry; pub use store::{ManifestStore, NoopStore}; diff --git a/crates/ruborist/src/service/provider.rs b/crates/ruborist/src/service/provider.rs new file mode 100644 index 000000000..65609a0ed --- /dev/null +++ b/crates/ruborist/src/service/provider.rs @@ -0,0 +1,82 @@ +//! Manifest provider boundary for resolver drivers. +//! +//! The demand BFS loop owns per-run cache, waiters, and inflight de-duplication. +//! A provider only executes one manifest job and hides whether it satisfied the +//! job from memory, persistent storage, or the network. + +use std::sync::Arc; + +use async_trait::async_trait; + +use super::cache::VersionsInfo; +use super::manifest::MetadataFormat; +use crate::model::manifest::{CoreVersionManifest, FullManifest}; +use crate::traits::registry::RegistryClient; + +/// Full-manifest data returned by a provider job. +#[derive(Clone)] +pub enum ManifestFullData { + /// A parsed full manifest. 
When the original job carried a spec, the + /// provider may also return the matching version manifest extracted in the + /// same worker task so the main loop can avoid an extra extract hop. + Full { + manifest: Arc, + speculative: Option<(String, Arc)>, + }, + /// A validated versions list, usually from a 304 path. The main loop can + /// resolve a concrete version and schedule a version-manifest job. + Versions(Arc), +} + +/// Unit of work spawned by the demand BFS loop. +#[derive(Clone)] +pub enum ManifestJob { + Full { + name: String, + /// Optional range/tag from the BFS edge that caused this full-manifest + /// fetch. The provider can use it to speculatively extract the current + /// version while the full manifest bytes are already on a CPU worker. + spec: Option, + }, + Version { + name: String, + /// Cache/waiter key owned by the main loop. + spec: String, + /// Registry request spec. For npmjs 304 flows this is the resolved + /// exact version, while `spec` remains the original range key. + fetch_spec: String, + /// Metadata format for the version endpoint. Semver-capable registries + /// accept install-v1 for range/tag queries; npmjs exact-version + /// fallback requires the complete metadata format. + format: MetadataFormat, + }, + ExtractVersion { + name: String, + spec: String, + version: String, + full: Arc, + }, +} + +/// Result of one provider job. +pub enum ManifestJobDone { + Full { + name: String, + data: ManifestFullData, + }, + Version { + name: String, + spec: String, + manifest: Arc, + }, +} + +/// Lower-level manifest provider used by the demand BFS loop. +#[cfg_attr(target_arch = "wasm32", async_trait(?Send))] +#[cfg_attr(not(target_arch = "wasm32"), async_trait)] +pub trait ManifestProvider: RegistryClient + Clone + Send + Sync + 'static { + /// Execute one manifest job. The provider owns I/O, persistence, and + /// parse/extract offloading; scheduling, waiters, and inflight + /// de-duplication stay in the BFS loop. 
+ async fn execute_manifest_job(&self, job: ManifestJob) -> Result; +} From c3c7d7aaaf864296199973f550da0a5a686f0050 Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 06:24:32 +0800 Subject: [PATCH 02/21] fix(ci): stabilize pm view and web worker checks --- crates/pm/src/cmd/view.rs | 41 +++++++++++++++++-- .../webpackLoaders/polyfills/nodePolyFills.ts | 16 ++++---- 2 files changed, 45 insertions(+), 12 deletions(-) diff --git a/crates/pm/src/cmd/view.rs b/crates/pm/src/cmd/view.rs index e5bfaad79..8a952f4e1 100644 --- a/crates/pm/src/cmd/view.rs +++ b/crates/pm/src/cmd/view.rs @@ -10,6 +10,11 @@ use crate::util::user_config::get_registry; /// View package information from registry, similar to npm view pub async fn view(package_spec: &str) -> Result<()> { + let registry_url = get_registry(); + view_with_registry(package_spec, ®istry_url).await +} + +async fn view_with_registry(package_spec: &str, registry_url: &str) -> Result<()> { tracing::debug!("Viewing package: {package_spec}"); // Parse package specification @@ -18,9 +23,8 @@ pub async fn view(package_spec: &str) -> Result<()> { tracing::debug!("Resolved package: {name} (spec: {version_spec})"); // Fetch full manifest directly from registry (Complete format for display, no ETag) - let registry_url = get_registry(); let (full_manifest, _etag) = - fetch_full_manifest_fresh(®istry_url, name, MetadataFormat::Complete) + fetch_full_manifest_fresh(registry_url, name, MetadataFormat::Complete) .await .map_err(|e| anyhow!("Failed to fetch package info for {}: {}", package_spec, e))?; @@ -356,12 +360,41 @@ mod tests { /// because the registry service used ETag caching. 
#[tokio::test] async fn test_view_twice_no_304_error() { + use mockito::Matcher; + + let mut server = mockito::Server::new_async().await; + let manifest = r#"{ + "name": "is-odd", + "description": "mock package", + "dist-tags": { "latest": "1.0.0" }, + "versions": { + "1.0.0": { + "name": "is-odd", + "version": "1.0.0", + "description": "mock package", + "dist": {} + } + } + }"#; + let mock = server + .mock("GET", "/is-odd") + .match_header("accept", "application/json") + .match_header("if-none-match", Matcher::Missing) + .with_status(200) + .with_header("content-type", "application/json") + .with_header("etag", "\"mock-etag\"") + .with_body(manifest) + .expect(2) + .create_async() + .await; + // First view - should succeed - let result1 = view("is-odd").await; + let result1 = view_with_registry("is-odd", &server.url()).await; assert!(result1.is_ok(), "First view failed: {:?}", result1.err()); // Second view - should also succeed (not fail with 304 error) - let result2 = view("is-odd").await; + let result2 = view_with_registry("is-odd", &server.url()).await; assert!(result2.is_ok(), "Second view failed: {:?}", result2.err()); + mock.assert_async().await; } } diff --git a/packages/utoo-web/src/webpackLoaders/polyfills/nodePolyFills.ts b/packages/utoo-web/src/webpackLoaders/polyfills/nodePolyFills.ts index cb0329793..e64702b62 100644 --- a/packages/utoo-web/src/webpackLoaders/polyfills/nodePolyFills.ts +++ b/packages/utoo-web/src/webpackLoaders/polyfills/nodePolyFills.ts @@ -3,15 +3,15 @@ import * as workerThreads from "./workerThreadsPolyfill"; const buffer = require("buffer"); self.Buffer = buffer.Buffer; -const process = require("process"); -const originalCwd = process.cwd; -process.cwd = () => { +const nodeProcess = require("process"); +const originalCwd = nodeProcess.cwd; +nodeProcess.cwd = () => { // @ts-ignore return workerThreads.workerData?.cwd || originalCwd?.() || "/"; }; -if (!process.versions) process.versions = {}; -if (!process.versions.node) 
process.versions.node = "24.0.0"; -self.process = process; +if (!nodeProcess.versions) nodeProcess.versions = {}; +if (!nodeProcess.versions.node) nodeProcess.versions.node = "24.0.0"; +self.process = nodeProcess; self.global = self; const path = require("path"); @@ -243,8 +243,8 @@ export default { path, "node:path": path, - process, - "node:process": process, + process: nodeProcess, + "node:process": nodeProcess, get url() { return require("url"); From 14d38c9a71de3535c8ebc9f57e7fd1336af46d24 Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 07:07:13 +0800 Subject: [PATCH 03/21] ci(pm): make defender setup best effort --- .github/workflows/pm-ci.yml | 7 ++++++- .github/workflows/pm-e2e-bench.yml | 14 ++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pm-ci.yml b/.github/workflows/pm-ci.yml index aa57cc134..a691c2c22 100644 --- a/.github/workflows/pm-ci.yml +++ b/.github/workflows/pm-ci.yml @@ -56,7 +56,12 @@ jobs: - name: Disable Windows Defender if: runner.os == 'Windows' shell: powershell - run: Set-MpPreference -DisableRealtimeMonitoring $true + run: | + try { + Set-MpPreference -DisableRealtimeMonitoring $true -ErrorAction Stop + } catch { + Write-Warning "Unable to disable Windows Defender real-time monitoring: $_" + } # Add: Configure Git longpaths on Windows - name: Configure Git (Windows) diff --git a/.github/workflows/pm-e2e-bench.yml b/.github/workflows/pm-e2e-bench.yml index c2ac3f7fe..7505c0319 100644 --- a/.github/workflows/pm-e2e-bench.yml +++ b/.github/workflows/pm-e2e-bench.yml @@ -254,7 +254,12 @@ jobs: - uses: actions/checkout@v4 - name: Disable Windows Defender shell: powershell - run: Set-MpPreference -DisableRealtimeMonitoring $true + run: | + try { + Set-MpPreference -DisableRealtimeMonitoring $true -ErrorAction Stop + } catch { + Write-Warning "Unable to disable Windows Defender real-time monitoring: $_" + } - name: Init git submodules run: git submodule update --init --recursive 
--depth 1 - name: Setup Rust toolchain @@ -408,7 +413,12 @@ jobs: - uses: actions/checkout@v4 - name: Disable Windows Defender shell: powershell - run: Set-MpPreference -DisableRealtimeMonitoring $true + run: | + try { + Set-MpPreference -DisableRealtimeMonitoring $true -ErrorAction Stop + } catch { + Write-Warning "Unable to disable Windows Defender real-time monitoring: $_" + } - name: Setup node uses: actions/setup-node@v4 with: From 21ea59ba9df51b5df09e0cda2140f6dc7211b9fd Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 07:32:35 +0800 Subject: [PATCH 04/21] ci(pm): make bench cargo cache best effort --- .github/workflows/pm-e2e-bench.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pm-e2e-bench.yml b/.github/workflows/pm-e2e-bench.yml index 7505c0319..d88076579 100644 --- a/.github/workflows/pm-e2e-bench.yml +++ b/.github/workflows/pm-e2e-bench.yml @@ -133,6 +133,7 @@ jobs: targets: x86_64-unknown-linux-gnu - name: Cache cargo uses: Swatinem/rust-cache@v2 + continue-on-error: true with: shared-key: pm-build-linux cache-bin: false @@ -200,6 +201,7 @@ jobs: targets: aarch64-apple-darwin - name: Cache cargo uses: Swatinem/rust-cache@v2 + continue-on-error: true with: shared-key: pm-build-mac-arm64 cache-bin: false @@ -232,6 +234,7 @@ jobs: run: rustup target add x86_64-apple-darwin - name: Cache cargo uses: Swatinem/rust-cache@v2 + continue-on-error: true with: shared-key: pm-build-mac-x64 cache-bin: false @@ -269,6 +272,7 @@ jobs: targets: x86_64-pc-windows-msvc - name: Cache cargo uses: Swatinem/rust-cache@v2 + continue-on-error: true with: shared-key: pm-build-windows cache-bin: false From 604aa300396bf32e53093f974f8c077ef28bb267 Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 01:52:06 +0800 Subject: [PATCH 05/21] perf(pm): parse version manifests from vec buffers --- crates/ruborist/src/service/manifest.rs | 86 +++++++++++++++++++++---- 1 file changed, 75 insertions(+), 11 deletions(-) diff --git 
a/crates/ruborist/src/service/manifest.rs b/crates/ruborist/src/service/manifest.rs index 8dd9dc10b..e73251321 100644 --- a/crates/ruborist/src/service/manifest.rs +++ b/crates/ruborist/src/service/manifest.rs @@ -14,11 +14,13 @@ use super::fetch::{ use super::http::get_client; use crate::model::manifest::{CoreVersionManifest, FullManifest}; -/// Parse JSON bytes on rayon's CPU thread pool (native) or inline -/// (wasm32). Keeps the tokio runtime free of `simd_json` work so other -/// in-flight manifest fetches keep driving network IO while this one -/// parses. -pub(crate) async fn parse_json_off_runtime(bytes: Bytes) -> Result +/// Parse a JSON buffer on rayon's CPU thread pool (native) or inline +/// (wasm32). The buffer is consumed because `simd_json` mutates it in-place. +/// Keeps the tokio runtime free of `simd_json` work so other in-flight +/// manifest fetches keep driving network IO while this one parses. +pub(crate) async fn parse_json_vec_off_runtime( + mut parse_buf: Vec, +) -> Result where T: serde::de::DeserializeOwned + Send + 'static, { @@ -26,7 +28,6 @@ where { let (tx, rx) = tokio::sync::oneshot::channel(); rayon::spawn(move || { - let mut parse_buf = bytes.to_vec(); let result = simd_json::serde::from_slice::(&mut parse_buf) .map_err(|e| anyhow!("JSON parse error: {e}")); let _ = tx.send(result); @@ -36,7 +37,6 @@ where } #[cfg(target_arch = "wasm32")] { - let mut parse_buf = bytes.to_vec(); simd_json::serde::from_slice::(&mut parse_buf) .map_err(|e| anyhow!("JSON parse error: {e}")) } @@ -71,7 +71,8 @@ pub(crate) async fn parse_full_manifest_off_runtime( } #[cfg(target_arch = "wasm32")] { - let mut manifest: FullManifest = parse_json_off_runtime(raw_bytes.clone()).await?; + let mut manifest: FullManifest = + parse_json_vec_off_runtime(raw_bytes.clone().to_vec()).await?; manifest.raw = raw_bytes; Ok(manifest) } @@ -220,6 +221,28 @@ pub async fn fetch_full_manifest_fresh( } } +#[cfg(not(target_arch = "wasm32"))] +async fn read_body_vec(mut 
response: reqwest::Response) -> Result, FetchError> { + let capacity = response + .content_length() + .and_then(|len| usize::try_from(len).ok()) + .unwrap_or(0); + let mut body = Vec::with_capacity(capacity); + while let Some(chunk) = response.chunk().await.map_err(classify_reqwest_error)? { + body.extend_from_slice(&chunk); + } + Ok(body) +} + +#[cfg(target_arch = "wasm32")] +async fn read_body_vec(response: reqwest::Response) -> Result, FetchError> { + response + .bytes() + .await + .map(|bytes| bytes.to_vec()) + .map_err(classify_reqwest_error) +} + /// Options for fetching a version manifest. pub struct FetchVersionManifestOptions<'a> { pub registry_url: &'a str, @@ -230,6 +253,17 @@ pub struct FetchVersionManifestOptions<'a> { /// Fetch version manifest bytes with retry, without parsing. pub async fn fetch_version_manifest_bytes(opts: FetchVersionManifestOptions<'_>) -> Result { + fetch_version_manifest_vec(opts).await.map(Bytes::from) +} + +/// Fetch version manifest into a mutable parse buffer with retry. +/// +/// Unlike full manifests, exact-version manifests do not need to keep raw +/// response bytes for later extraction. Reading directly into `Vec` avoids +/// the hot-path `Bytes -> Vec` copy before `simd_json` parses in place. 
+pub(crate) async fn fetch_version_manifest_vec( + opts: FetchVersionManifestOptions<'_>, +) -> Result> { let url = format!("{}/{}/{}", opts.registry_url, opts.name, opts.spec); let accept = match opts.format { @@ -251,7 +285,7 @@ pub async fn fetch_version_manifest_bytes(opts: FetchVersionManifestOptions<'_>) .map_err(classify_reqwest_error)?; if response.status().is_success() { - response.bytes().await.map_err(classify_reqwest_error) + read_body_vec(response).await } else { Err(classify_status(response.status(), &url)) } @@ -271,6 +305,36 @@ pub async fn fetch_version_manifest_bytes(opts: FetchVersionManifestOptions<'_>) pub async fn fetch_version_manifest( opts: FetchVersionManifestOptions<'_>, ) -> Result { - let bytes = fetch_version_manifest_bytes(opts).await?; - parse_json_off_runtime::(bytes).await + let bytes = fetch_version_manifest_vec(opts).await?; + parse_json_vec_off_runtime::(bytes).await +} + +#[cfg(test)] +mod tests { + use serde::Deserialize; + + use super::*; + + #[derive(Debug, Deserialize, PartialEq)] + struct TinyManifest { + name: String, + version: String, + } + + #[tokio::test] + async fn parse_json_vec_off_runtime_consumes_mutable_buffer() { + let parsed = parse_json_vec_off_runtime::( + br#"{"name":"demo","version":"1.0.0"}"#.to_vec(), + ) + .await + .unwrap(); + + assert_eq!( + parsed, + TinyManifest { + name: "demo".to_string(), + version: "1.0.0".to_string(), + } + ); + } } From 5af3c3b9ae9d8d298793109f02fe5bc6bcc34ef3 Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 02:09:25 +0800 Subject: [PATCH 06/21] perf(pm): extract requested core from full manifest parse --- crates/ruborist/src/service/manifest.rs | 80 ++++++++++++++++++++----- 1 file changed, 65 insertions(+), 15 deletions(-) diff --git a/crates/ruborist/src/service/manifest.rs b/crates/ruborist/src/service/manifest.rs index e73251321..b68eb975b 100644 --- a/crates/ruborist/src/service/manifest.rs +++ b/crates/ruborist/src/service/manifest.rs @@ -13,6 +13,7 @@ 
use super::fetch::{ }; use super::http::get_client; use crate::model::manifest::{CoreVersionManifest, FullManifest}; +use crate::resolver::version::resolve_target_version; /// Parse a JSON buffer on rayon's CPU thread pool (native) or inline /// (wasm32). The buffer is consumed because `simd_json` mutates it in-place. @@ -49,21 +50,44 @@ where pub(crate) async fn parse_full_manifest_off_runtime( raw_bytes: Bytes, ) -> Result { + parse_full_manifest_with_core_off_runtime(raw_bytes, None) + .await + .map(|(manifest, _)| manifest) +} + +pub(crate) type FullManifestParseResult = (FullManifest, Option<(String, CoreVersionManifest)>); + +fn parse_full_manifest_with_core_sync( + raw_bytes: Bytes, + spec: Option, +) -> Result { + // simd_json mutates the parse buffer; copy inside the worker so + // response-body bytes can stay immutable until parsing starts. + let mut parse_buf = raw_bytes.to_vec(); + let mut manifest: FullManifest = simd_json::serde::from_slice::(&mut parse_buf) + .map_err(|e| anyhow!("JSON parse error: {e}"))?; + manifest.raw = raw_bytes; + + let speculative = spec.and_then(|spec| { + resolve_target_version((&manifest).into(), &spec) + .ok() + .and_then(|version| manifest.get_core_version(&version).map(|core| (spec, core))) + }); + + Ok((manifest, speculative)) +} + +/// Parse a full wire-fetched manifest and optionally extract the current BFS +/// edge's version in the same off-runtime worker task. +pub(crate) async fn parse_full_manifest_with_core_off_runtime( + raw_bytes: Bytes, + spec: Option, +) -> Result { #[cfg(not(target_arch = "wasm32"))] { let (tx, rx) = tokio::sync::oneshot::channel(); rayon::spawn(move || { - let result = (|| -> Result { - // simd_json mutates the parse buffer; copy inside the worker so - // response-body bytes can stay immutable until parsing starts. 
- let mut parse_buf = raw_bytes.to_vec(); - let mut manifest: FullManifest = - simd_json::serde::from_slice::(&mut parse_buf) - .map_err(|e| anyhow!("JSON parse error: {e}"))?; - manifest.raw = raw_bytes; - - Ok(manifest) - })(); + let result = parse_full_manifest_with_core_sync(raw_bytes, spec); let _ = tx.send(result); }); rx.await @@ -71,10 +95,7 @@ pub(crate) async fn parse_full_manifest_off_runtime( } #[cfg(target_arch = "wasm32")] { - let mut manifest: FullManifest = - parse_json_vec_off_runtime(raw_bytes.clone().to_vec()).await?; - manifest.raw = raw_bytes; - Ok(manifest) + parse_full_manifest_with_core_sync(raw_bytes, spec) } } @@ -337,4 +358,33 @@ mod tests { } ); } + + #[tokio::test] + async fn parse_full_manifest_with_core_extracts_requested_spec() { + let raw = Bytes::from_static( + br#"{ + "name":"demo", + "dist-tags":{"latest":"1.0.0"}, + "versions":{ + "1.0.0":{ + "name":"demo", + "version":"1.0.0", + "dist":{"tarball":"https://registry.example/demo-1.0.0.tgz"} + } + } + }"#, + ); + + let (manifest, speculative) = + parse_full_manifest_with_core_off_runtime(raw.clone(), Some("latest".to_string())) + .await + .unwrap(); + + assert_eq!(manifest.name, "demo"); + assert_eq!(manifest.raw, raw); + let (spec, core) = speculative.unwrap(); + assert_eq!(spec, "latest"); + assert_eq!(core.name, "demo"); + assert_eq!(core.version, "1.0.0"); + } } From a7138a9f3a8817ca5b042ef43166048f7febeaeb Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 02:12:34 +0800 Subject: [PATCH 07/21] perf(pm): add mock manifest provider --- crates/ruborist/src/traits/registry.rs | 101 +++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/crates/ruborist/src/traits/registry.rs b/crates/ruborist/src/traits/registry.rs index 7110ed0f8..eb00fd7d3 100644 --- a/crates/ruborist/src/traits/registry.rs +++ b/crates/ruborist/src/traits/registry.rs @@ -298,6 +298,7 @@ pub mod mock { use super::*; /// Internal package data for mock registry. 
+ #[derive(Clone)] struct MockPackage { name: String, dist_tags: HashMap, @@ -305,6 +306,7 @@ pub mod mock { } /// Mock registry client that returns predefined packages. + #[derive(Clone)] pub struct MockRegistryClient { packages: HashMap, } @@ -393,4 +395,103 @@ pub mod mock { })) } } + + #[cfg_attr(target_arch = "wasm32", async_trait::async_trait(?Send))] + #[cfg_attr(not(target_arch = "wasm32"), async_trait::async_trait)] + impl crate::service::ManifestProvider for MockRegistryClient { + async fn execute_manifest_job( + &self, + job: crate::service::ManifestJob, + ) -> Result { + use crate::service::{ManifestFullData, ManifestJob, ManifestJobDone}; + + match job { + ManifestJob::Full { name, spec } => { + let full = self.fetch_full_manifest(&name).await?; + let speculative = spec.and_then(|spec| { + resolve_target_version((&*full).into(), &spec) + .ok() + .and_then(|version| { + full.get_core_version(&version) + .map(|core| (spec, Arc::new(core))) + }) + }); + Ok(ManifestJobDone::Full { + name, + data: ManifestFullData::Full { + manifest: full, + speculative, + }, + }) + } + ManifestJob::Version { + name, + spec, + fetch_spec, + format: _, + } => { + let manifest = self.fetch_version_manifest(&name, &fetch_spec).await?; + Ok(ManifestJobDone::Version { + name, + spec, + manifest, + }) + } + ManifestJob::ExtractVersion { + name, + spec, + version, + full, + } => { + let manifest = + full.get_core_version(&version) + .map(Arc::new) + .ok_or_else(|| { + MockError(format!( + "Version {version} not found in manifest for {name}" + )) + })?; + Ok(ManifestJobDone::Version { + name, + spec, + manifest, + }) + } + } + } + } + + #[tokio::test] + async fn mock_registry_executes_manifest_provider_jobs() { + let mut registry = MockRegistryClient::new(); + registry.add_package( + "demo", + "1.0.0", + CoreVersionManifest { + name: "demo".to_string(), + version: "1.0.0".to_string(), + ..Default::default() + }, + ); + + let done = 
crate::service::ManifestProvider::execute_manifest_job( + ®istry, + crate::service::ManifestJob::Full { + name: "demo".to_string(), + spec: Some("latest".to_string()), + }, + ) + .await + .unwrap(); + + let crate::service::ManifestJobDone::Full { data, .. } = done else { + panic!("expected full manifest job result"); + }; + let crate::service::ManifestFullData::Full { speculative, .. } = data else { + panic!("expected full manifest data"); + }; + let (spec, manifest) = speculative.unwrap(); + assert_eq!(spec, "latest"); + assert_eq!(manifest.version, "1.0.0"); + } } From 5b0b1ce3cc8269f36b1a00412c94ab8e1362a2e7 Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 02:14:43 +0800 Subject: [PATCH 08/21] perf(pm): fan out registry http clients --- crates/ruborist/src/service/http.rs | 42 ++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/crates/ruborist/src/service/http.rs b/crates/ruborist/src/service/http.rs index fd5ca63c4..8be2cae46 100644 --- a/crates/ruborist/src/service/http.rs +++ b/crates/ruborist/src/service/http.rs @@ -9,7 +9,7 @@ //! | | | //! | true (npmmirror) false (npmjs.org) //! | | | -//! | fetch_version_manifest resolve_full_manifest +//! | fetch version job fetch full manifest job //! | GET /{name}/{spec} GET /{name} //! | Accept: abbreviated Accept: abbreviated //! | | + If-None-Match: {etag} @@ -29,7 +29,7 @@ //! v //! +-----------------------------------------------------------------+ //! | http.rs -- HTTP Client (this file) | -//! | global singleton reqwest::Client (LazyLock) | +//! | global reqwest::Client pool (LazyLock) | //! | rustls TLS + no_proxy + env proxy + CachingResolver | //! +-----------------------------------------------------------------+ //! | @@ -76,6 +76,8 @@ //! WASM targets skip DNS entirely (browser handles it). 
use std::sync::LazyLock; +#[cfg(not(target_arch = "wasm32"))] +use std::sync::atomic::{AtomicUsize, Ordering}; use std::time::Duration; use anyhow::{Context, Result, anyhow}; @@ -85,16 +87,48 @@ use anyhow::{Context, Result, anyhow}; /// error, which silently-stalled sockets never raise. const CONNECT_TIMEOUT: Duration = Duration::from_secs(5); -/// Global HTTP client with connection pooling and DNS caching. +/// Number of independent registry HTTP client pools. +/// +/// GHA npmjs pcap showed bun spreading resolve traffic across a few +/// Cloudflare edge IPs while a single reqwest pool concentrated requests on +/// one IP. Four pools keeps the model small but gives the resolver enough +/// independent keep-alive pools to fan out when npmjs/full-manifest +/// concurrency is raised. +#[cfg(not(target_arch = "wasm32"))] +const CLIENT_POOL_SIZE: usize = 4; + +/// Global HTTP clients with connection pooling and DNS caching. /// -/// Stores `Result` so that proxy-configuration errors are +/// Stores `Result, String>` so that proxy-configuration errors are /// surfaced to callers instead of panicking or calling `process::exit`. +#[cfg(not(target_arch = "wasm32"))] +static HTTP_CLIENTS: LazyLock, String>> = LazyLock::new(|| { + (0..CLIENT_POOL_SIZE) + .map(|_| client_builder().and_then(|b| b.build().context("Failed to build reqwest client"))) + .collect::>>() + .map_err(|e| e.to_string()) +}); + +#[cfg(not(target_arch = "wasm32"))] +static CLIENT_COUNTER: AtomicUsize = AtomicUsize::new(0); + +#[cfg(not(target_arch = "wasm32"))] +pub(crate) fn get_client() -> Result<&'static reqwest::Client> { + let clients = HTTP_CLIENTS.as_ref().map_err(|e| anyhow!("{e}"))?; + let idx = CLIENT_COUNTER.fetch_add(1, Ordering::Relaxed) % clients.len(); + Ok(&clients[idx]) +} + +/// WASM targets retain a single browser-backed client; there is no native TCP +/// connection pool to fan out. 
+#[cfg(target_arch = "wasm32")] static HTTP_CLIENT: LazyLock> = LazyLock::new(|| { client_builder() .and_then(|b| b.build().context("Failed to build reqwest client")) .map_err(|e| e.to_string()) }); +#[cfg(target_arch = "wasm32")] pub(crate) fn get_client() -> Result<&'static reqwest::Client> { HTTP_CLIENT.as_ref().map_err(|e| anyhow!("{e}")) } From 56101d88ac167391cf2e8d20a06bb270a2d995dd Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 02:25:22 +0800 Subject: [PATCH 09/21] perf(pm): add resolver pm wiring --- crates/pm/src/helper/ruborist_context.rs | 5 +- crates/pm/src/main.rs | 2 +- crates/pm/src/service/pipeline/receiver.rs | 2 +- crates/pm/src/util/json.rs | 60 --------------- crates/pm/src/util/logger.rs | 15 ---- crates/pm/src/util/manifest_store.rs | 48 +++++------- crates/pm/src/util/project_cache.rs | 4 +- crates/pm/src/util/retry.rs | 2 +- crates/pm/src/util/user_config.rs | 90 +++++++++++++++++++++- 9 files changed, 115 insertions(+), 113 deletions(-) diff --git a/crates/pm/src/helper/ruborist_context.rs b/crates/pm/src/helper/ruborist_context.rs index b47def019..49bebc738 100644 --- a/crates/pm/src/helper/ruborist_context.rs +++ b/crates/pm/src/helper/ruborist_context.rs @@ -12,7 +12,8 @@ use crate::util::logger::ProgressReceiver; use crate::util::manifest_store::DiskManifestStore; use crate::util::project_cache; use crate::util::user_config::{ - get_catalogs, get_manifests_concurrency_limit, get_peer_deps, get_registry, get_supports_semver, + get_catalogs, get_peer_deps, get_registry, get_resolver_manifests_concurrency_limit, + get_supports_semver, }; /// Tokio-based glob implementation. 
@@ -57,7 +58,7 @@ impl Context { cache_dir: Some(get_cache_dir()), manifest_store: Self::manifest_store(), warm_project_cache, - concurrency: get_manifests_concurrency_limit().await, + concurrency: get_resolver_manifests_concurrency_limit().await, peer_deps: get_peer_deps().await, glob: TokioGlob, receiver, diff --git a/crates/pm/src/main.rs b/crates/pm/src/main.rs index 3ec93fc1c..f9235cf33 100644 --- a/crates/pm/src/main.rs +++ b/crates/pm/src/main.rs @@ -92,7 +92,7 @@ struct Cli { #[arg(long, global = true, action = clap::ArgAction::SetTrue)] legacy_peer_deps: Option, - /// Maximum concurrent manifest fetches (default: 64) + /// Maximum concurrent manifest fetches (default: 64; npmjs/non-semver resolver: 256) #[arg(long, global = true)] manifests_concurrency_limit: Option, diff --git a/crates/pm/src/service/pipeline/receiver.rs b/crates/pm/src/service/pipeline/receiver.rs index 376a5b50c..283e6b626 100644 --- a/crates/pm/src/service/pipeline/receiver.rs +++ b/crates/pm/src/service/pipeline/receiver.rs @@ -123,7 +123,7 @@ mod tests { })); // Should not forward other events - receiver.on_event(BuildEvent::PreloadStart { count: 10 }); + receiver.on_event(BuildEvent::LevelStart { node_count: 10 }); // Only one message should be in the download channel assert!(channels.download_rx.try_recv().is_ok()); diff --git a/crates/pm/src/util/json.rs b/crates/pm/src/util/json.rs index 1c75f1d6c..9243152e2 100644 --- a/crates/pm/src/util/json.rs +++ b/crates/pm/src/util/json.rs @@ -1,9 +1,6 @@ -use std::fs::File; -use std::io::{self, BufWriter, Write}; use std::path::Path; use anyhow::{Context, Result}; -use serde::Serialize; use serde::de::DeserializeOwned; /// Read and parse a JSON file into the specified type. 
@@ -15,34 +12,6 @@ pub async fn read_json_file(path: &Path) -> Result { serde_json::from_slice(&bytes).with_context(|| format!("Failed to parse JSON from {path:?}")) } -/// Serialize `value` as compact JSON and stream it to `path` through a -/// [`BufWriter`], skipping the intermediate `Vec` that -/// `serde_json::to_vec` + `std::fs::write` would allocate. -/// -/// Synchronous on purpose: the caller in [`crate::util::manifest_store`] runs -/// it on a dedicated OS thread so manifest persistence never touches the -/// async runtime's worker or blocking pool. Async callers should wrap this -/// in `tokio::task::spawn_blocking` (or write the async-aware counterpart -/// when one is needed). -/// -/// The parent directory of `path` must already exist; this helper does *not* -/// `mkdir -p`. The cost-benefit of "try the write first, recover on -/// `NotFound`" is policy-level (warm-cache rewrites want to skip the extra -/// syscall every time), so the recovery loop lives at the call site, not -/// here. A missing parent surfaces as [`io::ErrorKind::NotFound`] for the -/// caller to match on. -/// -/// Serialization failures — rare for `derive(Serialize)` types, possible for -/// hand-written impls or maps with non-string keys — are folded into -/// [`io::Error`] via [`io::Error::other`] so the whole API speaks one error -/// type and callers can keep matching on [`io::ErrorKind`]. -pub fn write_compact_sync(path: &Path, value: &T) -> io::Result<()> { - let file = File::create(path)?; - let mut writer = BufWriter::new(file); - serde_json::to_writer(&mut writer, value).map_err(io::Error::other)?; - writer.flush() -} - /// Load package.json from a directory path and deserialize into the caller's /// chosen view type `T`. Use a full `PackageJson` for root projects, or a /// minimal view (e.g. 
`ScriptsView`) for node_modules to avoid parsing @@ -166,35 +135,6 @@ mod tests { assert_eq!(view.scripts.get("test").unwrap(), "node build/test.js"); } - #[tokio::test] - async fn write_compact_sync_round_trips_through_read_json_file() { - let dir = tempdir().unwrap(); - let path = dir.path().join("out.json"); - let value = json!({ - "name": "test", - "version": "1.0.0", - "deps": ["a", "b", "c"], - }); - - super::write_compact_sync(&path, &value).unwrap(); - - let read_back: Value = read_json_file(&path).await.unwrap(); - assert_eq!(read_back, value); - - // Compact form: no inter-token whitespace. - let raw = std::fs::read_to_string(&path).unwrap(); - assert!(!raw.contains(": ")); - assert!(!raw.contains(", ")); - } - - #[test] - fn write_compact_sync_requires_existing_parent_directory() { - let dir = tempdir().unwrap(); - let path = dir.path().join("missing").join("out.json"); - let err = super::write_compact_sync(&path, &json!({})).unwrap_err(); - assert_eq!(err.kind(), std::io::ErrorKind::NotFound); - } - #[tokio::test] async fn test_error_handling() { let non_existent_path = Path::new("non_existent.json"); diff --git a/crates/pm/src/util/logger.rs b/crates/pm/src/util/logger.rs index 6ba124bb2..864d60193 100644 --- a/crates/pm/src/util/logger.rs +++ b/crates/pm/src/util/logger.rs @@ -219,21 +219,6 @@ impl utoo_ruborist::progress::EventReceiver for ProgressReceiver { } use utoo_ruborist::progress::BuildEvent; match event { - BuildEvent::PreloadStart { count } | BuildEvent::PreloadQueued { count } => { - PROGRESS_BAR.inc_length(count as u64); - } - BuildEvent::PreloadFetching { name } => { - log_progress(&format!("fetching {}", name)); - } - BuildEvent::PreloadProgress { name, .. 
} => { - PROGRESS_BAR.inc(1); - log_progress(&format!("resolved {}", name)); - } - BuildEvent::PreloadComplete { success, failed } => { - PROGRESS_BAR.set_position(0); - PROGRESS_BAR.set_length(0); - log_progress(&format!("preload: {} ok, {} failed", success, failed)); - } BuildEvent::DependencyCount { count } => { PROGRESS_BAR.inc_length(count as u64); } diff --git a/crates/pm/src/util/manifest_store.rs b/crates/pm/src/util/manifest_store.rs index a2c60c65e..3fb846af9 100644 --- a/crates/pm/src/util/manifest_store.rs +++ b/crates/pm/src/util/manifest_store.rs @@ -11,11 +11,9 @@ //! Serialization and file writes run on a dedicated writer thread so manifest //! persistence does not occupy async runtime workers or Tokio's blocking pool. -use std::fs; -use std::io::ErrorKind; use std::path::{Path, PathBuf}; use std::sync::Arc; -use std::sync::mpsc::{self, SyncSender, TrySendError}; +use std::sync::mpsc::{self, Sender}; use std::thread::JoinHandle; use async_trait::async_trait; @@ -23,11 +21,7 @@ use serde::Serialize; use utoo_ruborist::model::manifest::CoreVersionManifest; use utoo_ruborist::service::{ManifestStore, VersionsInfo}; -use crate::util::json::{read_json_file, write_compact_sync}; - -/// Opportunistic writer backlog. If disk stalls beyond this, new cache writes -/// are dropped instead of letting resolver memory grow without bound. 
-const MANIFEST_WRITE_QUEUE_CAPACITY: usize = 1024; +use crate::util::json::read_json_file; pub struct DiskManifestStore { cache_dir: PathBuf, @@ -112,13 +106,13 @@ enum ManifestWriteJob { } struct ManifestWriter { - tx: SyncSender, + tx: Sender, handle: JoinHandle<()>, } impl ManifestWriter { fn spawn() -> Self { - let (tx, rx) = mpsc::sync_channel(MANIFEST_WRITE_QUEUE_CAPACITY); + let (tx, rx) = mpsc::channel(); let handle = std::thread::Builder::new() .name("utoo-manifest-store".to_string()) .spawn(move || { @@ -138,14 +132,8 @@ impl ManifestWriter { } fn enqueue(&self, job: ManifestWriteJob) { - match self.tx.try_send(job) { - Ok(()) => {} - Err(TrySendError::Full(_)) => { - tracing::debug!("Manifest store writer queue full; dropping cache write"); - } - Err(TrySendError::Disconnected(_)) => { - tracing::debug!("Manifest store writer stopped before accepting write"); - } + if self.tx.send(job).is_err() { + tracing::debug!("Manifest store writer stopped before accepting write"); } } @@ -157,23 +145,27 @@ impl ManifestWriter { } } -/// Apply the manifest-cache write policy on top of -/// [`crate::util::json::write_compact_sync`]: on `NotFound`, create the -/// parent directory once and retry — this is how the resolver hot path -/// avoids the up-front `mkdir` syscall on every warm-cache rewrite. All -/// errors are swallowed at the `debug` log level because the disk cache is -/// opportunistic; a dropped write only costs a future cache miss. +/// Serialize `value` and write to `path`. On `NotFound`, create the parent +/// directory and retry once — saves the mkdir syscall on every warm-cache +/// rewrite. Errors are logged at debug; disk cache is opportunistic. 
fn write_json_sync(path: &Path, value: &T) { - match write_compact_sync(path, value) { + let bytes = match serde_json::to_vec(value) { + Ok(b) => b, + Err(e) => { + tracing::debug!("Failed to serialize {path:?}: {e}"); + return; + } + }; + match std::fs::write(path, &bytes) { Ok(()) => {} - Err(e) if e.kind() == ErrorKind::NotFound => { + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { if let Some(parent) = path.parent() - && let Err(e) = fs::create_dir_all(parent) + && let Err(e) = std::fs::create_dir_all(parent) { tracing::debug!("Failed to create {parent:?}: {e}"); return; } - if let Err(e) = write_compact_sync(path, value) { + if let Err(e) = std::fs::write(path, &bytes) { tracing::debug!("Failed to write {path:?}: {e}"); } } diff --git a/crates/pm/src/util/project_cache.rs b/crates/pm/src/util/project_cache.rs index f4b0269ff..ccfb27de9 100644 --- a/crates/pm/src/util/project_cache.rs +++ b/crates/pm/src/util/project_cache.rs @@ -1,7 +1,7 @@ //! Disk persistence for ruborist's project-level manifest cache. //! -//! Stored at `/node_modules/.utoo-manifest.json`. Used to skip the -//! preload phase on warm installs. +//! Stored at `/node_modules/.utoo-manifest.json`. Used to warm the +//! demand resolver's in-memory manifest cache across installs. use std::path::{Path, PathBuf}; diff --git a/crates/pm/src/util/retry.rs b/crates/pm/src/util/retry.rs index 896141422..a210d9f6f 100644 --- a/crates/pm/src/util/retry.rs +++ b/crates/pm/src/util/retry.rs @@ -30,7 +30,7 @@ pub fn build_dns_cached_client() -> reqwest::Client { .read_timeout(std::time::Duration::from_secs(30)) // Timeout for individual read operations // No total timeout - large files (e.g. node binary ~100MB) need longer download time // No pool_max_idle_per_host - let reqwest manage connections freely - // Concurrency is controlled by semaphore in preload service + // Concurrency is controlled by each caller's semaphore. 
.build() .expect("Failed to build reqwest client") } diff --git a/crates/pm/src/util/user_config.rs b/crates/pm/src/util/user_config.rs index 34ee45a34..994033d7d 100644 --- a/crates/pm/src/util/user_config.rs +++ b/crates/pm/src/util/user_config.rs @@ -132,11 +132,27 @@ pub fn get_install_scope() -> InstallScope { INSTALL_SCOPE.get().copied().unwrap_or_default() } -// Manifest fetch concurrency configuration -static MANIFESTS_CONCURRENCY_LIMIT: LazyLock> = - LazyLock::new(|| ConfigValue::new("manifests-concurrency-limit", 64)); +// Manifest fetch concurrency configuration. +// +// Keep the user-visible/default tarball download limit at 64. Registry +// resolution can opt into a higher default for non-semver registries via +// `get_resolver_manifests_concurrency_limit`; tarball download/extract still +// uses `get_manifests_concurrency_limit_sync` so install IO is not inflated. +const DEFAULT_MANIFESTS_CONCURRENCY_LIMIT: usize = 64; +const NON_SEMVER_RESOLVER_CONCURRENCY_LIMIT: usize = 256; + +static MANIFESTS_CONCURRENCY_LIMIT: LazyLock> = LazyLock::new(|| { + ConfigValue::new( + "manifests-concurrency-limit", + DEFAULT_MANIFESTS_CONCURRENCY_LIMIT, + ) +}); +static MANIFESTS_CONCURRENCY_CLI_SET: OnceLock<()> = OnceLock::new(); pub fn set_manifests_concurrency_limit(value: Option) { + if value.is_some() { + let _ = MANIFESTS_CONCURRENCY_CLI_SET.set(()); + } MANIFESTS_CONCURRENCY_LIMIT.set(value); } @@ -144,6 +160,30 @@ pub async fn get_manifests_concurrency_limit() -> usize { MANIFESTS_CONCURRENCY_LIMIT.get().await } +fn resolver_manifest_concurrency_limit( + configured_limit: usize, + cli_set: bool, + supports_semver: Option, +) -> usize { + if !cli_set + && configured_limit == DEFAULT_MANIFESTS_CONCURRENCY_LIMIT + && supports_semver == Some(false) + { + NON_SEMVER_RESOLVER_CONCURRENCY_LIMIT + } else { + configured_limit + } +} + +pub async fn get_resolver_manifests_concurrency_limit() -> usize { + let limit = get_manifests_concurrency_limit().await; + 
resolver_manifest_concurrency_limit( + limit, + MANIFESTS_CONCURRENCY_CLI_SET.get().is_some(), + get_supports_semver(), + ) +} + pub fn get_manifests_concurrency_limit_sync() -> usize { MANIFESTS_CONCURRENCY_LIMIT.get_sync() } @@ -358,6 +398,50 @@ mod tests { assert!(!config.parse_config_value("anything")); } + #[test] + fn test_resolver_manifest_concurrency_raises_npmjs_default() { + assert_eq!( + resolver_manifest_concurrency_limit( + DEFAULT_MANIFESTS_CONCURRENCY_LIMIT, + false, + Some(false), + ), + NON_SEMVER_RESOLVER_CONCURRENCY_LIMIT + ); + } + + #[test] + fn test_resolver_manifest_concurrency_preserves_explicit_limit() { + assert_eq!( + resolver_manifest_concurrency_limit(32, true, Some(false)), + 32 + ); + assert_eq!( + resolver_manifest_concurrency_limit( + DEFAULT_MANIFESTS_CONCURRENCY_LIMIT, + true, + Some(false), + ), + DEFAULT_MANIFESTS_CONCURRENCY_LIMIT + ); + } + + #[test] + fn test_resolver_manifest_concurrency_preserves_semver_default() { + assert_eq!( + resolver_manifest_concurrency_limit( + DEFAULT_MANIFESTS_CONCURRENCY_LIMIT, + false, + Some(true), + ), + DEFAULT_MANIFESTS_CONCURRENCY_LIMIT + ); + assert_eq!( + resolver_manifest_concurrency_limit(DEFAULT_MANIFESTS_CONCURRENCY_LIMIT, false, None), + DEFAULT_MANIFESTS_CONCURRENCY_LIMIT + ); + } + #[tokio::test] async fn test_cache_dir_from_config_file() -> Result<()> { let temp_dir = TempDir::new()?; From 8b32ee57150edf702175303d6353e2572f77f931 Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 02:38:22 +0800 Subject: [PATCH 10/21] perf(pm): execute resolver manifest provider jobs --- crates/ruborist/src/service/provider.rs | 7 + crates/ruborist/src/service/registry.rs | 255 ++++++++++++++++++++++++ crates/ruborist/src/traits/registry.rs | 7 + 3 files changed, 269 insertions(+) diff --git a/crates/ruborist/src/service/provider.rs b/crates/ruborist/src/service/provider.rs index 65609a0ed..dff19b844 100644 --- a/crates/ruborist/src/service/provider.rs +++ 
b/crates/ruborist/src/service/provider.rs @@ -7,6 +7,7 @@ use std::sync::Arc; use async_trait::async_trait; +use bytes::Bytes; use super::cache::VersionsInfo; use super::manifest::MetadataFormat; @@ -80,3 +81,9 @@ pub trait ManifestProvider: RegistryClient + Clone + Send + Sync + 'static { /// de-duplication stay in the BFS loop. async fn execute_manifest_job(&self, job: ManifestJob) -> Result; } + +/// Raw full-manifest bytes fetched by a provider before parsing. +pub(crate) enum ProviderFullManifestBytes { + Fresh { bytes: Bytes, etag: Option }, + NotModified { versions: Arc }, +} diff --git a/crates/ruborist/src/service/registry.rs b/crates/ruborist/src/service/registry.rs index be2a8b61f..a27aab8f7 100644 --- a/crates/ruborist/src/service/registry.rs +++ b/crates/ruborist/src/service/registry.rs @@ -21,6 +21,7 @@ use std::sync::Arc; use anyhow::anyhow; +use async_trait::async_trait; /// Get current timestamp in seconds since UNIX epoch. /// Works on both native and WASM targets. @@ -43,6 +44,9 @@ use dashmap::DashSet; use super::cache::{PackageCache, Versions, VersionsInfo}; use super::manifest; +use super::provider::{ + ManifestFullData, ManifestJob, ManifestJobDone, ManifestProvider, ProviderFullManifestBytes, +}; use super::store::{ManifestStore, NoopStore}; use crate::model::manifest::{CoreVersionManifest, FullManifest, extract_core_version_off_runtime}; use crate::resolver::semver::normalize_spec; @@ -199,6 +203,78 @@ enum FullManifestResult { NotModified, } +#[cfg_attr(target_arch = "wasm32", async_trait(?Send))] +#[cfg_attr(not(target_arch = "wasm32"), async_trait)] +impl ManifestProvider for UnifiedRegistry { + async fn execute_manifest_job(&self, job: ManifestJob) -> Result { + match job { + ManifestJob::Full { name, spec } => { + let data = match self.fetch_full_manifest_job(&name).await? 
{ + ProviderFullManifestBytes::Fresh { bytes, etag } => { + let (manifest, speculative) = + manifest::parse_full_manifest_with_core_off_runtime(bytes, spec) + .await?; + let manifest = Arc::new(manifest); + let speculative = speculative.map(|(spec, core)| { + let core = Arc::new(core); + self.store_version_manifest(&name, Arc::clone(&core)); + (spec, core) + }); + let versions = Arc::new(VersionsInfo { + versions: Versions { + version_list: manifest.versions.clone(), + dist_tags: manifest.dist_tags.clone(), + }, + etag, + last_updated: current_timestamp_secs(), + }); + self.store.store_versions(&name, versions); + ManifestFullData::Full { + manifest, + speculative, + } + } + ProviderFullManifestBytes::NotModified { versions } => { + ManifestFullData::Versions(versions) + } + }; + + Ok(ManifestJobDone::Full { name, data }) + } + ManifestJob::Version { + name, + spec, + fetch_spec, + format, + } => { + let manifest = self + .fetch_version_job_manifest(&name, &spec, &fetch_spec, format) + .await?; + Ok(ManifestJobDone::Version { + name, + spec, + manifest, + }) + } + ManifestJob::ExtractVersion { + name, + spec, + version, + full, + } => { + let manifest = self + .extract_version_job_manifest(&name, &spec, version, full) + .await?; + Ok(ManifestJobDone::Version { + name, + spec, + manifest, + }) + } + } + } +} + impl UnifiedRegistry { /// Create a builder for `UnifiedRegistry`. 
pub fn builder() -> UnifiedRegistryBuilder { @@ -220,6 +296,87 @@ impl UnifiedRegistry { &self.cache } + fn store_version_manifest(&self, name: &str, manifest: Arc) { + let version = manifest.version.clone(); + self.store.store_version_manifest(name, &version, manifest); + } + + async fn fetch_full_manifest_job( + &self, + name: &str, + ) -> Result { + let store_versions = self.store.load_versions(name).await.map(Arc::new); + let etag = store_versions.as_ref().and_then(|v| v.etag.clone()); + + match manifest::fetch_full_manifest_bytes(manifest::FetchManifestOptions { + registry_url: &self.registry_url, + name, + format: manifest::MetadataFormat::Abbreviated, + etag: etag.as_deref(), + }) + .await + .map_err(RegistryError)? + { + manifest::FetchManifestBytesResult::Ok(bytes, etag) => { + Ok(ProviderFullManifestBytes::Fresh { bytes, etag }) + } + manifest::FetchManifestBytesResult::NotModified => { + let versions = store_versions.ok_or_else(|| { + RegistryError(anyhow!( + "304 Not Modified without cached versions for {name}" + )) + })?; + Ok(ProviderFullManifestBytes::NotModified { versions }) + } + } + } + + async fn extract_version_job_manifest( + &self, + name: &str, + _spec: &str, + version: String, + full: Arc, + ) -> Result, RegistryError> { + let (resolved_version, manifest) = extract_core_version_off_runtime(full, version).await; + let manifest = manifest.ok_or_else(|| { + RegistryError(anyhow!( + "Version {} not found in manifest for {}", + resolved_version, + name + )) + })?; + self.store_version_manifest(name, Arc::clone(&manifest)); + Ok(manifest) + } + + async fn fetch_version_job_manifest( + &self, + name: &str, + _spec: &str, + fetch_spec: &str, + format: manifest::MetadataFormat, + ) -> Result, RegistryError> { + if deno_semver::Version::parse_from_npm(fetch_spec).is_ok() + && let Some(manifest) = self.store.load_version_manifest(name, fetch_spec).await + { + return Ok(Arc::new(manifest)); + } + + let manifest = Arc::new( + 
manifest::fetch_version_manifest(manifest::FetchVersionManifestOptions { + registry_url: &self.registry_url, + name, + spec: fetch_spec, + format, + }) + .await + .map_err(RegistryError)?, + ); + self.store_version_manifest(name, Arc::clone(&manifest)); + Ok(manifest) + } + /// Resolve full manifest through memory → store → network with ETag validation. /// /// Single-flight cache flow: @@ -509,6 +666,10 @@ impl RegistryClient for UnifiedRegistry { self.supports_semver } + fn registry_url(&self) -> &str { + &self.registry_url + } + fn cache_version_manifest(&self, name: &str, spec: &str, manifest: Arc) { self.cache .set_version_manifest(name.to_string(), spec.to_string(), manifest); @@ -572,7 +733,44 @@ impl RegistryClient for UnifiedRegistry { #[cfg(test)] mod tests { + use std::sync::Mutex; + use super::*; + use crate::service::{ManifestJob, ManifestJobDone, ManifestProvider, ManifestStore}; + + #[derive(Default)] + struct RecordingStore { + stored_versions: Mutex>, + } + + #[async_trait] + impl ManifestStore for RecordingStore { + async fn load_versions(&self, _name: &str) -> Option { + None + } + + async fn load_version_manifest( + &self, + _name: &str, + _version: &str, + ) -> Option { + None + } + + fn store_versions(&self, _name: &str, _info: Arc) {} + + fn store_version_manifest( + &self, + name: &str, + version: &str, + _manifest: Arc, + ) { + self.stored_versions + .lock() + .unwrap() + .push((name.to_string(), version.to_string())); + } + } #[test] fn test_is_npm_registry() { @@ -642,4 +840,61 @@ mod tests { // Both registries share the same cache assert!(Arc::ptr_eq(®istry1.cache, ®istry2.cache)); } + + #[tokio::test] + async fn test_unified_registry_executes_extract_manifest_provider_job() { + let store = Arc::new(RecordingStore::default()); + let registry = UnifiedRegistry::builder() + .registry("https://registry.npmmirror.com") + .store(store.clone()) + .build(); + + let (full, _) = manifest::parse_full_manifest_with_core_off_runtime( + 
bytes::Bytes::from_static( + br#"{ + "name":"provider-extract-demo", + "dist-tags":{"latest":"1.0.0"}, + "versions":{ + "1.0.0":{ + "name":"provider-extract-demo", + "version":"1.0.0", + "dist":{"tarball":"https://registry.example/demo-1.0.0.tgz"} + } + } + }"#, + ), + None, + ) + .await + .unwrap(); + + let done = ManifestProvider::execute_manifest_job( + ®istry, + ManifestJob::ExtractVersion { + name: "provider-extract-demo".to_string(), + spec: "latest".to_string(), + version: "1.0.0".to_string(), + full: Arc::new(full), + }, + ) + .await + .unwrap(); + + let ManifestJobDone::Version { + spec, + manifest: returned, + .. + } = done + else { + panic!("expected version manifest job"); + }; + + assert_eq!(spec, "latest"); + assert_eq!(returned.name, "provider-extract-demo"); + assert_eq!(returned.version, "1.0.0"); + assert_eq!( + store.stored_versions.lock().unwrap().as_slice(), + &[("provider-extract-demo".to_string(), "1.0.0".to_string())] + ); + } } diff --git a/crates/ruborist/src/traits/registry.rs b/crates/ruborist/src/traits/registry.rs index eb00fd7d3..e30d67112 100644 --- a/crates/ruborist/src/traits/registry.rs +++ b/crates/ruborist/src/traits/registry.rs @@ -133,6 +133,13 @@ pub trait RegistryClient { false } + /// Base registry URL used by schedulers that need to classify raw work. + /// + /// Implementations without a concrete URL can keep the default. + fn registry_url(&self) -> &str { + "" + } + /// Fetch full package manifest from registry. 
/// /// Returns the complete package manifest with all versions, wrapped in From 48257d56bf6010138c9cf2b50e3b8f3659d91337 Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 02:43:17 +0800 Subject: [PATCH 11/21] perf(pm): share resolver placement helpers --- crates/ruborist/src/resolver/builder.rs | 150 ++++++++++++++++-------- 1 file changed, 101 insertions(+), 49 deletions(-) diff --git a/crates/ruborist/src/resolver/builder.rs b/crates/ruborist/src/resolver/builder.rs index b0bf2794c..d5e216923 100644 --- a/crates/ruborist/src/resolver/builder.rs +++ b/crates/ruborist/src/resolver/builder.rs @@ -651,6 +651,97 @@ pub async fn process_dependency( } } +/// Place an already-resolved registry package into the graph. +/// +/// The demand resolver uses this after a manifest job completes; the legacy +/// resolver path also goes through it so placement semantics stay shared. +pub fn process_dependency_with_resolved( + graph: &mut DependencyGraph, + node_index: NodeIndex, + edge_info: &DependencyEdgeInfo, + resolved: &ResolvedPackage, + config: &BuildDepsConfig, +) -> ProcessResult { + match graph.find_compatible_node(node_index, &edge_info.name, &edge_info.spec) { + FindResult::Reuse(existing_index) => { + graph.mark_dependency_resolved(edge_info.edge_id, existing_index); + update_node_type_from_edge(graph, node_index, existing_index, &edge_info.edge_type); + ProcessResult::Reused(existing_index) + } + FindResult::Conflict(conflict_parent) | FindResult::New(conflict_parent) => { + let new_node = create_package_node(&edge_info.name, resolved, conflict_parent, graph); + let new_index = graph.add_node(new_node); + graph.add_physical_edge(conflict_parent, new_index); + graph.mark_dependency_resolved(edge_info.edge_id, new_index); + update_node_type_from_edge(graph, node_index, new_index, &edge_info.edge_type); + add_edges_from( + graph, + new_index, + &*resolved.manifest, + &EdgeContext::new(config.peer_deps, DevDeps::Exclude), + ); + 
ProcessResult::Created(new_index) + } + } +} + +fn chain_err( + graph: &DependencyGraph, + parent: NodeIndex, + edge: &DependencyEdgeInfo, + inner: ResolveError, +) -> ResolveError { + let mut chain = graph.logical_ancestry(parent); + chain.push((edge.name.clone(), edge.spec.clone())); + ResolveError::WithChain { + chain, + source: Box::new(inner), + } +} + +fn handle_processed( + graph: &DependencyGraph, + receiver: &E, + parent: NodeIndex, + edge: &DependencyEdgeInfo, + processed: &ProcessResult, + next_level: &mut Vec, +) { + match processed { + ProcessResult::Created(idx) => { + if let Some(node) = graph.get_node(*idx) { + receiver.on_event(BuildEvent::Resolved { + name: &edge.name, + version: &node.version, + }); + if let NodeManifest::Registry(ref manifest) = node.manifest { + let parent_path = graph.get_node(parent).map(|p| p.path.as_path()); + receiver.on_event(BuildEvent::PackagePlaced { + package: manifest.as_ref().into(), + path: &node.path, + parent_path, + }); + } + } + next_level.push(*idx); + } + ProcessResult::Reused(idx) => { + if let Some(node) = graph.get_node(*idx) { + receiver.on_event(BuildEvent::Reused { + name: &edge.name, + version: &node.version, + }); + } + } + ProcessResult::Skipped => { + receiver.on_event(BuildEvent::Skipped { + name: &edge.name, + spec: &edge.spec, + }); + } + } +} + /// Build the complete dependency tree using BFS traversal. /// /// This is the main entry point for dependency resolution. It starts from @@ -837,56 +928,17 @@ async fn run_bfs_phase( receiver.on_event(BuildEvent::Resolving { name: &edge_info.name, }); - let result = process_dependency(graph, registry, node_index, &edge_info, config) + let processed = process_dependency(graph, registry, node_index, &edge_info, config) .await - .map_err(|inner| { - let mut chain = graph.logical_ancestry(node_index); - chain.push((edge_info.name.clone(), edge_info.spec.clone())); - ResolveError::WithChain { - chain, - source: Box::new(inner), - } - }); - match result? 
{ - ProcessResult::Created(idx) => { - // Extract node info for events - if let Some(node) = graph.get_node(idx) { - receiver.on_event(BuildEvent::Resolved { - name: &edge_info.name, - version: &node.version, - }); - - // Send PackagePlaced for pipeline cloning - if let NodeManifest::Registry(ref manifest) = node.manifest { - // Get parent path for dependency ordering - let parent_path = graph - .get_node(node_index) - .map(|parent| parent.path.as_path()); - receiver.on_event(BuildEvent::PackagePlaced { - package: manifest.as_ref().into(), - path: &node.path, - parent_path, - }); - } - } - - next_level.push(idx); - } - ProcessResult::Reused(idx) => { - if let Some(node) = graph.get_node(idx) { - receiver.on_event(BuildEvent::Reused { - name: &edge_info.name, - version: &node.version, - }); - } - } - ProcessResult::Skipped => { - receiver.on_event(BuildEvent::Skipped { - name: &edge_info.name, - spec: &edge_info.spec, - }); - } - } + .map_err(|inner| chain_err(graph, node_index, &edge_info, inner))?; + handle_processed( + graph, + receiver, + node_index, + &edge_info, + &processed, + &mut next_level, + ); } } From 0520ceae0221cf0918b3aa07a541cfb32f6dda61 Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 02:49:27 +0800 Subject: [PATCH 12/21] perf(pm): require manifest provider for builder --- crates/ruborist/src/resolver/builder.rs | 44 +++++++++++++++++++------ 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/crates/ruborist/src/resolver/builder.rs b/crates/ruborist/src/resolver/builder.rs index d5e216923..87d2d9c7d 100644 --- a/crates/ruborist/src/resolver/builder.rs +++ b/crates/ruborist/src/resolver/builder.rs @@ -32,6 +32,7 @@ use crate::model::node::EdgeType; use crate::model::package_json::PackageJson; use crate::resolver::preload::{PreloadConfig, preload_manifests}; use crate::resolver::registry::{ResolveError, resolve_registry_dep}; +use crate::service::ManifestProvider; use crate::spec::{Catalogs, PackageSpec, Protocol}; use 
crate::traits::progress::{BuildEvent, EventReceiver, NoopReceiver}; use crate::traits::registry::{RegistryClient, ResolvedPackage}; @@ -758,11 +759,15 @@ fn handle_processed( /// // Add initial dependency edges to root... /// build_deps(&mut graph, ®istry, PeerDeps::Include).await?; /// ``` -pub async fn build_deps( +pub async fn build_deps( graph: &mut DependencyGraph, registry: &R, peer_deps: PeerDeps, -) -> Result<(), ResolveError> { +) -> Result<(), ResolveError> +where + R: ManifestProvider, + R::Error: Send, +{ let config = BuildDepsConfig::default().with_peer_deps(peer_deps); build_deps_with_config(graph, registry, config, &NoopReceiver).await } @@ -780,12 +785,17 @@ pub async fn build_deps( /// * `registry` - Registry client for fetching packages /// * `peer_deps` - How to handle peer dependencies /// * `receiver` - Event receiver for handling build events -pub async fn build_deps_with_receiver( +pub async fn build_deps_with_receiver( graph: &mut DependencyGraph, registry: &R, peer_deps: PeerDeps, receiver: &E, -) -> Result<(), ResolveError> { +) -> Result<(), ResolveError> +where + R: ManifestProvider, + R::Error: Send, + E: EventReceiver, +{ let config = BuildDepsConfig::default().with_peer_deps(peer_deps); build_deps_with_config(graph, registry, config, receiver).await } @@ -810,12 +820,17 @@ pub async fn build_deps_with_receiver( /// /// build_deps_with_config(&mut graph, ®istry, config, &receiver).await?; /// ``` -pub async fn build_deps_with_config( +pub async fn build_deps_with_config( graph: &mut DependencyGraph, registry: &R, config: BuildDepsConfig, receiver: &E, -) -> Result<(), ResolveError> { +) -> Result<(), ResolveError> +where + R: ManifestProvider, + R::Error: Send, + E: EventReceiver, +{ tracing::debug!( "Starting dependency tree build, peer_deps: {:?}, concurrency: {}, skip_preload: {}", config.peer_deps, @@ -973,10 +988,14 @@ use std::path::Path; /// let pkg: PackageJson = serde_json::from_str(&pkg_content)?; /// let lock = resolve(&pkg, 
®istry).await?; /// ``` -pub async fn resolve( +pub async fn resolve( pkg: &PackageJson, registry: &R, -) -> Result> { +) -> Result> +where + R: ManifestProvider, + R::Error: Send, +{ resolve_with_options(pkg, registry, PeerDeps::Include, &NoopReceiver).await } @@ -987,12 +1006,17 @@ pub async fn resolve( /// * `registry` - Registry client for fetching packages /// * `peer_deps` - How to handle peer dependencies /// * `receiver` - Event receiver for progress tracking -pub async fn resolve_with_options( +pub async fn resolve_with_options( pkg: &PackageJson, registry: &R, peer_deps: PeerDeps, receiver: &E, -) -> Result> { +) -> Result> +where + R: ManifestProvider, + R::Error: Send, + E: EventReceiver, +{ // Create graph with root node let mut graph = DependencyGraph::from_package_json(PathBuf::from("."), pkg.clone()); From 32ef04cacdced8759b6dcc8736bf719674e97ed4 Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 02:52:45 +0800 Subject: [PATCH 13/21] perf(pm): thread warm project cache into resolver config --- crates/ruborist/src/resolver/builder.rs | 11 ++++++++++- crates/ruborist/src/service/api.rs | 3 ++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/crates/ruborist/src/resolver/builder.rs b/crates/ruborist/src/resolver/builder.rs index 87d2d9c7d..b98f9b313 100644 --- a/crates/ruborist/src/resolver/builder.rs +++ b/crates/ruborist/src/resolver/builder.rs @@ -32,7 +32,7 @@ use crate::model::node::EdgeType; use crate::model::package_json::PackageJson; use crate::resolver::preload::{PreloadConfig, preload_manifests}; use crate::resolver::registry::{ResolveError, resolve_registry_dep}; -use crate::service::ManifestProvider; +use crate::service::{ManifestProvider, ProjectCacheData}; use crate::spec::{Catalogs, PackageSpec, Protocol}; use crate::traits::progress::{BuildEvent, EventReceiver, NoopReceiver}; use crate::traits::registry::{RegistryClient, ResolvedPackage}; @@ -118,6 +118,8 @@ pub struct BuildDepsConfig { /// Catalog definitions 
for the `catalog:` dependency protocol. /// Key `""` = default catalog, other keys = named catalogs. pub catalogs: Catalogs, + /// Host-provided project cache used to seed resolver manifest state. + pub warm_project_cache: Option, } impl Default for BuildDepsConfig { @@ -130,6 +132,7 @@ impl Default for BuildDepsConfig { git_clone_cache: Arc::new(GitCloneCache::new()), http_fetch_cache: Arc::new(HttpFetchCache::new()), catalogs: HashMap::new(), + warm_project_cache: None, } } } @@ -164,6 +167,12 @@ impl BuildDepsConfig { self.catalogs = catalogs; self } + + /// Set the host-provided warm project cache. + pub fn with_warm_project_cache(mut self, warm_project_cache: Option) -> Self { + self.warm_project_cache = warm_project_cache; + self + } } /// Snapshot of node dependency flags to avoid borrowing conflicts. diff --git a/crates/ruborist/src/service/api.rs b/crates/ruborist/src/service/api.rs index 878b357a1..7df51a303 100644 --- a/crates/ruborist/src/service/api.rs +++ b/crates/ruborist/src/service/api.rs @@ -239,7 +239,8 @@ where .with_peer_deps(peer_deps) .with_concurrency(concurrency) .with_skip_preload(skip_preload) - .with_catalogs(catalogs); + .with_catalogs(catalogs) + .with_warm_project_cache(warm_project_cache); if let Some(dir) = cache_dir { config = config.with_cache_dir(dir); } From 1efa749a3a86c79840d078e81143a47932bb3a97 Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 02:56:44 +0800 Subject: [PATCH 14/21] perf(pm): add resolver demand mainloop --- crates/ruborist/src/resolver/builder.rs | 1215 ++++++++++++++++++----- crates/ruborist/src/service/api.rs | 79 +- 2 files changed, 981 insertions(+), 313 deletions(-) diff --git a/crates/ruborist/src/resolver/builder.rs b/crates/ruborist/src/resolver/builder.rs index b98f9b313..e7a1be14b 100644 --- a/crates/ruborist/src/resolver/builder.rs +++ b/crates/ruborist/src/resolver/builder.rs @@ -7,19 +7,17 @@ //! - Hoisting (placing packages as high as possible in the tree) //! - Override rules //! 
- Different dependency types (prod, dev, peer, optional) -//! - Parallel manifest preloading for performance +//! - Demand-driven parallel manifest jobs for performance //! -//! # Two-Phase Resolution +//! # Demand BFS Resolution //! -//! The builder uses a two-phase approach for optimal performance: -//! 1. **Preload Phase**: Parallel fetch of all manifests to warm up caches -//! 2. **Build Phase**: Sequential BFS traversal reading from cache -//! -//! This separation allows for maximum parallelism during network I/O -//! while keeping the graph building logic simple and deterministic. +//! The builder owns breadth-first traversal, per-run manifest cache, waiters, +//! and inflight de-duplication. Provider tasks only execute concrete manifest +//! jobs such as fetch, parse, extract, and persistence. +use futures::stream::{FuturesUnordered, StreamExt}; use petgraph::graph::NodeIndex; -use std::collections::HashMap; +use std::collections::{HashMap, VecDeque}; use std::path::PathBuf; use std::sync::Arc; @@ -28,13 +26,19 @@ use anyhow::Context as _; use crate::model::graph::{DependencyGraph, FindResult, PackageNode}; use crate::model::manifest::NodeManifest; +use crate::model::manifest::{CoreVersionManifest, FullManifest, VersionsRef}; use crate::model::node::EdgeType; use crate::model::package_json::PackageJson; -use crate::resolver::preload::{PreloadConfig, preload_manifests}; use crate::resolver::registry::{ResolveError, resolve_registry_dep}; -use crate::service::{ManifestProvider, ProjectCacheData}; -use crate::spec::{Catalogs, PackageSpec, Protocol}; +use crate::resolver::semver::normalize_spec; +use crate::resolver::version::resolve_target_version; +use crate::service::{ + ManifestFullData, ManifestJob, ManifestJobDone, ManifestProvider, MetadataFormat, + ProjectCacheData, +}; +use crate::spec::{Catalogs, PackageSpec, Protocol, SpecStr}; use crate::traits::progress::{BuildEvent, EventReceiver, NoopReceiver}; +use crate::traits::registry::RegistryError; use 
crate::traits::registry::{RegistryClient, ResolvedPackage}; /// Dispatch a git/github spec to the real `gix`-backed resolver when the @@ -100,15 +104,15 @@ pub use super::edges::{ }; pub use crate::model::node::{DevDeps, PeerDeps}; +const DEFAULT_CONCURRENCY: usize = 128; + /// Configuration for dependency resolution. #[derive(Debug, Clone)] pub struct BuildDepsConfig { /// How to handle peer dependencies. pub peer_deps: PeerDeps, - /// Maximum number of concurrent manifest fetches during preload + /// Maximum number of concurrent manifest jobs. pub concurrency: usize, - /// Whether to skip preload phase (useful when cache is already warm) - pub skip_preload: bool, /// Cache directory for git clones (defaults to `~/.cache/nm`) pub cache_dir: Option, /// Shared dedup cache for concurrent git clone operations @@ -118,7 +122,7 @@ pub struct BuildDepsConfig { /// Catalog definitions for the `catalog:` dependency protocol. /// Key `""` = default catalog, other keys = named catalogs. pub catalogs: Catalogs, - /// Host-provided project cache used to seed resolver manifest state. + /// Host-provided project cache used to seed the resolver-owned manifest cache. 
pub warm_project_cache: Option, } @@ -126,8 +130,7 @@ impl Default for BuildDepsConfig { fn default() -> Self { Self { peer_deps: PeerDeps::Skip, - concurrency: crate::resolver::preload::DEFAULT_CONCURRENCY, - skip_preload: false, + concurrency: DEFAULT_CONCURRENCY, cache_dir: dirs::home_dir().map(|d| d.join(".cache/nm")), git_clone_cache: Arc::new(GitCloneCache::new()), http_fetch_cache: Arc::new(HttpFetchCache::new()), @@ -150,12 +153,6 @@ impl BuildDepsConfig { self } - /// Create config that skips preload phase - pub fn with_skip_preload(mut self, skip: bool) -> Self { - self.skip_preload = skip; - self - } - /// Set the cache directory for git clones pub fn with_cache_dir(mut self, cache_dir: PathBuf) -> Self { self.cache_dir = Some(cache_dir); @@ -168,7 +165,6 @@ impl BuildDepsConfig { self } - /// Set the host-provided warm project cache. pub fn with_warm_project_cache(mut self, warm_project_cache: Option) -> Self { self.warm_project_cache = warm_project_cache; self @@ -185,44 +181,6 @@ struct NodeFlags { is_peer: bool, } -/// Gather all unresolved deps from root and workspace nodes for preloading. -/// -/// Only registry specs (e.g. `^4.17.0`) are collected. `catalog:` specs are -/// resolved at edge creation time, so by the time this runs they are already -/// concrete registry specs. 
-fn gather_preload_deps(graph: &DependencyGraph, peer_deps: PeerDeps) -> Vec<(String, String)> { - use crate::spec::SpecStr; - use std::collections::HashSet; - - let mut deps = HashSet::new(); - - let collect = |node_index: NodeIndex, deps: &mut HashSet<(String, String)>| { - for (_, edge) in graph.get_dependency_edges(node_index) { - if edge.valid { - continue; - } - if peer_deps == PeerDeps::Skip && edge.edge_type == EdgeType::Peer { - continue; - } - if edge.spec.is_registry_spec() { - deps.insert((edge.name.clone(), edge.spec.clone())); - } - } - }; - - collect(graph.root_index, &mut deps); - - for node_index in graph.graph.node_indices() { - if let Some(node) = graph.get_node(node_index) - && node.is_workspace() - { - collect(node_index, &mut deps); - } - } - - deps.into_iter().collect() -} - /// Create a new package node for a resolved dependency. /// /// # Arguments @@ -661,97 +619,6 @@ pub async fn process_dependency( } } -/// Place an already-resolved registry package into the graph. -/// -/// The demand resolver uses this after a manifest job completes; the legacy -/// resolver path also goes through it so placement semantics stay shared. 
-pub fn process_dependency_with_resolved( - graph: &mut DependencyGraph, - node_index: NodeIndex, - edge_info: &DependencyEdgeInfo, - resolved: &ResolvedPackage, - config: &BuildDepsConfig, -) -> ProcessResult { - match graph.find_compatible_node(node_index, &edge_info.name, &edge_info.spec) { - FindResult::Reuse(existing_index) => { - graph.mark_dependency_resolved(edge_info.edge_id, existing_index); - update_node_type_from_edge(graph, node_index, existing_index, &edge_info.edge_type); - ProcessResult::Reused(existing_index) - } - FindResult::Conflict(conflict_parent) | FindResult::New(conflict_parent) => { - let new_node = create_package_node(&edge_info.name, resolved, conflict_parent, graph); - let new_index = graph.add_node(new_node); - graph.add_physical_edge(conflict_parent, new_index); - graph.mark_dependency_resolved(edge_info.edge_id, new_index); - update_node_type_from_edge(graph, node_index, new_index, &edge_info.edge_type); - add_edges_from( - graph, - new_index, - &*resolved.manifest, - &EdgeContext::new(config.peer_deps, DevDeps::Exclude), - ); - ProcessResult::Created(new_index) - } - } -} - -fn chain_err( - graph: &DependencyGraph, - parent: NodeIndex, - edge: &DependencyEdgeInfo, - inner: ResolveError, -) -> ResolveError { - let mut chain = graph.logical_ancestry(parent); - chain.push((edge.name.clone(), edge.spec.clone())); - ResolveError::WithChain { - chain, - source: Box::new(inner), - } -} - -fn handle_processed( - graph: &DependencyGraph, - receiver: &E, - parent: NodeIndex, - edge: &DependencyEdgeInfo, - processed: &ProcessResult, - next_level: &mut Vec, -) { - match processed { - ProcessResult::Created(idx) => { - if let Some(node) = graph.get_node(*idx) { - receiver.on_event(BuildEvent::Resolved { - name: &edge.name, - version: &node.version, - }); - if let NodeManifest::Registry(ref manifest) = node.manifest { - let parent_path = graph.get_node(parent).map(|p| p.path.as_path()); - receiver.on_event(BuildEvent::PackagePlaced { - package: 
manifest.as_ref().into(), - path: &node.path, - parent_path, - }); - } - } - next_level.push(*idx); - } - ProcessResult::Reused(idx) => { - if let Some(node) = graph.get_node(*idx) { - receiver.on_event(BuildEvent::Reused { - name: &edge.name, - version: &node.version, - }); - } - } - ProcessResult::Skipped => { - receiver.on_event(BuildEvent::Skipped { - name: &edge.name, - spec: &edge.spec, - }); - } - } -} - /// Build the complete dependency tree using BFS traversal. /// /// This is the main entry point for dependency resolution. It starts from @@ -811,21 +678,20 @@ where /// Build the complete dependency tree with full configuration. /// -/// This is the most flexible entry point for dependency resolution. It performs: -/// 1. **Preload Phase** (unless skipped): Parallel fetch of all manifests to warm up caches -/// 2. **Build Phase**: Sequential BFS traversal reading from cache +/// This is the most flexible entry point for dependency resolution. It runs +/// demand BFS and schedules manifest jobs only when the current frontier needs +/// them. /// /// # Arguments /// * `graph` - The dependency graph (should have root node and initial edges) /// * `registry` - Registry client for fetching packages -/// * `config` - Build configuration (concurrency, peer_deps, skip_preload) +/// * `config` - Build configuration (concurrency, peer_deps, cache_dir, etc.) 
/// * `receiver` - Event receiver for handling build events /// /// # Example /// ```ignore /// let config = BuildDepsConfig::default() -/// .with_concurrency(50) -/// .with_skip_preload(true); // Skip preload if cache is warm +/// .with_concurrency(50); /// /// build_deps_with_config(&mut graph, ®istry, config, &receiver).await?; /// ``` @@ -835,135 +701,1008 @@ pub async fn build_deps_with_config( config: BuildDepsConfig, receiver: &E, ) -> Result<(), ResolveError> +where + R: ManifestProvider, + R::Error: Send, + E: EventReceiver, +{ + build_deps_with_config_output(graph, registry, config, receiver) + .await + .map(|_| ()) +} + +pub(crate) async fn build_deps_with_config_output( + graph: &mut DependencyGraph, + registry: &R, + config: BuildDepsConfig, + receiver: &E, +) -> Result> where R: ManifestProvider, R::Error: Send, E: EventReceiver, { tracing::debug!( - "Starting dependency tree build, peer_deps: {:?}, concurrency: {}, skip_preload: {}", + "Starting dependency tree build, peer_deps: {:?}, concurrency: {}", config.peer_deps, - config.concurrency, - config.skip_preload + config.concurrency ); - // Phase 1: Preload manifests in parallel (unless skipped) - run_preload_phase(graph, registry, &config, receiver).await; - - // Phase 2: BFS traversal to build the dependency tree - run_bfs_phase(graph, registry, &config, receiver).await?; + let manifest_cache = run_main_loop_bfs(graph, registry, &config, receiver).await?; receiver.on_event(BuildEvent::Complete { total_nodes: graph.graph.node_count(), }); - Ok(()) + Ok(manifest_cache) } -/// Run the preload phase to warm up the cache with manifests. 
-async fn run_preload_phase( - graph: &DependencyGraph, +type WaitingEdge = (NodeIndex, DependencyEdgeInfo); + +type VersionKey = (String, String); + +#[derive(Default)] +pub(crate) struct ResolverManifestCache { + entries: Vec<(String, String, Arc)>, +} + +impl ResolverManifestCache { + pub(crate) fn into_project_cache(self) -> ProjectCacheData { + let mut project_cache = ProjectCacheData::default(); + for (name, spec, manifest) in self.entries { + let version = manifest.version.clone(); + let pkg_cache = project_cache.cache.entry(name).or_default(); + pkg_cache.specs.insert(spec, version.clone()); + pkg_cache + .manifests + .entry(version) + .or_insert_with(|| (*manifest).clone()); + } + project_cache + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +enum FetchKey { + Full(String), + Version(String, String), +} + +impl ManifestJob { + fn key(&self) -> FetchKey { + match self { + Self::Full { name, .. } => FetchKey::Full(name.clone()), + Self::Version { name, spec, .. } | Self::ExtractVersion { name, spec, .. } => { + FetchKey::Version(name.clone(), spec.clone()) + } + } + } +} + +enum FetchDone { + Full { + name: String, + result: Result, + }, + Version { + name: String, + spec: String, + result: Result, String>, + }, +} + +impl FetchDone { + fn key(&self) -> FetchKey { + match self { + Self::Full { name, .. } => FetchKey::Full(name.clone()), + Self::Version { name, spec, .. 
} => FetchKey::Version(name.clone(), spec.clone()), + } + } +} + +type FetchFuture = tokio::task::JoinHandle; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum FetchPriority { + Demand, + Prefetch, +} + +#[derive(Default)] +struct FetchQueues { + demand: VecDeque, + prefetch: VecDeque, + queued: HashMap, + active: HashMap, +} + +impl FetchQueues { + fn enqueue(&mut self, request: ManifestJob, priority: FetchPriority) { + let key = request.key(); + if self.active.contains_key(&key) { + return; + } + + match (self.queued.get(&key).copied(), priority) { + (Some(FetchPriority::Demand), _) + | (Some(FetchPriority::Prefetch), FetchPriority::Prefetch) => {} + (Some(FetchPriority::Prefetch), FetchPriority::Demand) => { + self.queued.insert(key, FetchPriority::Demand); + self.demand.push_back(request); + } + (None, FetchPriority::Demand) => { + self.queued.insert(key, FetchPriority::Demand); + self.demand.push_back(request); + } + (None, FetchPriority::Prefetch) => { + self.queued.insert(key, FetchPriority::Prefetch); + self.prefetch.push_back(request); + } + } + } + + fn complete(&mut self, key: &FetchKey) { + self.queued.remove(key); + self.active.remove(key); + } + + fn pop_next(&mut self, prefetch_concurrency: usize) -> Option { + if let Some(request) = self.pop_priority(FetchPriority::Demand) { + return Some(request); + } + + let prefetch_concurrency = if self + .queued + .values() + .any(|priority| *priority == FetchPriority::Demand) + { + prefetch_concurrency + } else { + usize::MAX + }; + + if self.active_prefetches() >= prefetch_concurrency { + return None; + } + + self.pop_priority(FetchPriority::Prefetch) + } + + fn pop_priority(&mut self, priority: FetchPriority) -> Option { + loop { + let request = match priority { + FetchPriority::Demand => self.demand.pop_front(), + FetchPriority::Prefetch => self.prefetch.pop_front(), + }?; + let key = request.key(); + if self.queued.get(&key).copied() != Some(priority) { + continue; + } + self.queued.remove(&key); + 
self.active.insert(key, priority); + return Some(request); + } + } + + fn active_prefetches(&self) -> usize { + self.active + .values() + .filter(|priority| **priority == FetchPriority::Prefetch) + .count() + } +} + +fn prefetch_concurrency_limit(concurrency: usize) -> usize { + (concurrency / 4).max(1) +} + +#[derive(Default)] +struct ManifestState { + full_cache: HashMap>, + versions_cache: HashMap>, + version_cache: HashMap>, + full_waiters: HashMap>, + version_waiters: HashMap>, + full_failures: HashMap, + version_failures: HashMap, + fetch_queues: FetchQueues, +} + +impl ManifestState { + fn with_warm_project_cache(warm: Option<&ProjectCacheData>) -> Self { + let mut state = Self::default(); + let Some(warm) = warm else { + return state; + }; + for (name, pkg_cache) in &warm.cache { + for (spec, version) in &pkg_cache.specs { + let Some(manifest) = pkg_cache.manifests.get(version) else { + continue; + }; + let manifest = Arc::new(manifest.clone()); + state + .version_cache + .insert((name.clone(), spec.clone()), Arc::clone(&manifest)); + state + .version_cache + .entry((name.clone(), version.clone())) + .or_insert(manifest); + } + } + state + } + + fn into_resolver_cache(self) -> ResolverManifestCache { + ResolverManifestCache { + entries: self + .version_cache + .into_iter() + .map(|((name, spec), manifest)| (name, spec, manifest)) + .collect(), + } + } + + fn schedule_registry_fetch( + &mut self, + name: String, + spec: String, + supports_semver: bool, + priority: FetchPriority, + ) { + let (real_name, real_spec) = normalize_spec(&name, &spec); + if supports_semver { + let key = (real_name, real_spec); + if self.version_cache.contains_key(&key) || self.version_failures.contains_key(&key) { + return; + } + self.fetch_queues.enqueue( + ManifestJob::Version { + name: key.0.clone(), + spec: key.1.clone(), + fetch_spec: key.1, + format: version_metadata_format(supports_semver), + }, + priority, + ); + } else { + if self.full_cache.contains_key(&real_name) + || 
self.versions_cache.contains_key(&real_name) + || self.full_failures.contains_key(&real_name) + { + return; + } + self.fetch_queues.enqueue( + ManifestJob::Full { + name: real_name, + spec: Some(real_spec), + }, + priority, + ); + } + } + + fn enqueue_version_extract(&mut self, name: String, version: String, full: Arc) { + self.fetch_queues.enqueue( + ManifestJob::ExtractVersion { + name, + spec: version.clone(), + version, + full, + }, + FetchPriority::Demand, + ); + } + + fn enqueue_version_fetch(&mut self, name: String, fetch_spec: String, supports_semver: bool) { + self.fetch_queues.enqueue( + ManifestJob::Version { + name, + spec: fetch_spec.clone(), + fetch_spec, + format: version_metadata_format(supports_semver), + }, + FetchPriority::Demand, + ); + } + + fn schedule_transitive_prefetches( + &mut self, + manifest: &CoreVersionManifest, + peer_deps: PeerDeps, + supports_semver: bool, + ) { + for (name, spec) in collect_registry_prefetches(manifest, peer_deps) { + self.schedule_registry_fetch(name, spec, supports_semver, FetchPriority::Prefetch); + } + } + + fn apply_fetch_result( + &mut self, + done: FetchDone, + supports_semver: bool, + peer_deps: PeerDeps, + level_pending: &mut VecDeque, + ) { + let done_key = done.key(); + self.fetch_queues.complete(&done_key); + + match done { + FetchDone::Full { name, result } => { + match result { + Ok(ManifestFullData::Full { + manifest: full, + speculative, + }) => { + if let Some((resolved_spec, manifest)) = speculative { + self.version_cache + .insert((name.clone(), resolved_spec), Arc::clone(&manifest)); + self.version_cache + .entry((name.clone(), manifest.version.clone())) + .or_insert_with(|| Arc::clone(&manifest)); + self.schedule_transitive_prefetches( + &manifest, + peer_deps, + supports_semver, + ); + } + self.full_cache.insert(name.clone(), full); + } + Ok(ManifestFullData::Versions(versions)) => { + self.versions_cache.insert(name.clone(), versions); + } + Err(e) => { + 
self.full_failures.insert(name.clone(), e); + } + } + if let Some(waiters) = self.full_waiters.remove(&name) { + level_pending.extend(waiters); + } + } + FetchDone::Version { name, spec, result } => { + let key = (name, spec); + match result { + Ok(manifest) => { + self.version_cache + .insert(key.clone(), Arc::clone(&manifest)); + self.version_cache + .entry((key.0.clone(), manifest.version.clone())) + .or_insert_with(|| Arc::clone(&manifest)); + self.schedule_transitive_prefetches(&manifest, peer_deps, supports_semver); + } + Err(e) => { + self.version_failures.insert(key.clone(), e); + } + } + if let Some(waiters) = self.version_waiters.remove(&key) { + level_pending.extend(waiters); + } + } + } + } +} + +fn version_metadata_format(supports_semver: bool) -> MetadataFormat { + if supports_semver { + MetadataFormat::Abbreviated + } else { + MetadataFormat::Complete + } +} + +fn registry_error(message: impl Into) -> ResolveError +where + E: From, +{ + ResolveError::Registry(RegistryError(anyhow::anyhow!(message.into())).into()) +} + +async fn fetch_registry_manifest_inner(registry: R, request: ManifestJob) -> FetchDone +where + R: ManifestProvider, +{ + let key = request.key(); + match registry.execute_manifest_job(request).await { + Ok(done) => match done { + ManifestJobDone::Full { name, data } => FetchDone::Full { + name, + result: Ok(data), + }, + ManifestJobDone::Version { + name, + spec, + manifest, + } => FetchDone::Version { + name, + spec, + result: Ok(manifest), + }, + }, + Err(error) => match key { + FetchKey::Full(name) => FetchDone::Full { + name, + result: Err(format!("{error:#}")), + }, + FetchKey::Version(name, spec) => FetchDone::Version { + name, + spec, + result: Err(format!("{error:#}")), + }, + }, + } +} + +#[cfg(not(target_arch = "wasm32"))] +fn fetch_registry_manifest(registry: R, request: ManifestJob) -> FetchFuture +where + R: ManifestProvider, + R::Error: Send, +{ + tokio::spawn(fetch_registry_manifest_inner(registry, request)) +} + 
+#[cfg(target_arch = "wasm32")] +fn fetch_registry_manifest(registry: R, request: ManifestJob) -> FetchFuture +where + R: ManifestProvider, +{ + tokio::task::spawn_local(fetch_registry_manifest_inner(registry, request)) +} + +fn pump_fetches( + fetches: &mut FuturesUnordered, + fetch_queues: &mut FetchQueues, registry: &R, + concurrency: usize, +) where + R: ManifestProvider, + R::Error: Send, +{ + let prefetch_concurrency = prefetch_concurrency_limit(concurrency); + while fetches.len() < concurrency { + let Some(request) = fetch_queues.pop_next(prefetch_concurrency) else { + break; + }; + fetches.push(fetch_registry_manifest(registry.clone(), request)); + } +} + +fn try_reuse_dependency( + graph: &mut DependencyGraph, + parent: NodeIndex, + edge: &DependencyEdgeInfo, +) -> Option { + match graph.find_compatible_node(parent, &edge.name, &edge.spec) { + FindResult::Reuse(existing_index) => { + graph.mark_dependency_resolved(edge.edge_id, existing_index); + update_node_type_from_edge(graph, parent, existing_index, &edge.edge_type); + Some(ProcessResult::Reused(existing_index)) + } + FindResult::Conflict(_) | FindResult::New(_) => None, + } +} + +pub fn process_dependency_with_resolved( + graph: &mut DependencyGraph, + node_index: NodeIndex, + edge_info: &DependencyEdgeInfo, + resolved: &ResolvedPackage, config: &BuildDepsConfig, +) -> ProcessResult { + match graph.find_compatible_node(node_index, &edge_info.name, &edge_info.spec) { + FindResult::Reuse(existing_index) => { + graph.mark_dependency_resolved(edge_info.edge_id, existing_index); + update_node_type_from_edge(graph, node_index, existing_index, &edge_info.edge_type); + ProcessResult::Reused(existing_index) + } + FindResult::Conflict(conflict_parent) | FindResult::New(conflict_parent) => { + let new_node = create_package_node(&edge_info.name, resolved, conflict_parent, graph); + let new_index = graph.add_node(new_node); + graph.add_physical_edge(conflict_parent, new_index); + 
graph.mark_dependency_resolved(edge_info.edge_id, new_index); + update_node_type_from_edge(graph, node_index, new_index, &edge_info.edge_type); + add_edges_from( + graph, + new_index, + &*resolved.manifest, + &EdgeContext::new(config.peer_deps, DevDeps::Exclude), + ); + ProcessResult::Created(new_index) + } + } +} + +fn chain_err( + graph: &DependencyGraph, + parent: NodeIndex, + edge: &DependencyEdgeInfo, + inner: ResolveError, +) -> ResolveError { + let mut chain = graph.logical_ancestry(parent); + chain.push((edge.name.clone(), edge.spec.clone())); + ResolveError::WithChain { + chain, + source: Box::new(inner), + } +} + +fn handle_processed( + graph: &DependencyGraph, receiver: &E, + parent: NodeIndex, + edge: &DependencyEdgeInfo, + processed: &ProcessResult, + next_level: &mut Vec, ) { - if config.skip_preload { - return; + match processed { + ProcessResult::Created(idx) => { + if let Some(node) = graph.get_node(*idx) { + receiver.on_event(BuildEvent::Resolved { + name: &edge.name, + version: &node.version, + }); + if let NodeManifest::Registry(ref manifest) = node.manifest { + let parent_path = graph.get_node(parent).map(|p| p.path.as_path()); + receiver.on_event(BuildEvent::PackagePlaced { + package: manifest.as_ref().into(), + path: &node.path, + parent_path, + }); + } + } + next_level.push(*idx); + } + ProcessResult::Reused(idx) => { + if let Some(node) = graph.get_node(*idx) { + receiver.on_event(BuildEvent::Reused { + name: &edge.name, + version: &node.version, + }); + } + } + ProcessResult::Skipped => { + receiver.on_event(BuildEvent::Skipped { + name: &edge.name, + spec: &edge.spec, + }); + } } +} - let start = tokio::time::Instant::now(); +async fn handle_resolved_registry_manifest( + graph: &mut DependencyGraph, + registry: &R, + receiver: &E, + parent: NodeIndex, + edge: &DependencyEdgeInfo, + manifest: Arc, + config: &BuildDepsConfig, +) -> Result> +where + R: RegistryClient, + E: EventReceiver, +{ + let resolved = ResolvedPackage { + name: 
edge.name.clone(), + version: manifest.version.clone(), + manifest, + }; - let initial_deps = gather_preload_deps(graph, config.peer_deps); - if initial_deps.is_empty() { - return; - } + let processed = if graph + .check_override(parent, &edge.name, Some(&resolved.version)) + .is_some() + { + process_dependency(graph, registry, parent, edge, config) + .await + .map_err(|inner| chain_err(graph, parent, edge, inner))? + } else { + receiver.on_event(BuildEvent::PackageResolved((&*resolved.manifest).into())); + process_dependency_with_resolved(graph, parent, edge, &resolved, config) + }; - tracing::debug!("Preload phase: {} initial dependencies", initial_deps.len()); - receiver.on_event(BuildEvent::PreloadStart { - count: initial_deps.len(), - }); + Ok(processed) +} + +fn resolve_version_from_versions( + edge: &DependencyEdgeInfo, + package_name: &str, + versions: VersionsRef<'_>, + real_spec: &str, +) -> Result, ResolveError> { + if versions.versions.is_empty() { + if edge.edge_type == EdgeType::Optional { + return Ok(None); + } + return Err(ResolveError::NoVersions(package_name.to_string())); + } - let preload_config = PreloadConfig { - peer_deps: config.peer_deps, - concurrency: config.concurrency, + let version = match resolve_target_version(versions, real_spec) { + Ok(version) => version, + Err(_) if edge.edge_type == EdgeType::Optional => return Ok(None), + Err(e) => { + return Err(ResolveError::Version(format!( + "{}@{}: {}", + edge.name, real_spec, e + ))); + } }; + Ok(Some(version)) +} - let stats = preload_manifests( - initial_deps, - registry, - preload_config, - receiver, - |_name, _manifest| { - // Registry client's resolve_package should cache the manifest - }, - ) - .await; +fn resolve_version_from_full_manifest( + edge: &DependencyEdgeInfo, + full: &FullManifest, + real_spec: &str, +) -> Result, ResolveError> { + resolve_version_from_versions(edge, &full.name, full.into(), real_spec) +} - tracing::debug!( - "Preload phase completed: {} success, {} 
failed", - stats.success_count, - stats.failed_count - ); - receiver.on_event(BuildEvent::PreloadComplete { - success: stats.success_count, - failed: stats.failed_count, +fn collect_registry_prefetches( + manifest: &CoreVersionManifest, + peer_deps: PeerDeps, +) -> Vec<(String, String)> { + let mut deps = Vec::new(); + manifest.for_each_dep(peer_deps, DevDeps::Exclude, |_, name, spec| { + if spec.is_registry_spec() { + deps.push((name.to_string(), spec.to_string())); + } }); - - tracing::debug!("Preload phase: {:?}", start.elapsed()); + deps } -/// Run the BFS traversal phase to build the dependency tree. -async fn run_bfs_phase( +async fn run_main_loop_bfs( graph: &mut DependencyGraph, registry: &R, config: &BuildDepsConfig, receiver: &E, -) -> Result<(), ResolveError> { - let start = tokio::time::Instant::now(); +) -> Result> +where + R: ManifestProvider, + R::Error: Send, + E: EventReceiver, +{ + let supports_semver = registry.supports_semver_resolution(); + let concurrency = config.concurrency.max(1); + + let mut state = ManifestState::with_warm_project_cache(config.warm_project_cache.as_ref()); + let mut fetches: FuturesUnordered = FuturesUnordered::new(); - let mut current_level = vec![graph.root_index]; + let root_idx = graph.root_index; + let mut current_level = vec![root_idx]; while !current_level.is_empty() { receiver.on_event(BuildEvent::LevelStart { node_count: current_level.len(), }); + let mut next_level = Vec::new(); + let mut level_pending = VecDeque::new(); - for node_index in current_level { - // Add workspace nodes to next level - for (_, dep) in graph.get_dependency_edges(node_index) { + for node_index in ¤t_level { + for (_, dep) in graph.get_dependency_edges(*node_index) { if dep.valid && let Some(to) = dep.to && let Some(n) = graph.get_node(to) && n.is_workspace() - && node_index == graph.root_index + && *node_index == root_idx { next_level.push(to); } } - // Process unresolved dependencies - let unresolved = collect_unresolved_edges(graph, 
node_index); + let unresolved = collect_unresolved_edges(graph, *node_index); receiver.on_event(BuildEvent::DependencyCount { count: unresolved.len(), }); + for edge in unresolved { + level_pending.push_back((*node_index, edge)); + } + } - for edge_info in unresolved { - receiver.on_event(BuildEvent::Resolving { - name: &edge_info.name, - }); - let processed = process_dependency(graph, registry, node_index, &edge_info, config) - .await - .map_err(|inner| chain_err(graph, node_index, &edge_info, inner))?; - handle_processed( - graph, - receiver, - node_index, - &edge_info, - &processed, - &mut next_level, + loop { + pump_fetches(&mut fetches, &mut state.fetch_queues, registry, concurrency); + + while let Some((parent, edge)) = level_pending.pop_front() { + receiver.on_event(BuildEvent::Resolving { name: &edge.name }); + + if !edge.spec.is_registry_spec() { + let processed = process_dependency(graph, registry, parent, &edge, config) + .await + .map_err(|inner| chain_err(graph, parent, &edge, inner))?; + handle_processed(graph, receiver, parent, &edge, &processed, &mut next_level); + continue; + } + + if let Some(processed) = try_reuse_dependency(graph, parent, &edge) { + handle_processed(graph, receiver, parent, &edge, &processed, &mut next_level); + continue; + } + + let (real_name, real_spec) = normalize_spec(&edge.name, &edge.spec); + if supports_semver { + let key = (real_name.clone(), real_spec.clone()); + if let Some(error) = state.version_failures.get(&key) { + if edge.edge_type == EdgeType::Optional { + receiver.on_event(BuildEvent::Skipped { + name: &edge.name, + spec: &edge.spec, + }); + continue; + } + return Err(chain_err( + graph, + parent, + &edge, + registry_error(format!("{}@{}: {error}", real_name, real_spec)), + )); + } + + if let Some(manifest) = state.version_cache.get(&key).cloned() { + let processed = handle_resolved_registry_manifest( + graph, registry, receiver, parent, &edge, manifest, config, + ) + .await?; + handle_processed( + graph, + 
receiver, + parent, + &edge, + &processed, + &mut next_level, + ); + continue; + } + + state + .version_waiters + .entry(key.clone()) + .or_default() + .push((parent, edge)); + state.schedule_registry_fetch( + key.0, + key.1, + supports_semver, + FetchPriority::Demand, + ); + } else { + if let Some(error) = state.full_failures.get(&real_name) { + if edge.edge_type == EdgeType::Optional { + receiver.on_event(BuildEvent::Skipped { + name: &edge.name, + spec: &edge.spec, + }); + continue; + } + return Err(chain_err( + graph, + parent, + &edge, + registry_error(format!("{}: {error}", real_name)), + )); + } + + let version_key = (real_name.clone(), real_spec.clone()); + if let Some(error) = state.version_failures.get(&version_key) { + if edge.edge_type == EdgeType::Optional { + receiver.on_event(BuildEvent::Skipped { + name: &edge.name, + spec: &edge.spec, + }); + continue; + } + return Err(chain_err( + graph, + parent, + &edge, + registry_error(format!("{}@{}: {error}", real_name, real_spec)), + )); + } + + if let Some(manifest) = state.version_cache.get(&version_key).cloned() { + let processed = handle_resolved_registry_manifest( + graph, registry, receiver, parent, &edge, manifest, config, + ) + .await?; + handle_processed( + graph, + receiver, + parent, + &edge, + &processed, + &mut next_level, + ); + continue; + } + + if let Some(full) = state.full_cache.get(&real_name).cloned() { + let Some(resolved_version) = + resolve_version_from_full_manifest::( + &edge, &full, &real_spec, + ) + .map_err(|inner| chain_err(graph, parent, &edge, inner))? 
+ else { + receiver.on_event(BuildEvent::Skipped { + name: &edge.name, + spec: &edge.spec, + }); + continue; + }; + + let exact_key = (real_name.clone(), resolved_version.clone()); + if let Some(error) = state.version_failures.get(&exact_key) { + if edge.edge_type == EdgeType::Optional { + receiver.on_event(BuildEvent::Skipped { + name: &edge.name, + spec: &edge.spec, + }); + continue; + } + return Err(chain_err( + graph, + parent, + &edge, + registry_error(format!("{}@{}: {error}", real_name, real_spec)), + )); + } + + if let Some(manifest) = state.version_cache.get(&exact_key).cloned() { + state + .version_cache + .insert(version_key, Arc::clone(&manifest)); + let processed = handle_resolved_registry_manifest( + graph, registry, receiver, parent, &edge, manifest, config, + ) + .await?; + handle_processed( + graph, + receiver, + parent, + &edge, + &processed, + &mut next_level, + ); + continue; + } + + state + .version_waiters + .entry(exact_key) + .or_default() + .push((parent, edge)); + state.enqueue_version_extract(real_name, resolved_version, full); + continue; + } + + if let Some(versions) = state.versions_cache.get(&real_name).cloned() { + let Some(resolved_version) = resolve_version_from_versions::( + &edge, + &real_name, + (&*versions).into(), + &real_spec, + ) + .map_err(|inner| chain_err(graph, parent, &edge, inner))? 
+ else { + receiver.on_event(BuildEvent::Skipped { + name: &edge.name, + spec: &edge.spec, + }); + continue; + }; + + let exact_key = (real_name.clone(), resolved_version.clone()); + if let Some(error) = state.version_failures.get(&exact_key) { + if edge.edge_type == EdgeType::Optional { + receiver.on_event(BuildEvent::Skipped { + name: &edge.name, + spec: &edge.spec, + }); + continue; + } + return Err(chain_err( + graph, + parent, + &edge, + registry_error(format!("{}@{}: {error}", real_name, real_spec)), + )); + } + + if let Some(manifest) = state.version_cache.get(&exact_key).cloned() { + state + .version_cache + .insert(version_key, Arc::clone(&manifest)); + let processed = handle_resolved_registry_manifest( + graph, registry, receiver, parent, &edge, manifest, config, + ) + .await?; + handle_processed( + graph, + receiver, + parent, + &edge, + &processed, + &mut next_level, + ); + continue; + } + + state + .version_waiters + .entry(exact_key) + .or_default() + .push((parent, edge)); + state.enqueue_version_fetch(real_name, resolved_version, supports_semver); + continue; + } + + state + .full_waiters + .entry(real_name.clone()) + .or_default() + .push((parent, edge)); + state.schedule_registry_fetch( + real_name, + real_spec, + supports_semver, + FetchPriority::Demand, + ); + } + + pump_fetches(&mut fetches, &mut state.fetch_queues, registry, concurrency); + } + + loop { + let ready = std::future::poll_fn(|cx| match fetches.poll_next_unpin(cx) { + std::task::Poll::Ready(done) => std::task::Poll::Ready(done), + std::task::Poll::Pending => std::task::Poll::Ready(None), + }) + .await; + let Some(done) = ready else { + break; + }; + let done = done.map_err(|e| { + registry_error::(format!("manifest fetch task failed: {e}")) + })?; + + state.apply_fetch_result( + done, + supports_semver, + config.peer_deps, + &mut level_pending, ); } + + if !level_pending.is_empty() { + continue; + } + + if !state.full_waiters.is_empty() || !state.version_waiters.is_empty() { + 
pump_fetches(&mut fetches, &mut state.fetch_queues, registry, concurrency); + } + + if state.full_waiters.is_empty() && state.version_waiters.is_empty() { + break; + } + + let Some(done) = fetches.next().await else { + let mut fallback = Vec::new(); + for (_, waiters) in state.full_waiters.drain() { + fallback.extend(waiters); + } + for (_, waiters) in state.version_waiters.drain() { + fallback.extend(waiters); + } + for (parent, edge) in fallback { + let processed = process_dependency(graph, registry, parent, &edge, config) + .await + .map_err(|inner| chain_err(graph, parent, &edge, inner))?; + handle_processed(graph, receiver, parent, &edge, &processed, &mut next_level); + } + break; + }; + let done = done.map_err(|e| { + registry_error::(format!("manifest fetch task failed: {e}")) + })?; + + state.apply_fetch_result(done, supports_semver, config.peer_deps, &mut level_pending); } receiver.on_event(BuildEvent::LevelComplete { @@ -972,8 +1711,7 @@ async fn run_bfs_phase( current_level = next_level; } - tracing::debug!("Build phase: {:?}", start.elapsed()); - Ok(()) + Ok(state.into_resolver_cache()) } // ============================================================================ @@ -1347,13 +2085,6 @@ mod tests { assert_eq!(edges.get("lodash"), Some(&"^4.17.0".to_string())); assert_eq!(edges.get("react"), Some(&"^18.0.0".to_string())); assert_eq!(edges.get("tslib"), Some(&"^2.0.0".to_string())); - - // Since edges are now resolved, gather_preload_deps should find them - let deps = gather_preload_deps(&graph, PeerDeps::Skip); - let deps_map: HashMap = deps.into_iter().collect(); - assert_eq!(deps_map.get("lodash"), Some(&"^4.17.0".to_string())); - assert_eq!(deps_map.get("react"), Some(&"^18.0.0".to_string())); - assert_eq!(deps_map.get("tslib"), Some(&"^2.0.0".to_string())); } #[test] @@ -1381,9 +2112,5 @@ mod tests { .map(|(_, e)| (e.name.clone(), e.spec.clone())) .collect(); assert_eq!(edges.get("missing-pkg"), Some(&"catalog:".to_string())); - - // 
gather_preload_deps should NOT include it (not a registry spec) - let deps = gather_preload_deps(&graph, PeerDeps::Skip); - assert!(deps.is_empty()); } } diff --git a/crates/ruborist/src/service/api.rs b/crates/ruborist/src/service/api.rs index 7df51a303..ed4520684 100644 --- a/crates/ruborist/src/service/api.rs +++ b/crates/ruborist/src/service/api.rs @@ -19,13 +19,12 @@ //! let json = serde_json::to_string_pretty(&output.lock)?; //! ``` +use anyhow::Result; use std::collections::HashMap; use std::path::PathBuf; use std::sync::Arc; -use anyhow::Result; - -use super::cache::{PackageCache, ProjectCacheData}; +use super::cache::ProjectCacheData; use super::fs::Glob; use super::registry::UnifiedRegistry; use super::store::{ManifestStore, NoopStore}; @@ -33,9 +32,8 @@ use crate::model::graph::{DependencyGraph, PackageNode}; use crate::model::node::EdgeType; use crate::model::package_json::PackageJson; use crate::model::package_lock::PackageLock; -use crate::model::util::parse_package_spec; use crate::resolver::builder::{ - BuildDepsConfig, DevDeps, EdgeContext, PeerDeps, add_edges_from, build_deps_with_config, + BuildDepsConfig, DevDeps, EdgeContext, PeerDeps, add_edges_from, build_deps_with_config_output, }; use crate::resolver::runtime::install_runtime_from_map; use crate::resolver::workspace::WorkspaceDiscovery; @@ -55,7 +53,8 @@ pub struct BuildDepsOptions { /// (everything is in-memory). pub manifest_store: Arc, /// Project-level warm cache pre-loaded by the host. Pre-populates the - /// in-memory manifest cache to skip the preload phase on a warm install. + /// in-memory manifest cache so demand BFS can satisfy warm specs without + /// network requests. pub warm_project_cache: Option, /// Maximum concurrent network requests pub concurrency: usize, @@ -204,24 +203,11 @@ where add_edges_from(&mut graph, workspace_index, &ws_pkg, &edge_ctx); } - // 7. 
Create in-memory package cache and pre-populate from the warm - // project cache (host-supplied; `None` for cold runs). - let package_cache = Arc::new(PackageCache::default()); - let (cache_count, missing_count) = - prepopulate_warm_cache(&package_cache, warm_project_cache.as_ref()); - if missing_count > 0 { - tracing::warn!( - "Project cache has {missing_count} specs with missing manifests, will re-fetch from registry" - ); - } - if cache_count > 0 { - tracing::debug!("Loaded {cache_count} manifests from project cache"); - } - - // 8. Create registry client with shared cache and persistence backend. + // 7. Create registry client with persistence backend. The resolver loop + // owns the in-memory manifest cache; the provider handles only I/O, + // parsing, and optional persistent store lookups/writes. let mut builder = UnifiedRegistry::builder() .registry(®istry_url) - .cache(package_cache) .store(Arc::clone(&manifest_store)); if let Some(semver) = supports_semver { builder = builder.supports_semver(semver); @@ -234,44 +220,25 @@ where registry.supports_semver(), ); - let skip_preload = cache_count > 0; let mut config = BuildDepsConfig::default() .with_peer_deps(peer_deps) .with_concurrency(concurrency) - .with_skip_preload(skip_preload) .with_catalogs(catalogs) .with_warm_project_cache(warm_project_cache); if let Some(dir) = cache_dir { config = config.with_cache_dir(dir); } - if skip_preload { - tracing::debug!( - "Skipping preload phase (project cache has {} entries)", - cache_count - ); - } - // Preserve the typed error via `Error::new` + `.context(...)` so CLI // renderers (e.g. pm's format_print) can downcast and pretty-print the // dependency chain carried by `ResolveError::WithChain`. 
- build_deps_with_config(&mut graph, ®istry, config, &receiver) + let manifest_cache = build_deps_with_config_output(&mut graph, ®istry, config, &receiver) .await .map_err(|e| anyhow::Error::new(e).context("Dependency resolution failed"))?; let (packages, _total) = graph.serialize_to_packages(&root_path); - // Export project cache from memory cache for the host to persist. - let mut project_cache = ProjectCacheData::default(); - for (key, manifest) in registry.cache().export_version_manifests() { - // `parse_package_spec` rather than `split_once('@')` so scoped names - // (`@babel/core@^7.0.0`) parse to (`@babel/core`, `^7.0.0`). - let (name, spec) = parse_package_spec(&key); - let version = manifest.version.clone(); - let pkg_cache = project_cache.cache.entry(name.to_string()).or_default(); - pkg_cache.specs.insert(spec.to_string(), version.clone()); - pkg_cache.manifests.insert(version, (*manifest).clone()); - } + let project_cache = manifest_cache.into_project_cache(); Ok(BuildDepsOutput { lock: PackageLock::new(&pkg.name, &pkg.version, packages), @@ -279,32 +246,6 @@ where }) } -/// Pre-populate `cache` from a warm project cache. Returns -/// `(loaded, missing)` — `loaded` is the count of usable spec→manifest -/// entries; `missing` counts specs whose resolved version had no manifest -/// (corrupted cache, will be re-fetched). 
-fn prepopulate_warm_cache(cache: &PackageCache, warm: Option<&ProjectCacheData>) -> (usize, usize) { - let Some(warm) = warm else { - return (0, 0); - }; - let mut loaded = 0; - let mut missing = 0; - for (name, pkg_cache) in &warm.cache { - for (spec, version) in &pkg_cache.specs { - let Some(manifest) = pkg_cache.manifests.get(version) else { - tracing::debug!( - "Project cache missing manifest: {name}@{spec} (version {version})" - ); - missing += 1; - continue; - }; - cache.set_version_manifest(name.clone(), spec.clone(), Arc::new(manifest.clone())); - loaded += 1; - } - } - (loaded, missing) -} - #[cfg(test)] mod tests { use super::*; From f8cd7aba4c5845d171c40e32d08e9a3ef42adacd Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 03:23:56 +0800 Subject: [PATCH 15/21] test(pm): cover resolver demand mainloop --- crates/ruborist/src/resolver/builder.rs | 470 +++++++++++++++++++++++- 1 file changed, 469 insertions(+), 1 deletion(-) diff --git a/crates/ruborist/src/resolver/builder.rs b/crates/ruborist/src/resolver/builder.rs index e7a1be14b..9e6b573e5 100644 --- a/crates/ruborist/src/resolver/builder.rs +++ b/crates/ruborist/src/resolver/builder.rs @@ -1797,10 +1797,11 @@ fn graph_to_package_lock( mod tests { use std::collections::HashMap; use std::sync::Arc; + use std::sync::atomic::{AtomicUsize, Ordering}; use super::*; use crate::model::manifest::CoreVersionManifest; - use crate::traits::registry::mock::MockRegistryClient; + use crate::traits::registry::mock::{MockError, MockRegistryClient}; fn create_version_manifest(name: &str, version: &str) -> CoreVersionManifest { CoreVersionManifest { @@ -1936,6 +1937,473 @@ mod tests { assert_eq!(lodash.version, Some("4.17.21".to_string())); } + #[derive(Clone)] + struct CountingRegistry { + inner: MockRegistryClient, + shared_version_jobs: Arc, + } + + impl crate::traits::registry::RegistryClient for CountingRegistry { + type Error = MockError; + + async fn fetch_full_manifest(&self, name: &str) -> Result, 
Self::Error> { + self.inner.fetch_full_manifest(name).await + } + } + + #[async_trait::async_trait] + impl ManifestProvider for CountingRegistry { + async fn execute_manifest_job( + &self, + job: ManifestJob, + ) -> Result { + if matches!( + &job, + ManifestJob::Full { name, .. } + | ManifestJob::Version { name, .. } + | ManifestJob::ExtractVersion { name, .. } + if name == "shared" + ) { + self.shared_version_jobs.fetch_add(1, Ordering::Relaxed); + } + self.inner.execute_manifest_job(job).await + } + } + + #[tokio::test] + async fn test_non_semver_exact_version_extract_single_flight() { + let mut inner = MockRegistryClient::new(); + inner.add_package( + "a", + "1.0.0", + create_version_manifest_with_deps("a", "1.0.0", vec![("shared", "^1.0.0")]), + ); + inner.add_package( + "b", + "1.0.0", + create_version_manifest_with_deps("b", "1.0.0", vec![("shared", "~1.2.0")]), + ); + inner.add_package( + "shared", + "1.2.3", + create_version_manifest("shared", "1.2.3"), + ); + + let shared_version_jobs = Arc::new(AtomicUsize::new(0)); + let registry = CountingRegistry { + inner, + shared_version_jobs: Arc::clone(&shared_version_jobs), + }; + let pkg = PackageJson { + dependencies: Some(HashMap::from([ + ("a".to_string(), "1.0.0".to_string()), + ("b".to_string(), "1.0.0".to_string()), + ])), + ..PackageJson::new("test-project", "1.0.0") + }; + + let lock = resolve(&pkg, ®istry).await.unwrap(); + + assert!(lock.packages.contains_key("node_modules/shared")); + assert_eq!(shared_version_jobs.load(Ordering::Relaxed), 1); + } + + #[test] + fn test_schedule_registry_fetch_dedupes_semver_request() { + let mut state = ManifestState::default(); + + state.schedule_registry_fetch( + "pkg".to_string(), + "^1.0.0".to_string(), + true, + FetchPriority::Demand, + ); + state.schedule_registry_fetch( + "pkg".to_string(), + "^1.0.0".to_string(), + true, + FetchPriority::Demand, + ); + + assert!( + state + .fetch_queues + .queued + .contains_key(&FetchKey::Version("pkg".to_string(), 
"^1.0.0".to_string())) + ); + match state + .fetch_queues + .pop_next(prefetch_concurrency_limit(64)) + .unwrap() + { + ManifestJob::Version { + name, + spec, + fetch_spec, + format, + } => { + assert_eq!(name, "pkg"); + assert_eq!(spec, "^1.0.0"); + assert_eq!(fetch_spec, "^1.0.0"); + assert!(matches!(format, MetadataFormat::Abbreviated)); + } + _ => panic!("expected version fetch request"), + } + } + + #[test] + fn test_fetch_queues_prioritize_demand_over_prefetch() { + let mut fetch_queues = FetchQueues::default(); + fetch_queues.enqueue( + ManifestJob::Full { + name: "prefetch".to_string(), + spec: None, + }, + FetchPriority::Prefetch, + ); + fetch_queues.enqueue( + ManifestJob::Version { + name: "demand".to_string(), + spec: "^1.0.0".to_string(), + fetch_spec: "^1.0.0".to_string(), + format: MetadataFormat::Abbreviated, + }, + FetchPriority::Demand, + ); + + assert_eq!( + fetch_queues + .pop_next(prefetch_concurrency_limit(64)) + .unwrap() + .key(), + FetchKey::Version("demand".to_string(), "^1.0.0".to_string()) + ); + assert_eq!( + fetch_queues + .pop_next(prefetch_concurrency_limit(64)) + .unwrap() + .key(), + FetchKey::Full("prefetch".to_string()) + ); + } + + #[test] + fn test_fetch_queues_promotes_prefetch_to_demand() { + let mut fetch_queues = FetchQueues::default(); + fetch_queues.enqueue( + ManifestJob::Full { + name: "pkg".to_string(), + spec: None, + }, + FetchPriority::Prefetch, + ); + fetch_queues.enqueue( + ManifestJob::Full { + name: "pkg".to_string(), + spec: None, + }, + FetchPriority::Demand, + ); + + let key = FetchKey::Full("pkg".to_string()); + assert_eq!(fetch_queues.queued.get(&key), Some(&FetchPriority::Demand)); + assert_eq!( + fetch_queues + .pop_next(prefetch_concurrency_limit(64)) + .unwrap() + .key(), + key + ); + assert_eq!( + fetch_queues.active.get(&FetchKey::Full("pkg".to_string())), + Some(&FetchPriority::Demand) + ); + assert!( + fetch_queues + .pop_next(prefetch_concurrency_limit(64)) + .is_none() + ); + } + + #[test] + fn 
test_prefetch_concurrency_limit_tracks_fetch_concurrency() { + assert_eq!(prefetch_concurrency_limit(1), 1); + assert_eq!(prefetch_concurrency_limit(3), 1); + assert_eq!(prefetch_concurrency_limit(8), 2); + } + + #[test] + fn test_apply_fetch_result_caches_exact_version_and_wakes_waiters() { + let mut state = ManifestState { + version_waiters: HashMap::from([( + ("pkg".to_string(), "^1.0.0".to_string()), + vec![( + NodeIndex::new(0), + DependencyEdgeInfo { + edge_id: petgraph::graph::EdgeIndex::new(0), + name: "pkg".to_string(), + spec: "^1.0.0".to_string(), + edge_type: EdgeType::Prod, + }, + )], + )]), + ..Default::default() + }; + state.fetch_queues.active.insert( + FetchKey::Version("pkg".to_string(), "^1.0.0".to_string()), + FetchPriority::Demand, + ); + let mut level_pending = std::collections::VecDeque::new(); + let manifest = Arc::new(create_version_manifest("pkg", "1.2.3")); + + state.apply_fetch_result( + FetchDone::Version { + name: "pkg".to_string(), + spec: "^1.0.0".to_string(), + result: Ok(manifest), + }, + true, + PeerDeps::Skip, + &mut level_pending, + ); + + assert!( + state + .version_cache + .contains_key(&("pkg".to_string(), "^1.0.0".to_string())) + ); + assert!( + state + .version_cache + .contains_key(&("pkg".to_string(), "1.2.3".to_string())) + ); + assert!(state.version_waiters.is_empty()); + assert!(state.fetch_queues.queued.is_empty()); + assert!(state.fetch_queues.active.is_empty()); + assert_eq!(level_pending.len(), 1); + } + + #[test] + fn test_apply_fetch_result_prefetches_transitive_registry_deps() { + let mut state = ManifestState::default(); + state.fetch_queues.active.insert( + FetchKey::Version("pkg".to_string(), "^1.0.0".to_string()), + FetchPriority::Demand, + ); + let mut level_pending = std::collections::VecDeque::new(); + let manifest = Arc::new(create_version_manifest_with_deps( + "pkg", + "1.2.3", + vec![("dep", "^1.0.0"), ("local", "file:../local")], + )); + + state.apply_fetch_result( + FetchDone::Version { + name: 
"pkg".to_string(), + spec: "^1.0.0".to_string(), + result: Ok(manifest), + }, + true, + PeerDeps::Skip, + &mut level_pending, + ); + + assert!( + state + .fetch_queues + .queued + .contains_key(&FetchKey::Version("dep".to_string(), "^1.0.0".to_string())) + ); + assert!(!state.fetch_queues.queued.contains_key(&FetchKey::Version( + "local".to_string(), + "file:../local".to_string() + ))); + match state + .fetch_queues + .pop_next(prefetch_concurrency_limit(64)) + .unwrap() + { + ManifestJob::Version { + name, + spec, + fetch_spec, + format, + } => { + assert_eq!(name, "dep"); + assert_eq!(spec, "^1.0.0"); + assert_eq!(fetch_spec, "^1.0.0"); + assert!(matches!(format, MetadataFormat::Abbreviated)); + } + _ => panic!("expected version prefetch request"), + } + } + + #[test] + fn test_apply_fetch_result_caches_versions_and_wakes_waiters() { + let mut state = ManifestState { + full_waiters: HashMap::from([( + "pkg".to_string(), + vec![( + NodeIndex::new(0), + DependencyEdgeInfo { + edge_id: petgraph::graph::EdgeIndex::new(0), + name: "pkg".to_string(), + spec: "^1.0.0".to_string(), + edge_type: EdgeType::Prod, + }, + )], + )]), + ..Default::default() + }; + state + .fetch_queues + .active + .insert(FetchKey::Full("pkg".to_string()), FetchPriority::Demand); + let mut level_pending = std::collections::VecDeque::new(); + let versions = Arc::new(crate::service::VersionsInfo { + versions: crate::service::Versions { + version_list: vec!["1.2.3".to_string()], + dist_tags: HashMap::from([("latest".to_string(), "1.2.3".to_string())]), + }, + etag: Some("etag".to_string()), + last_updated: 1, + }); + + state.apply_fetch_result( + FetchDone::Full { + name: "pkg".to_string(), + result: Ok(ManifestFullData::Versions(versions)), + }, + false, + PeerDeps::Skip, + &mut level_pending, + ); + + assert!(state.full_cache.is_empty()); + assert!(state.versions_cache.contains_key("pkg")); + assert!(state.full_waiters.is_empty()); + assert!(state.fetch_queues.queued.is_empty()); + 
assert!(state.fetch_queues.active.is_empty()); + assert_eq!(level_pending.len(), 1); + } + + #[test] + fn test_apply_fetch_result_caches_speculative_full_extract() { + let mut state = ManifestState::default(); + state + .fetch_queues + .active + .insert(FetchKey::Full("pkg".to_string()), FetchPriority::Demand); + let mut level_pending = std::collections::VecDeque::new(); + let full = Arc::new(FullManifest { + name: "pkg".to_string(), + versions: vec!["1.2.3".to_string()], + ..Default::default() + }); + let manifest = Arc::new(create_version_manifest_with_deps( + "pkg", + "1.2.3", + vec![("dep", "^1.0.0")], + )); + + state.apply_fetch_result( + FetchDone::Full { + name: "pkg".to_string(), + result: Ok(ManifestFullData::Full { + manifest: full, + speculative: Some(("^1.0.0".to_string(), manifest)), + }), + }, + false, + PeerDeps::Skip, + &mut level_pending, + ); + + assert!(state.full_cache.contains_key("pkg")); + assert!( + state + .version_cache + .contains_key(&("pkg".to_string(), "^1.0.0".to_string())) + ); + assert!( + state + .version_cache + .contains_key(&("pkg".to_string(), "1.2.3".to_string())) + ); + assert!( + state + .fetch_queues + .queued + .contains_key(&FetchKey::Full("dep".to_string())) + ); + } + + #[test] + fn test_enqueue_version_fetch_uses_exact_key() { + let mut state = ManifestState::default(); + + state.enqueue_version_fetch("pkg".to_string(), "1.2.3".to_string(), false); + state.enqueue_version_fetch("pkg".to_string(), "1.2.3".to_string(), false); + + assert!( + state + .fetch_queues + .queued + .contains_key(&FetchKey::Version("pkg".to_string(), "1.2.3".to_string())) + ); + match state + .fetch_queues + .pop_next(prefetch_concurrency_limit(64)) + .unwrap() + { + ManifestJob::Version { + name, + spec, + fetch_spec, + format, + } => { + assert_eq!(name, "pkg"); + assert_eq!(spec, "1.2.3"); + assert_eq!(fetch_spec, "1.2.3"); + assert!(matches!(format, MetadataFormat::Complete)); + } + _ => panic!("expected version fetch request"), + } + } + + 
#[test] + fn test_enqueue_version_extract_uses_exact_key() { + let mut state = ManifestState::default(); + let full = Arc::new(FullManifest::default()); + + state.enqueue_version_extract("pkg".to_string(), "1.2.3".to_string(), Arc::clone(&full)); + state.enqueue_version_extract("pkg".to_string(), "1.2.3".to_string(), full); + + assert!( + state + .fetch_queues + .queued + .contains_key(&FetchKey::Version("pkg".to_string(), "1.2.3".to_string())) + ); + match state + .fetch_queues + .pop_next(prefetch_concurrency_limit(64)) + .unwrap() + { + ManifestJob::ExtractVersion { + name, + spec, + version, + .. + } => { + assert_eq!(name, "pkg"); + assert_eq!(spec, "1.2.3"); + assert_eq!(version, "1.2.3"); + } + _ => panic!("expected version extract request"), + } + } + // Helper to create a graph with source -> target for testing update_node_type_from_edge // Returns (graph, source_index, target_index) where source is NOT root fn create_source_target_graph() -> (DependencyGraph, NodeIndex, NodeIndex) { From fc7be5f672e91b56df2b8039291884d288c2a792 Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 02:59:57 +0800 Subject: [PATCH 16/21] perf(pm): remove obsolete resolver preload --- crates/ruborist/src/resolver/mod.rs | 1 - crates/ruborist/src/resolver/preload.rs | 345 ------------------------ crates/ruborist/src/traits/progress.rs | 20 -- 3 files changed, 366 deletions(-) delete mode 100644 crates/ruborist/src/resolver/preload.rs diff --git a/crates/ruborist/src/resolver/mod.rs b/crates/ruborist/src/resolver/mod.rs index 582e03b31..4b969e183 100644 --- a/crates/ruborist/src/resolver/mod.rs +++ b/crates/ruborist/src/resolver/mod.rs @@ -7,7 +7,6 @@ pub mod edges; pub mod git; #[cfg(feature = "http-tarball")] pub mod http; -pub mod preload; pub mod registry; pub mod runtime; pub mod semver; diff --git a/crates/ruborist/src/resolver/preload.rs b/crates/ruborist/src/resolver/preload.rs deleted file mode 100644 index 1230c5bf6..000000000 --- 
a/crates/ruborist/src/resolver/preload.rs +++ /dev/null @@ -1,345 +0,0 @@ -//! Parallel manifest preloading for dependency resolution. -//! -//! Uses FuturesUnordered for true streaming concurrency: when a package resolves, -//! its transitive dependencies are immediately added to the queue. - -use std::collections::{HashSet, VecDeque}; -use std::sync::Arc; - -use futures::stream::{FuturesUnordered, StreamExt}; - -use crate::model::manifest::CoreVersionManifest; -use crate::model::node::PeerDeps; -use crate::resolver::registry::resolve_package; -use crate::traits::progress::{BuildEvent, EventReceiver}; -use crate::traits::registry::RegistryClient; - -/// Default concurrency limit for manifest fetching -pub const DEFAULT_CONCURRENCY: usize = 128; - -/// A dependency spec: (name, version_spec) -pub type Dep = (String, String); - -/// Configuration for preload behavior -#[derive(Debug, Clone)] -pub struct PreloadConfig { - /// How to handle peer dependencies. - pub peer_deps: PeerDeps, - /// Maximum number of concurrent manifest fetches - pub concurrency: usize, -} - -impl Default for PreloadConfig { - fn default() -> Self { - Self { - peer_deps: PeerDeps::Skip, - concurrency: DEFAULT_CONCURRENCY, - } - } -} - -/// Statistics from preload operation -#[derive(Debug, Default)] -pub struct PreloadStats { - pub success_count: usize, - pub failed_count: usize, - pub total_processed: usize, - pub min_request_ms: u64, - pub max_request_ms: u64, - pub total_request_ms: u64, -} - -/// Collect dependencies from any deps map, filtering out non-registry specs. -fn collect_deps(map: Option<&std::collections::HashMap>) -> Vec { - use crate::spec::SpecStr; - map.into_iter() - .flatten() - .filter(|(_, spec)| spec.is_registry_spec()) - .map(|(name, spec)| (name.clone(), spec.clone())) - .collect() -} - -/// Extract transitive dependencies from a resolved manifest. -/// Note: devDependencies are NOT included (only root packages install devDeps). 
-fn extract_transitive_deps(manifest: &CoreVersionManifest, config: &PreloadConfig) -> Vec { - let mut deps = Vec::new(); - deps.extend(collect_deps(manifest.dependencies.as_ref())); - if config.peer_deps == PeerDeps::Include { - deps.extend(collect_deps(manifest.peer_dependencies.as_ref())); - } - deps.extend(collect_deps(manifest.optional_dependencies.as_ref())); - deps -} - -/// Preload all package manifests in parallel with streaming concurrency. -pub async fn preload_manifests( - initial_deps: Vec, - registry: &R, - config: PreloadConfig, - receiver: &E, - mut on_manifest: F, -) -> PreloadStats -where - R: RegistryClient, - E: EventReceiver, - F: FnMut(&str, Arc), -{ - let mut stats = PreloadStats::default(); - let mut processed: HashSet = HashSet::new(); - let mut pending: VecDeque = initial_deps.into(); - let concurrency = config.concurrency; - - tracing::debug!( - "Preload: {} initial deps, concurrency={}", - pending.len(), - concurrency - ); - - let mut futures = FuturesUnordered::new(); - let mut in_flight = 0usize; - let mut started = false; - - loop { - // Fill up to concurrency limit - while in_flight < concurrency { - let item = loop { - let Some((name, spec)) = pending.pop_front() else { - break None; - }; - let key = format!("{}@{}", name, spec); - if !processed.contains(&key) { - processed.insert(key); - break Some((name, spec)); - } - }; - - let Some((name, spec)) = item else { - break; - }; - - if !started { - receiver.on_event(BuildEvent::PreloadStart { count: 1 }); - started = true; - } else { - receiver.on_event(BuildEvent::PreloadQueued { count: 1 }); - } - - receiver.on_event(BuildEvent::PreloadFetching { name: &name }); - - futures.push(async move { - let start = tokio::time::Instant::now(); - let result = resolve_package(registry, &name, &spec).await; - let elapsed_ms = start.elapsed().as_millis() as u64; - (name, result, elapsed_ms) - }); - in_flight += 1; - } - - if in_flight == 0 { - break; - } - - let Some((name, result, elapsed_ms)) = 
futures.next().await else { - break; - }; - in_flight -= 1; - - if stats.success_count == 0 && stats.failed_count == 0 { - stats.min_request_ms = elapsed_ms; - stats.max_request_ms = elapsed_ms; - } else { - stats.min_request_ms = stats.min_request_ms.min(elapsed_ms); - stats.max_request_ms = stats.max_request_ms.max(elapsed_ms); - } - stats.total_request_ms += elapsed_ms; - - match result { - Ok(resolved) => { - stats.success_count += 1; - - receiver.on_event(BuildEvent::PreloadProgress { - name: &name, - version: &resolved.version, - current: stats.success_count, - }); - - // Send PackageResolved event for pipeline downloading - receiver.on_event(BuildEvent::PackageResolved((&*resolved.manifest).into())); - - pending.extend(extract_transitive_deps(&resolved.manifest, &config)); - on_manifest(&name, resolved.manifest); - } - Err(e) => { - stats.failed_count += 1; - tracing::debug!("Failed to preload {}: {}", name, e); - } - } - } - - stats.total_processed = processed.len(); - - receiver.on_event(BuildEvent::PreloadComplete { - success: stats.success_count, - failed: stats.failed_count, - }); - - let total = stats.success_count + stats.failed_count; - let avg = if total > 0 { - stats.total_request_ms / total as u64 - } else { - 0 - }; - tracing::debug!( - "Preload stats: {} requests, min={}ms, max={}ms, avg={}ms, total={}ms", - total, - stats.min_request_ms, - stats.max_request_ms, - avg, - stats.total_request_ms - ); - - stats -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::model::manifest::CoreVersionManifest; - use crate::traits::progress::NoopReceiver; - use crate::traits::registry::mock::MockRegistryClient; - use std::cell::RefCell; - use std::collections::HashMap; - use std::rc::Rc; - - fn manifest(name: &str, version: &str) -> CoreVersionManifest { - CoreVersionManifest { - name: name.to_string(), - version: version.to_string(), - ..Default::default() - } - } - - fn manifest_with_deps( - name: &str, - version: &str, - deps: Vec<(&str, &str)>, - 
) -> CoreVersionManifest { - CoreVersionManifest { - name: name.to_string(), - version: version.to_string(), - dependencies: Some( - deps.into_iter() - .map(|(k, v)| (k.into(), v.into())) - .collect(), - ), - ..Default::default() - } - } - - #[tokio::test] - async fn test_preload_single() { - let mut registry = MockRegistryClient::new(); - registry.add_package("lodash", "4.17.21", manifest("lodash", "4.17.21")); - - let cache: Rc>>> = Default::default(); - let cache_clone = Rc::clone(&cache); - - let stats = preload_manifests( - vec![("lodash".into(), "^4.17.0".into())], - ®istry, - PreloadConfig::default(), - &NoopReceiver, - |name, m| { - cache_clone.borrow_mut().insert(name.into(), m); - }, - ) - .await; - - assert_eq!(stats.success_count, 1); - assert!(cache.borrow().contains_key("lodash")); - } - - #[tokio::test] - async fn test_preload_transitive() { - let mut registry = MockRegistryClient::new(); - registry.add_package( - "a", - "1.0.0", - manifest_with_deps("a", "1.0.0", vec![("b", "^1.0.0")]), - ); - registry.add_package("b", "1.0.0", manifest("b", "1.0.0")); - - let cache: Rc>>> = Default::default(); - let cache_clone = Rc::clone(&cache); - - let stats = preload_manifests( - vec![("a".into(), "^1.0.0".into())], - ®istry, - PreloadConfig::default(), - &NoopReceiver, - |name, m| { - cache_clone.borrow_mut().insert(name.into(), m); - }, - ) - .await; - - assert_eq!(stats.success_count, 2); - assert!(cache.borrow().contains_key("a")); - assert!(cache.borrow().contains_key("b")); - } - - #[tokio::test] - async fn test_preload_missing() { - let registry = MockRegistryClient::new(); - let cache: Rc>>> = Default::default(); - let cache_clone = Rc::clone(&cache); - - let stats = preload_manifests( - vec![("nonexistent".into(), "^1.0.0".into())], - ®istry, - PreloadConfig::default(), - &NoopReceiver, - |name, m| { - cache_clone.borrow_mut().insert(name.into(), m); - }, - ) - .await; - - assert_eq!(stats.failed_count, 1); - assert!(cache.borrow().is_empty()); - } - 
- #[test] - fn test_is_registry_spec() { - use crate::spec::SpecStr; - - // Local specs — not registry - assert!(!"file:../foo".is_registry_spec()); - assert!(!"link:../foo".is_registry_spec()); - assert!(!"workspace:*".is_registry_spec()); - assert!(!"portal:../foo".is_registry_spec()); - - // Git specs — not registry - assert!(!"git+https://github.com/user/repo.git".is_registry_spec()); - assert!(!"git+ssh://git@github.com/user/repo.git".is_registry_spec()); - assert!(!"git+https://github.com/user/repo.git#main".is_registry_spec()); - assert!(!"git://github.com/user/repo.git".is_registry_spec()); - assert!(!"github:user/repo".is_registry_spec()); - assert!(!"github:user/repo#v1.0".is_registry_spec()); - - // HTTP tarball specs — not registry - assert!(!"https://example.com/pkg.tgz".is_registry_spec()); - assert!(!"http://example.com/pkg.tar.gz".is_registry_spec()); - assert!(!"https://example.com/pkg.tgz?v=1.0".is_registry_spec()); - - // Bare GitHub shorthand — not registry - assert!(!"user/repo".is_registry_spec()); - assert!(!"user/repo#v1.0".is_registry_spec()); - - // Registry specs - assert!("^1.0.0".is_registry_spec()); - assert!("latest".is_registry_spec()); - assert!("~2.0.0".is_registry_spec()); - assert!("@scope/pkg@1.0.0".is_registry_spec()); - } -} diff --git a/crates/ruborist/src/traits/progress.rs b/crates/ruborist/src/traits/progress.rs index 72c739a78..51ee1d450 100644 --- a/crates/ruborist/src/traits/progress.rs +++ b/crates/ruborist/src/traits/progress.rs @@ -10,31 +10,11 @@ pub use crate::model::tarball_info::PackageTarballInfo; /// Events emitted during dependency resolution. #[derive(Debug, Clone, Copy)] pub enum BuildEvent<'a> { - /// Starting preload phase with N initial dependencies. - PreloadStart { count: usize }, - - /// More dependencies were discovered and queued for preloading. - PreloadQueued { count: usize }, - - /// A fetch task was started for a package. 
- PreloadFetching { name: &'a str }, - - /// A package was preloaded successfully. - PreloadProgress { - name: &'a str, - version: &'a str, - /// Current count of preloaded packages - current: usize, - }, - /// A package was fully resolved with download info. /// This event enables pipeline downloading - tarball can be downloaded /// immediately while other manifests are still being fetched. PackageResolved(PackageTarballInfo<'a>), - /// Preload phase completed with success/failed counts. - PreloadComplete { success: usize, failed: usize }, - /// Starting a new BFS level with N nodes to process. LevelStart { node_count: usize }, From ed9ee587ec93944892ddbb3696ca2cf12588fd08 Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 03:02:56 +0800 Subject: [PATCH 17/21] perf(pm): remove registry cache hook --- crates/ruborist/src/service/registry.rs | 5 ----- crates/ruborist/src/traits/registry.rs | 15 --------------- 2 files changed, 20 deletions(-) diff --git a/crates/ruborist/src/service/registry.rs b/crates/ruborist/src/service/registry.rs index a27aab8f7..8f070c25a 100644 --- a/crates/ruborist/src/service/registry.rs +++ b/crates/ruborist/src/service/registry.rs @@ -670,11 +670,6 @@ impl RegistryClient for UnifiedRegistry { &self.registry_url } - fn cache_version_manifest(&self, name: &str, spec: &str, manifest: Arc) { - self.cache - .set_version_manifest(name.to_string(), spec.to_string(), manifest); - } - async fn fetch_full_manifest(&self, name: &str) -> Result, Self::Error> { match self.resolve_full_manifest(name).await? { FullManifestResult::Full(manifest) => Ok(manifest), diff --git a/crates/ruborist/src/traits/registry.rs b/crates/ruborist/src/traits/registry.rs index e30d67112..ea9d84558 100644 --- a/crates/ruborist/src/traits/registry.rs +++ b/crates/ruborist/src/traits/registry.rs @@ -282,21 +282,6 @@ pub trait RegistryClient { }) } } - - /// Cache a resolved version manifest for later use. 
- /// - /// This method is called by preload to cache (name, spec) -> version_manifest mappings, - /// allowing build phase to directly hit memory cache without traversing full_manifest. - /// - /// Default implementation is no-op (no caching). - fn cache_version_manifest( - &self, - _name: &str, - _spec: &str, - _manifest: Arc, - ) { - // Default: no-op - } } /// A simple in-memory registry client for testing. From 44add1decc055e3dbf95b37176e2644ef19c88f5 Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 03:05:57 +0800 Subject: [PATCH 18/21] perf(pm): make unified registry stateless --- crates/ruborist/src/service/registry.rs | 644 ++++++------------------ 1 file changed, 151 insertions(+), 493 deletions(-) diff --git a/crates/ruborist/src/service/registry.rs b/crates/ruborist/src/service/registry.rs index 8f070c25a..18166810d 100644 --- a/crates/ruborist/src/service/registry.rs +++ b/crates/ruborist/src/service/registry.rs @@ -1,9 +1,9 @@ //! Unified registry client implementation. //! //! Provides `UnifiedRegistry` that works on both native and WASM targets. -//! Combines HTTP fetching with in-memory caching, optional persistent storage -//! through a [`ManifestStore`], and automatic registry capability detection -//! (semver support). +//! Combines HTTP fetching, optional persistent storage through a +//! [`ManifestStore`], and automatic registry capability detection (semver +//! support). //! //! For non-semver registries (npmjs.org), the persistent store doubles as the //! ETag source: `versions.json` carries the etag for the next conditional @@ -13,8 +13,7 @@ //! # Architecture //! //! - `manifest` module: Manifest fetching with retry (`fetch_full_manifest`, `fetch_version_manifest`) -//! - `UnifiedRegistry`: in-memory cache + injected `ManifestStore` (host-supplied persistence) -//! - Memory cache (fastest) +//! - `UnifiedRegistry`: injected `ManifestStore` + network fetch/parse adapter //! - `ManifestStore` (host: disk / KV / no-op) //! 
- Network (authoritative source) @@ -40,9 +39,7 @@ fn current_timestamp_secs() -> u64 { (js_sys::Date::now() / 1000.0) as u64 } -use dashmap::DashSet; - -use super::cache::{PackageCache, Versions, VersionsInfo}; +use super::cache::{Versions, VersionsInfo}; use super::manifest; use super::provider::{ ManifestFullData, ManifestJob, ManifestJobDone, ManifestProvider, ProviderFullManifestBytes, @@ -52,12 +49,11 @@ use crate::model::manifest::{CoreVersionManifest, FullManifest, extract_core_ver use crate::resolver::semver::normalize_spec; use crate::resolver::version::resolve_target_version; use crate::traits::registry::{RegistryClient, RegistryError, ResolvedPackage, is_npm_registry}; -use crate::util::OnceMap; /// Unified registry client that works on both native and WASM. /// /// Cache lookup order: -/// 1. In-memory `PackageCache` (fastest, lost on restart) +/// 1. Resolver-owned in-memory cache in the demand BFS loop /// 2. Host-provided `ManifestStore` (persistent; disk on native, no-op on WASM by default) /// 3. Network (slowest, always authoritative) /// @@ -76,33 +72,13 @@ use crate::util::OnceMap; /// ``` pub struct UnifiedRegistry { registry_url: String, - cache: Arc, store: Arc, supports_semver: bool, - /// Single-flight gate for full-manifest fetches keyed by package name. - /// **Gate-only**: the entry value is `()`; the canonical - /// `Arc` lives in `PackageCache`. Concurrent resolves for - /// the same name share one network + parse round-trip; the - /// 200/304 outcome is recovered by inspecting cache state after the - /// gate releases. - inflight_full: Arc>, - /// Single-flight gate for version-manifest fetches keyed by - /// `(name, spec)`. Same gate-only pattern: the canonical `Arc<…>` - /// lives in `PackageCache.version_manifests`; the gate stores `()`. - inflight_version: Arc>, - /// Dedup set for `store_version_manifest` disk writes keyed by - /// `(name, resolved_version)`. 
`inflight_version` is keyed by - /// `(name, spec)`, so sibling specs (e.g. `^1.0.0` and `^1.2.0`) - /// resolving to the same version each fire the gate independently - /// and would otherwise issue duplicate writes for the same path. - /// First insert wins; subsequent specs skip the redundant write. - stored_version: Arc>, } /// Builder for `UnifiedRegistry`. pub struct UnifiedRegistryBuilder { registry_url: Option, - cache: Option>, store: Option>, supports_semver: Option, } @@ -112,7 +88,6 @@ impl UnifiedRegistryBuilder { pub fn new() -> Self { Self { registry_url: None, - cache: None, store: None, supports_semver: None, } @@ -130,12 +105,6 @@ impl UnifiedRegistryBuilder { self } - /// Set a shared in-memory cache instance. - pub fn cache(mut self, cache: Arc) -> Self { - self.cache = Some(cache); - self - } - /// Explicitly set whether the registry supports semver resolution. /// /// If not set, defaults to `!is_npm_registry(url)`. @@ -153,19 +122,12 @@ impl UnifiedRegistryBuilder { .supports_semver .unwrap_or_else(|| !is_npm_registry(®istry_url)); - let cache = self - .cache - .unwrap_or_else(|| Arc::new(PackageCache::default())); let store = self.store.unwrap_or_else(|| Arc::new(NoopStore)); UnifiedRegistry { registry_url, - cache, store, supports_semver, - inflight_full: Arc::new(OnceMap::new()), - inflight_version: Arc::new(OnceMap::new()), - stored_version: Arc::new(DashSet::new()), } } } @@ -180,29 +142,12 @@ impl Clone for UnifiedRegistry { fn clone(&self) -> Self { Self { registry_url: self.registry_url.clone(), - cache: Arc::clone(&self.cache), store: Arc::clone(&self.store), supports_semver: self.supports_semver, - inflight_full: Arc::clone(&self.inflight_full), - inflight_version: Arc::clone(&self.inflight_version), - stored_version: Arc::clone(&self.stored_version), } } } -/// Result of `resolve_full_manifest`. 
-/// -/// Separates the 200 (full data) and 304 (use cache) cases at the type level, -/// so callers can pattern-match instead of string-matching error messages. -enum FullManifestResult { - /// Fresh manifest fetched from the network (HTTP 200). - Full(Arc), - /// ETag matched, versions cache is valid (HTTP 304). - /// Caller should resolve from the in-memory versions cache and - /// fetch individual version manifests as needed. - NotModified, -} - #[cfg_attr(target_arch = "wasm32", async_trait(?Send))] #[cfg_attr(not(target_arch = "wasm32"), async_trait)] impl ManifestProvider for UnifiedRegistry { @@ -247,9 +192,31 @@ impl ManifestProvider for UnifiedRegistry { fetch_spec, format, } => { - let manifest = self - .fetch_version_job_manifest(&name, &spec, &fetch_spec, format) - .await?; + if deno_semver::Version::parse_from_npm(&fetch_spec).is_ok() + && let Some(manifest) = + self.store.load_version_manifest(&name, &fetch_spec).await + { + let manifest = Arc::new(manifest); + return Ok(ManifestJobDone::Version { + name, + spec, + manifest, + }); + } + + let bytes = + manifest::fetch_version_manifest_vec(manifest::FetchVersionManifestOptions { + registry_url: &self.registry_url, + name: &name, + spec: &fetch_spec, + format, + }) + .await + .map_err(RegistryError)?; + let manifest = Arc::new( + manifest::parse_json_vec_off_runtime::(bytes).await?, + ); + self.store_version_manifest(&name, Arc::clone(&manifest)); Ok(ManifestJobDone::Version { name, spec, @@ -262,9 +229,16 @@ impl ManifestProvider for UnifiedRegistry { version, full, } => { - let manifest = self - .extract_version_job_manifest(&name, &spec, version, full) - .await?; + let (resolved_version, manifest) = + extract_core_version_off_runtime(full, version).await; + let manifest = manifest.ok_or_else(|| { + RegistryError(anyhow!( + "Version {} not found in manifest for {}", + resolved_version, + name + )) + })?; + self.store_version_manifest(&name, Arc::clone(&manifest)); Ok(ManifestJobDone::Version { name, 
spec, @@ -291,14 +265,17 @@ impl UnifiedRegistry { self.supports_semver } - /// Get the underlying in-memory cache. - pub fn cache(&self) -> &PackageCache { - &self.cache + fn store_version_manifest(&self, name: &str, manifest: Arc) { + self.store + .store_version_manifest(name, &manifest.version, Arc::clone(&manifest)); } - fn store_version_manifest(&self, name: &str, manifest: Arc) { - let version = manifest.version.clone(); - self.store.store_version_manifest(name, &version, manifest); + fn version_metadata_format(&self) -> manifest::MetadataFormat { + if self.supports_semver { + manifest::MetadataFormat::Abbreviated + } else { + manifest::MetadataFormat::Complete + } } async fn fetch_full_manifest_job( @@ -331,330 +308,104 @@ impl UnifiedRegistry { } } - async fn extract_version_job_manifest( + async fn execute_version_job( &self, name: &str, - _spec: &str, - version: String, - full: Arc, - ) -> Result, RegistryError> { - let (resolved_version, manifest) = extract_core_version_off_runtime(full, version).await; - let manifest = manifest.ok_or_else(|| { - RegistryError(anyhow!( - "Version {} not found in manifest for {}", - resolved_version, - name - )) - })?; - self.store_version_manifest(name, Arc::clone(&manifest)); - Ok(manifest) - } - - async fn fetch_version_job_manifest( - &self, - name: &str, - _spec: &str, + spec: &str, fetch_spec: &str, - format: manifest::MetadataFormat, ) -> Result, RegistryError> { - if deno_semver::Version::parse_from_npm(fetch_spec).is_ok() - && let Some(manifest) = self.store.load_version_manifest(name, fetch_spec).await - { - return Ok(Arc::new(manifest)); - } - - let manifest = Arc::new( - manifest::fetch_version_manifest(manifest::FetchVersionManifestOptions { - registry_url: &self.registry_url, - name, - spec: fetch_spec, - format, - }) - .await - .map_err(RegistryError)?, - ); - self.store_version_manifest(name, Arc::clone(&manifest)); - Ok(manifest) - } - - /// Resolve full manifest through memory → store → network with ETag 
validation. - /// - /// Single-flight cache flow: - /// 1. Memory cache hit on `full_manifests` → return immediately (Arc bump). - /// 2. Otherwise, acquire the gate-only `inflight_full`. The worker - /// closure populates `PackageCache` as a side effect: writes - /// `full_manifests` on 200, writes only `versions_info` on 304. - /// 3. After the gate releases, recover the outcome by inspecting cache - /// state — `full_manifests` populated → 200, only `versions_info` - /// populated → 304. - async fn resolve_full_manifest(&self, name: &str) -> Result { - if let Some(manifest) = self.cache.get_full_manifest(name) { - return Ok(FullManifestResult::Full(manifest)); - } - - self.inflight_full - .get_or_try_init::(name.to_string(), || async { - // Re-check inside the worker — a previous winner may have - // populated the cache while we queued on the OnceMap shard. - if self.cache.get_full_manifest(name).is_some() { - return Ok(()); - } - - let store_versions = self.store.load_versions(name).await.map(Arc::new); - let etag = store_versions.as_ref().and_then(|v| v.etag.clone()); - - match manifest::fetch_full_manifest(manifest::FetchManifestOptions { - registry_url: &self.registry_url, - name, - format: manifest::MetadataFormat::Abbreviated, - etag: etag.as_deref(), - }) - .await - .map_err(RegistryError)? - { - manifest::FetchManifestResult::Ok(manifest, new_etag) => { - // Build a `VersionsInfo` strictly for the disk-persist - // task. We do NOT also fill the in-memory - // `versions_info` cache slot on the 200 path: readers - // (`resolve_via_full_manifest::Full`) now go through - // `VersionsRef::from(&Arc)` for this - // case, so the `full_manifests` slot is the single - // source of truth in memory. The `versions_info` slot - // is reserved for the 304 path (or disk-loaded warm - // cache from a previous run). 
- let versions_info = Arc::new(VersionsInfo { - versions: Versions { - version_list: manifest.versions.clone(), - dist_tags: manifest.dist_tags.clone(), - }, - etag: new_etag, - last_updated: current_timestamp_secs(), - }); - self.cache - .set_full_manifest(name.to_string(), Arc::new(manifest)); - // Fire-and-forget: store may spawn its own task. - self.store.store_versions(name, versions_info); - } - manifest::FetchManifestResult::NotModified => { - if let Some(versions_info) = store_versions { - // Only populate `versions_info`; absence of - // `full_manifests` after the gate is the 304 - // signal. - self.cache.set_versions(name.to_string(), versions_info); - } else { - // Persistent store corrupted/missing, fetch fresh (without etag). - let (manifest, new_etag) = manifest::fetch_full_manifest_fresh( - &self.registry_url, - name, - manifest::MetadataFormat::Abbreviated, - ) - .await - .map_err(RegistryError)?; - - // Same shape as the 200 branch: only the - // canonical `full_manifests` slot is filled in - // memory; the disk-persist task gets its own - // `Arc`. - let versions_info = Arc::new(VersionsInfo { - versions: Versions { - version_list: manifest.versions.clone(), - dist_tags: manifest.dist_tags.clone(), - }, - etag: new_etag, - last_updated: current_timestamp_secs(), - }); - self.cache - .set_full_manifest(name.to_string(), Arc::new(manifest)); - self.store.store_versions(name, versions_info); - } - } - } - Ok(()) + match self + .execute_manifest_job(ManifestJob::Version { + name: name.to_string(), + spec: spec.to_string(), + fetch_spec: fetch_spec.to_string(), + format: self.version_metadata_format(), }) - .await?; - - // Cache state is the discriminator: `full_manifests` populated → 200; - // only `versions_info` populated → 304; neither → cache eviction race. 
- if let Some(manifest) = self.cache.get_full_manifest(name) { - Ok(FullManifestResult::Full(manifest)) - } else if self.cache.get_versions(name).is_some() { - Ok(FullManifestResult::NotModified) - } else { - Err(RegistryError(anyhow!( - "manifest for {name} vanished from cache after fetch" - ))) + .await? + { + ManifestJobDone::Version { manifest, .. } => Ok(manifest), + ManifestJobDone::Full { .. } => Err(RegistryError(anyhow!( + "provider returned full manifest for version job {name}@{spec}" + ))), } } - /// Resolve version manifest through memory → store → network. - /// - /// Cache key is `name@spec` (e.g., `lodash@^4.17.0`), so the same spec - /// requested multiple times shares one fetch. - /// - /// Non-semver registries resolve the spec by extracting the matching - /// version from the full manifest (the latter is itself single-flight - /// gated by `inflight_full`). Semver registries query the version - /// manifest directly. Either way the work for one `(name, spec)` runs - /// once; concurrent callers for the same key share the result. - async fn resolve_version_manifest( + async fn execute_extract_job( &self, name: &str, spec: &str, + version: String, + full: Arc, ) -> Result, RegistryError> { - if let Some(manifest) = self.cache.get_version_manifest(name, spec) { - return Ok(manifest); + match self + .execute_manifest_job(ManifestJob::ExtractVersion { + name: name.to_string(), + spec: spec.to_string(), + version, + full, + }) + .await? + { + ManifestJobDone::Version { manifest, .. } => Ok(manifest), + ManifestJobDone::Full { .. } => Err(RegistryError(anyhow!( + "provider returned full manifest for extract job {name}@{spec}" + ))), } - - self.inflight_version - .get_or_try_init::( - (name.to_string(), spec.to_string()), - || async { - // Re-check inside the worker (covers the brief window - // between fast-path miss and OnceMap shard-lock acquire). 
- if self.cache.get_version_manifest(name, spec).is_some() { - return Ok(()); - } - - // Store keys are *resolved* versions — only do this lookup - // when the caller already has an exact version. Range/tag - // specs would always miss (and on Windows, range chars - // like `*` / `>` aren't even valid filenames). - if !self.supports_semver - && deno_semver::Version::parse_from_npm(spec).is_ok() - && let Some(manifest) = self.store.load_version_manifest(name, spec).await - { - // Populate memory cache ourselves — store knows nothing about it. - self.cache.set_version_manifest( - name.to_string(), - spec.to_string(), - Arc::new(manifest), - ); - return Ok(()); - } - - if !self.supports_semver { - // Non-semver: resolve the spec by extracting the matching - // version from the full manifest. `resolve_full_manifest` - // is itself inflight-gated, so concurrent specs for the - // same name share one full-manifest fetch. - let (resolved_version, arc) = - self.resolve_via_full_manifest(name, spec).await?; - self.cache.set_version_manifest( - name.to_string(), - spec.to_string(), - Arc::clone(&arc), - ); - if resolved_version != spec { - self.cache.set_version_manifest( - name.to_string(), - resolved_version.clone(), - Arc::clone(&arc), - ); - } - if self - .stored_version - .insert((name.to_string(), resolved_version.clone())) - { - self.store.store_version_manifest( - name, - &resolved_version, - Arc::clone(&arc), - ); - } - return Ok(()); - } - - let manifest = - manifest::fetch_version_manifest(manifest::FetchVersionManifestOptions { - registry_url: &self.registry_url, - name, - spec, - format: manifest::MetadataFormat::Abbreviated, - }) - .await - .map_err(RegistryError)?; - - self.cache.set_version_manifest( - name.to_string(), - spec.to_string(), - Arc::new(manifest), - ); - Ok(()) - }, - ) - .await?; - - // Gate released — populated either by us, a prior waiter, or a previous - // run that hit memory/disk cache. Missing only on cache eviction race. 
- self.cache.get_version_manifest(name, spec).ok_or_else(|| { - RegistryError(anyhow!( - "version manifest for {name}@{spec} vanished from cache after fetch" - )) - }) } - /// Resolve `(name, spec)` for non-semver registries by reading the full - /// manifest and extracting the matching version. - /// - /// Handles the 304 (NotModified) case by falling back to the in-memory - /// versions cache for resolution and a single-version network fetch for - /// the manifest itself. The caller is responsible for caching the - /// extracted manifest; this helper does not touch `PackageCache`. - async fn resolve_via_full_manifest( + /// Compatibility wrapper for direct `RegistryClient` callers. The normal + /// install/deps path resolves in the BFS loop; this path executes the same + /// provider jobs without adding a second inflight layer. + async fn resolve_version_manifest_job( &self, name: &str, spec: &str, - ) -> Result<(String, Arc), RegistryError> { - match self.resolve_full_manifest(name).await? { - FullManifestResult::Full(full) => { + ) -> Result, RegistryError> { + if self.supports_semver { + return self.execute_version_job(name, spec, spec).await; + } + + match self + .execute_manifest_job(ManifestJob::Full { + name: name.to_string(), + spec: Some(spec.to_string()), + }) + .await? + { + ManifestJobDone::Full { + data: + ManifestFullData::Full { + manifest: full, + speculative, + }, + .. 
+ } => { + if let Some((_, manifest)) = speculative { + return Ok(manifest); + } if full.versions.is_empty() { return Err(RegistryError(anyhow!("No versions available for {}", name))); } let resolved_version = resolve_target_version((&*full).into(), spec) .map_err(|e| RegistryError(anyhow!("{}@{}: {}", name, spec, e)))?; - // Race window: while we awaited `resolve_full_manifest` (gated - // by `inflight_full`), a sibling spec for the same - // package may have resolved to this same version and populated - // `version_manifests` cache (writer at line 373-387 stores - // both `(name, spec)` and `(name, resolved_version)` keys). - // Reuse the Arc instead of paying the off-runtime reparse. - if let Some(cached) = self.cache.get_version_manifest(name, &resolved_version) { - return Ok((resolved_version, cached)); - } - let (resolved_version, core) = - extract_core_version_off_runtime(full, resolved_version).await; - let core = core.ok_or_else(|| { - RegistryError(anyhow!( - "Version {} not found in manifest for {}", - resolved_version, - name - )) - })?; - Ok((resolved_version, core)) + self.execute_extract_job(name, spec, resolved_version, full) + .await } - FullManifestResult::NotModified => { - // 304 fallback: ETag matched, full payload not refetched. - // Resolve via the lightweight versions cache, then hit the - // network for the single requested version. Direct call into - // `manifest::fetch_version_manifest` (not `self.resolve_version_manifest`) - // to avoid re-entering the inflight_version gate; the outer - // `inflight_version<(name, spec)>` already serializes us. - let versions_info = self.cache.get_versions(name).ok_or_else(|| { - RegistryError(anyhow!("Versions cache not found for {}", name)) - })?; - let resolved_version = resolve_target_version((&*versions_info).into(), spec) + ManifestJobDone::Full { + data: ManifestFullData::Versions(versions), + .. 
+ } => { + if versions.versions.version_list.is_empty() { + return Err(RegistryError(anyhow!("No versions available for {}", name))); + } + let resolved_version = resolve_target_version((&*versions).into(), spec) .map_err(|e| RegistryError(anyhow!("{}@{}: {}", name, spec, e)))?; - let manifest = - manifest::fetch_version_manifest(manifest::FetchVersionManifestOptions { - registry_url: &self.registry_url, - name, - spec: &resolved_version, - format: manifest::MetadataFormat::Complete, - }) + self.execute_version_job(name, spec, &resolved_version) .await - .map_err(RegistryError)?; - Ok((resolved_version, Arc::new(manifest))) } + ManifestJobDone::Version { .. } => Err(RegistryError(anyhow!( + "provider returned version manifest for full job {name}" + ))), } } } @@ -671,16 +422,31 @@ impl RegistryClient for UnifiedRegistry { } async fn fetch_full_manifest(&self, name: &str) -> Result, Self::Error> { - match self.resolve_full_manifest(name).await? { - FullManifestResult::Full(manifest) => Ok(manifest), - FullManifestResult::NotModified => { - // 304 in trait context: caller doesn't have versions cache access, - // so we return an error indicating the manifest is unchanged. - Err(RegistryError(anyhow!( - "No versions available for {} (304 Not Modified but no full manifest cached)", - name - ))) - } + match self + .execute_manifest_job(ManifestJob::Full { + name: name.to_string(), + spec: None, + }) + .await? + { + ManifestJobDone::Full { + data: + ManifestFullData::Full { + manifest, + speculative: _, + }, + .. + } => Ok(manifest), + ManifestJobDone::Full { + data: ManifestFullData::Versions(_), + .. + } => Err(RegistryError(anyhow!( + "No full manifest available for {} (304 Not Modified)", + name + ))), + ManifestJobDone::Version { .. 
} => Err(RegistryError(anyhow!( + "provider returned version manifest for full job {name}" + ))), } } @@ -689,9 +455,7 @@ impl RegistryClient for UnifiedRegistry { name: &str, spec: &str, ) -> Result, Self::Error> { - // Delegates to `resolve_version_manifest` so the inflight dedup + - // memory/store cache logic lives in one place. - self.resolve_version_manifest(name, spec).await + self.resolve_version_manifest_job(name, spec).await } async fn resolve_package( @@ -711,12 +475,8 @@ impl RegistryClient for UnifiedRegistry { ); } - // Single entry point: `resolve_version_manifest` covers both semver - // (direct version-manifest fetch) and non-semver (full-manifest + - // extract) paths, with `inflight_version<(name, spec)>` ensuring one - // fetch+extraction per `(name, spec)` regardless of registry type. let manifest = self - .resolve_version_manifest(&fetch_name, &fetch_spec) + .resolve_version_manifest_job(&fetch_name, &fetch_spec) .await?; Ok(ResolvedPackage { name: name.to_string(), @@ -728,44 +488,7 @@ impl RegistryClient for UnifiedRegistry { #[cfg(test)] mod tests { - use std::sync::Mutex; - use super::*; - use crate::service::{ManifestJob, ManifestJobDone, ManifestProvider, ManifestStore}; - - #[derive(Default)] - struct RecordingStore { - stored_versions: Mutex>, - } - - #[async_trait] - impl ManifestStore for RecordingStore { - async fn load_versions(&self, _name: &str) -> Option { - None - } - - async fn load_version_manifest( - &self, - _name: &str, - _version: &str, - ) -> Option { - None - } - - fn store_versions(&self, _name: &str, _info: Arc) {} - - fn store_version_manifest( - &self, - name: &str, - version: &str, - _manifest: Arc, - ) { - self.stored_versions - .lock() - .unwrap() - .push((name.to_string(), version.to_string())); - } - } #[test] fn test_is_npm_registry() { @@ -819,77 +542,12 @@ mod tests { } #[test] - fn test_unified_registry_with_shared_cache() { - let shared_cache = Arc::new(PackageCache::default()); - - let registry1 = 
UnifiedRegistry::builder() - .registry("https://registry.npmmirror.com") - .cache(Arc::clone(&shared_cache)) - .build(); - - let registry2 = UnifiedRegistry::builder() - .registry("https://registry.npmmirror.com") - .cache(Arc::clone(&shared_cache)) - .build(); - - // Both registries share the same cache - assert!(Arc::ptr_eq(®istry1.cache, ®istry2.cache)); - } - - #[tokio::test] - async fn test_unified_registry_executes_extract_manifest_provider_job() { - let store = Arc::new(RecordingStore::default()); + fn test_unified_registry_clone_shares_store() { let registry = UnifiedRegistry::builder() .registry("https://registry.npmmirror.com") - .store(store.clone()) .build(); + let cloned = registry.clone(); - let (full, _) = manifest::parse_full_manifest_with_core_off_runtime( - bytes::Bytes::from_static( - br#"{ - "name":"provider-extract-demo", - "dist-tags":{"latest":"1.0.0"}, - "versions":{ - "1.0.0":{ - "name":"provider-extract-demo", - "version":"1.0.0", - "dist":{"tarball":"https://registry.example/demo-1.0.0.tgz"} - } - } - }"#, - ), - None, - ) - .await - .unwrap(); - - let done = ManifestProvider::execute_manifest_job( - ®istry, - ManifestJob::ExtractVersion { - name: "provider-extract-demo".to_string(), - spec: "latest".to_string(), - version: "1.0.0".to_string(), - full: Arc::new(full), - }, - ) - .await - .unwrap(); - - let ManifestJobDone::Version { - spec, - manifest: returned, - .. 
- } = done - else { - panic!("expected version manifest job"); - }; - - assert_eq!(spec, "latest"); - assert_eq!(returned.name, "provider-extract-demo"); - assert_eq!(returned.version, "1.0.0"); - assert_eq!( - store.stored_versions.lock().unwrap().as_slice(), - &[("provider-extract-demo".to_string(), "1.0.0".to_string())] - ); + assert!(Arc::ptr_eq(®istry.store, &cloned.store)); } } From 4da37f1a2f07449c54dd36bbede9e81fd4e554e8 Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 03:09:15 +0800 Subject: [PATCH 19/21] perf(pm): remove resolver memory cache layer --- crates/ruborist/src/model/mod.rs | 6 +- crates/ruborist/src/service/cache.rs | 217 +----------------------- crates/ruborist/src/service/mod.rs | 5 +- crates/ruborist/src/service/provider.rs | 2 +- crates/ruborist/src/service/store.rs | 6 +- 5 files changed, 13 insertions(+), 223 deletions(-) diff --git a/crates/ruborist/src/model/mod.rs b/crates/ruborist/src/model/mod.rs index 023e310a7..28a1be7f0 100644 --- a/crates/ruborist/src/model/mod.rs +++ b/crates/ruborist/src/model/mod.rs @@ -71,9 +71,9 @@ //! cloning at every cache read and graph insertion: //! //! ```text -//! MemoryCache ──┐ -//! ├── Arc ── (ref-count clone) -//! PackageNode ──┘ +//! ManifestState ──┐ +//! ├── Arc ── (ref-count clone) +//! PackageNode ────┘ //! //! Cold paths (disk I/O, serde) still use owned CoreVersionManifest, //! wrapping in Arc::new() at the boundary. diff --git a/crates/ruborist/src/service/cache.rs b/crates/ruborist/src/service/cache.rs index 1b573cf19..3ca5a98cb 100644 --- a/crates/ruborist/src/service/cache.rs +++ b/crates/ruborist/src/service/cache.rs @@ -1,47 +1,14 @@ -//! In-memory manifest cache for dependency resolution. +//! Manifest cache data structures for dependency resolution. //! -//! ruborist itself only owns the memory tier; persistent storage (disk, remote -//! KV, …) is delegated to a [`super::store::ManifestStore`] supplied by the -//! host. 
The project-level cache ([`ProjectCacheData`]) is also pure data — -//! callers load/save it themselves and pass it through `BuildDepsOptions` / -//! `BuildDepsOutput`. -//! -//! # Memory Layout -//! -//! ```text -//! MemoryCache ─ Clone ─► (cheap: Arc ref-count) -//! │ -//! └──► Arc single allocation -//! ├── DashMap> sharded, lock-free reads -//! ├── DashMap> -//! └── DashMap> -//! │ -//! ▼ -//! All values Arc-wrapped → get/set is O(1) ref-count, -//! no full clone of the (large) manifest payload. -//! -//! Global singleton: GLOBAL_MEMORY_CACHE (LazyLock) -//! └── all UnifiedRegistry instances share the same cache -//! ``` -//! -//! # Lookup Flow -//! -//! ```text -//! resolve(name, spec) -//! │ -//! ├─ 1. Memory hit? ──yes──► Arc clone → done -//! ├─ 2. ManifestStore hit? ──yes──► populate memory → done -//! └─ 3. Network ──────► fetch JSON → store memory + fire-and-forget -//! ManifestStore::store_* -//! ``` +//! The demand BFS loop owns the in-memory manifest maps for one resolution run. +//! This module only carries serializable data shared between the loop, +//! provider jobs, and host persistence. use std::collections::HashMap; -use std::sync::{Arc, LazyLock}; -use dashmap::DashMap; use serde::{Deserialize, Serialize}; -use crate::model::manifest::{CoreVersionManifest, FullManifest, VersionsRef}; +use crate::model::manifest::{CoreVersionManifest, VersionsRef}; /// Lightweight versions info, persisted by `ManifestStore` for ETag validation. #[derive(Debug, Clone, Serialize, Deserialize)] @@ -74,121 +41,6 @@ impl<'a> From<&'a Versions> for VersionsRef<'a> { } } -// ============================================================================ -// Memory cache (lock-free reads via DashMap) -// ============================================================================ - -/// Thread-safe in-memory manifest cache. 
Uses sharded `DashMap`s so concurrent -/// reads are lock-free across shards and writes only contend within a single -/// shard; values are stored as `Arc<…>` so reads return cheap ref-count clones -/// instead of cloning the full (potentially large) manifest payload. -#[derive(Clone)] -pub struct MemoryCache(Arc); - -struct MemoryCacheInner { - full_manifests: DashMap>, - versions_info: DashMap>, - version_manifests: DashMap>, -} - -/// Global singleton. All `UnifiedRegistry` instances share the same cache. -static GLOBAL_MEMORY_CACHE: LazyLock = LazyLock::new(|| { - MemoryCache(Arc::new(MemoryCacheInner { - full_manifests: DashMap::new(), - versions_info: DashMap::new(), - version_manifests: DashMap::new(), - })) -}); - -impl Default for MemoryCache { - fn default() -> Self { - GLOBAL_MEMORY_CACHE.clone() - } -} - -impl MemoryCache { - pub fn get_full_manifest(&self, name: &str) -> Option> { - self.0.full_manifests.get(name).map(|v| v.clone()) - } - - pub fn set_full_manifest(&self, name: String, manifest: Arc) { - self.0.full_manifests.insert(name, manifest); - } - - pub fn get_versions(&self, name: &str) -> Option> { - self.0.versions_info.get(name).map(|v| v.clone()) - } - - pub fn set_versions(&self, name: String, info: Arc) { - self.0.versions_info.insert(name, info); - } - - pub fn get_version_manifest( - &self, - name: &str, - version: &str, - ) -> Option> { - let key = format!("{name}@{version}"); - self.0.version_manifests.get(&key).map(|v| v.clone()) - } - - pub fn set_version_manifest( - &self, - name: String, - version: String, - manifest: Arc, - ) { - let key = format!("{name}@{version}"); - self.0.version_manifests.insert(key, manifest); - } - - pub fn full_manifest_count(&self) -> usize { - self.0.full_manifests.len() - } - - pub fn versions_count(&self) -> usize { - self.0.versions_info.len() - } - - pub fn version_manifest_count(&self) -> usize { - self.0.version_manifests.len() - } - - /// Export all version manifests for persistence into a 
project cache. - pub fn export_version_manifests(&self) -> Vec<(String, Arc<CoreVersionManifest>)> { - self.0 - .version_manifests - .iter() - .map(|kv| (kv.key().clone(), kv.value().clone())) - .collect() - } - - /// Get cache statistics. - pub fn stats(&self) -> CacheStats { - CacheStats { - full_manifest_count: self.full_manifest_count(), - versions_count: self.versions_count(), - version_manifest_count: self.version_manifest_count(), - } - } -} - -/// Cache statistics. -#[derive(Debug, Clone)] -pub struct CacheStats { - pub full_manifest_count: usize, - pub versions_count: usize, - pub version_manifest_count: usize, -} - -/// Alias kept so call sites that pre-date the disk-cache split can continue -/// to spell the in-memory cache as `PackageCache` without churn. -pub type PackageCache = MemoryCache; - -// ============================================================================ -// Project-level cache (per-project resolved packages) -// ============================================================================ - /// Project-level cache data. /// /// Stores resolved package information for a specific project.
Hosts persist @@ -211,62 +63,3 @@ pub struct ProjectPackageCache { #[serde(default)] pub manifests: HashMap, } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_memory_cache_full_manifest() { - let cache = MemoryCache::default(); - - let manifest = FullManifest { - name: "test".to_string(), - ..Default::default() - }; - - cache.set_full_manifest("test".to_string(), Arc::new(manifest)); - - let retrieved = cache.get_full_manifest("test").unwrap(); - assert_eq!(retrieved.name, "test"); - assert!(cache.full_manifest_count() >= 1); - } - - #[test] - fn test_memory_cache_versions() { - let cache = MemoryCache::default(); - - let info = VersionsInfo { - versions: Versions { - version_list: vec!["1.0.0".to_string()], - dist_tags: HashMap::new(), - }, - etag: Some("abc".to_string()), - last_updated: 12345, - }; - - cache.set_versions("test".to_string(), Arc::new(info)); - - let retrieved = cache.get_versions("test").unwrap(); - assert_eq!(retrieved.versions.version_list, vec!["1.0.0"]); - assert!(cache.versions_count() >= 1); - } - - #[test] - fn test_memory_cache_version_manifest() { - let cache = MemoryCache::default(); - - let manifest = CoreVersionManifest { - name: "test".to_string(), - version: "1.0.0".to_string(), - ..Default::default() - }; - - cache.set_version_manifest("test".to_string(), "1.0.0".to_string(), Arc::new(manifest)); - - let retrieved = cache.get_version_manifest("test", "1.0.0").unwrap(); - assert_eq!(retrieved.name, "test"); - assert_eq!(retrieved.version, "1.0.0"); - assert!(cache.version_manifest_count() >= 1); - } -} diff --git a/crates/ruborist/src/service/mod.rs b/crates/ruborist/src/service/mod.rs index 58f41f9a9..2ea05b57c 100644 --- a/crates/ruborist/src/service/mod.rs +++ b/crates/ruborist/src/service/mod.rs @@ -54,10 +54,7 @@ mod registry; mod store; pub use api::{BuildDepsOptions, BuildDepsOutput, build_deps}; -pub use cache::{ - CacheStats, MemoryCache, PackageCache, ProjectCacheData, ProjectPackageCache, Versions, - 
VersionsInfo, -}; +pub use cache::{ProjectCacheData, ProjectPackageCache, Versions, VersionsInfo}; pub use fs::{Glob, NoopGlob, exists, read_to_string}; pub use http::client_builder; pub use manifest::{ diff --git a/crates/ruborist/src/service/provider.rs b/crates/ruborist/src/service/provider.rs index dff19b844..bb74ea8c4 100644 --- a/crates/ruborist/src/service/provider.rs +++ b/crates/ruborist/src/service/provider.rs @@ -2,7 +2,7 @@ //! //! The demand BFS loop owns per-run cache, waiters, and inflight de-duplication. //! A provider only executes one manifest job and hides whether it satisfied the -//! job from memory, persistent storage, or the network. +//! job from memory, disk/OPFS, or the network. use std::sync::Arc; diff --git a/crates/ruborist/src/service/store.rs b/crates/ruborist/src/service/store.rs index aa9d420b0..f4b3dbb84 100644 --- a/crates/ruborist/src/service/store.rs +++ b/crates/ruborist/src/service/store.rs @@ -1,8 +1,8 @@ //! Persistence backend for the registry's manifest cache. //! -//! ruborist itself owns only the in-memory tier ([`super::cache::MemoryCache`]); -//! any persistent storage (disk, remote KV, …) is supplied by the host through -//! a [`ManifestStore`] implementation. This keeps the resolver free of file I/O +//! The resolver main loop owns the per-run in-memory manifest maps. Persistent +//! storage (disk, remote KV, …) is supplied by the host through a +//! [`ManifestStore`] implementation. This keeps the resolver free of file I/O //! and lets hosts pick their own format, layout, and write strategy. //! //! 
Contract: From 711835da4496772869ba327b3048a154d34498e4 Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Fri, 22 May 2026 03:12:22 +0800 Subject: [PATCH 20/21] perf(pm): update resolver cache ownership docs --- crates/ruborist/src/model/manifest.rs | 5 ++--- crates/ruborist/src/traits/registry.rs | 5 +++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/ruborist/src/model/manifest.rs b/crates/ruborist/src/model/manifest.rs index 125482993..f4dc05fc1 100644 --- a/crates/ruborist/src/model/manifest.rs +++ b/crates/ruborist/src/model/manifest.rs @@ -128,9 +128,8 @@ impl FullManifest { /// subtree is visited in place — no intermediate `serde_json::Value` /// allocation. /// - /// `OnceMap` single-flight in `UnifiedRegistry` reduces the per-key - /// invocation count to one, so the per-call full-tree parse cost is - /// bounded. + /// The BFS resolver de-duplicates in-flight extract jobs per key, so the + /// per-call full-tree parse cost is bounded. fn extract_version Deserialize<'de>>(&self, version: &str) -> Option { use simd_json::prelude::ValueObjectAccess; let mut buf = self.raw.to_vec(); diff --git a/crates/ruborist/src/traits/registry.rs b/crates/ruborist/src/traits/registry.rs index ea9d84558..24c1843fa 100644 --- a/crates/ruborist/src/traits/registry.rs +++ b/crates/ruborist/src/traits/registry.rs @@ -133,9 +133,10 @@ pub trait RegistryClient { false } - /// Base registry URL used by schedulers that need to classify raw work. + /// The base registry URL used by raw-fetch dependency builders. /// - /// Implementations without a concrete URL can keep the default. + /// Implementations that cannot expose a concrete URL can keep the default; + /// callers should fall back to the regular trait methods when this is empty. 
fn registry_url(&self) -> &str { "" } From ec935b807cb68824cf5d80b06fccda4b5a13a830 Mon Sep 17 00:00:00 2001 From: elrrrrrrr Date: Mon, 25 May 2026 14:10:34 +0800 Subject: [PATCH 21/21] chore(pm): clarify resolver provider diagnostics --- crates/ruborist/src/resolver/builder.rs | 5 ++++ crates/ruborist/src/service/manifest.rs | 31 +++++++++++++++++++++---- crates/ruborist/src/service/provider.rs | 3 +++ 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/crates/ruborist/src/resolver/builder.rs b/crates/ruborist/src/resolver/builder.rs index 9e6b573e5..843eff885 100644 --- a/crates/ruborist/src/resolver/builder.rs +++ b/crates/ruborist/src/resolver/builder.rs @@ -1683,6 +1683,11 @@ where } let Some(done) = fetches.next().await else { + tracing::warn!( + full_waiters = state.full_waiters.values().map(Vec::len).sum::<usize>(), + version_waiters = state.version_waiters.values().map(Vec::len).sum::<usize>(), + "manifest fetch stream ended with pending resolver waiters; falling back to sequential resolution" + ); let mut fallback = Vec::new(); for (_, waiters) in state.full_waiters.drain() { fallback.extend(waiters); diff --git a/crates/ruborist/src/service/manifest.rs b/crates/ruborist/src/service/manifest.rs index b68eb975b..60e15927f 100644 --- a/crates/ruborist/src/service/manifest.rs +++ b/crates/ruborist/src/service/manifest.rs @@ -68,11 +68,32 @@ fn parse_full_manifest_with_core_sync( .map_err(|e| anyhow!("JSON parse error: {e}"))?; manifest.raw = raw_bytes; - let speculative = spec.and_then(|spec| { - resolve_target_version((&manifest).into(), &spec) - .ok() - .and_then(|version| manifest.get_core_version(&version).map(|core| (spec, core))) - }); + let speculative = match spec { + Some(spec) => match resolve_target_version((&manifest).into(), &spec) { + Ok(version) => match manifest.get_core_version(&version) { + Some(core) => Some((spec, core)), + None => { + tracing::trace!( + package = %manifest.name, + spec = %spec, + version = %version, + "speculative manifest
extract missed resolved version" + ); + None + } + }, + Err(error) => { + tracing::trace!( + package = %manifest.name, + spec = %spec, + error = %error, + "speculative manifest version resolution failed" + ); + None + } + }, + None => None, + }; Ok((manifest, speculative)) } diff --git a/crates/ruborist/src/service/provider.rs b/crates/ruborist/src/service/provider.rs index bb74ea8c4..23ebe9931 100644 --- a/crates/ruborist/src/service/provider.rs +++ b/crates/ruborist/src/service/provider.rs @@ -73,6 +73,9 @@ pub enum ManifestJobDone { } /// Lower-level manifest provider used by the demand BFS loop. +/// +/// Resolver workers clone the provider before spawning jobs, so implementors +/// should keep `Clone` cheap (for example by storing shared state behind `Arc`). #[cfg_attr(target_arch = "wasm32", async_trait(?Send))] #[cfg_attr(not(target_arch = "wasm32"), async_trait)] pub trait ManifestProvider: RegistryClient + Clone + Send + Sync + 'static {