Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions crates/ruborist/src/model/manifest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,8 @@ impl FullManifest {
/// subtree is visited in place — no intermediate `serde_json::Value`
/// allocation.
///
/// `OnceMap` single-flight in `UnifiedRegistry` reduces the per-key
/// invocation count to one, so the per-call full-tree parse cost is
/// bounded.
/// The BFS resolver de-duplicates in-flight extract jobs per key, so the
/// per-call full-tree parse cost is bounded.
fn extract_version<T: for<'de> Deserialize<'de>>(&self, version: &str) -> Option<T> {
use simd_json::prelude::ValueObjectAccess;
let mut buf = self.raw.to_vec();
Expand Down
6 changes: 3 additions & 3 deletions crates/ruborist/src/model/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@
//! cloning at every cache read and graph insertion:
//!
//! ```text
//! MemoryCache ──┐
//! ├── Arc<CoreVersionManifest> ── (ref-count clone)
//! PackageNode ──┘
//! ManifestState ──┐
//! ├── Arc<CoreVersionManifest> ── (ref-count clone)
//! PackageNode ────
//!
//! Cold paths (disk I/O, serde) still use owned CoreVersionManifest,
//! wrapping in Arc::new() at the boundary.
Expand Down
5 changes: 5 additions & 0 deletions crates/ruborist/src/resolver/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1683,6 +1683,11 @@ where
}

let Some(done) = fetches.next().await else {
tracing::warn!(
full_waiters = state.full_waiters.values().map(Vec::len).sum::<usize>(),
version_waiters = state.version_waiters.values().map(Vec::len).sum::<usize>(),
"manifest fetch stream ended with pending resolver waiters; falling back to sequential resolution"
);
let mut fallback = Vec::new();
for (_, waiters) in state.full_waiters.drain() {
fallback.extend(waiters);
Expand Down
217 changes: 5 additions & 212 deletions crates/ruborist/src/service/cache.rs
Original file line number Diff line number Diff line change
@@ -1,47 +1,14 @@
//! In-memory manifest cache for dependency resolution.
//! Manifest cache data structures for dependency resolution.
//!
//! ruborist itself only owns the memory tier; persistent storage (disk, remote
//! KV, …) is delegated to a [`super::store::ManifestStore`] supplied by the
//! host. The project-level cache ([`ProjectCacheData`]) is also pure data —
//! callers load/save it themselves and pass it through `BuildDepsOptions` /
//! `BuildDepsOutput`.
//!
//! # Memory Layout
//!
//! ```text
//! MemoryCache ─ Clone ─► (cheap: Arc ref-count)
//! │
//! └──► Arc<MemoryCacheInner> single allocation
//! ├── DashMap<Arc<FullManifest>> sharded, lock-free reads
//! ├── DashMap<Arc<VersionsInfo>>
//! └── DashMap<Arc<CoreVersionManifest>>
//! │
//! ▼
//! All values Arc-wrapped → get/set is O(1) ref-count,
//! no full clone of the (large) manifest payload.
//!
//! Global singleton: GLOBAL_MEMORY_CACHE (LazyLock)
//! └── all UnifiedRegistry instances share the same cache
//! ```
//!
//! # Lookup Flow
//!
//! ```text
//! resolve(name, spec)
//! │
//! ├─ 1. Memory hit? ──yes──► Arc<CoreVersionManifest> clone → done
//! ├─ 2. ManifestStore hit? ──yes──► populate memory → done
//! └─ 3. Network ──────► fetch JSON → store memory + fire-and-forget
//! ManifestStore::store_*
//! ```
//! The demand BFS loop owns the in-memory manifest maps for one resolution run.
//! This module only carries serializable data shared between the loop,
//! provider jobs, and host persistence.

use std::collections::HashMap;
use std::sync::{Arc, LazyLock};

use dashmap::DashMap;
use serde::{Deserialize, Serialize};

use crate::model::manifest::{CoreVersionManifest, FullManifest, VersionsRef};
use crate::model::manifest::{CoreVersionManifest, VersionsRef};

/// Lightweight versions info, persisted by `ManifestStore` for ETag validation.
#[derive(Debug, Clone, Serialize, Deserialize)]
Expand Down Expand Up @@ -74,121 +41,6 @@ impl<'a> From<&'a Versions> for VersionsRef<'a> {
}
}

// ============================================================================
// Memory cache (lock-free reads via DashMap)
// ============================================================================

/// Thread-safe in-memory manifest cache. Uses sharded `DashMap`s so concurrent
/// reads are lock-free across shards and writes only contend within a single
/// shard; values are stored as `Arc<…>` so reads return cheap ref-count clones
/// instead of cloning the full (potentially large) manifest payload.
#[derive(Clone)]
pub struct MemoryCache(Arc<MemoryCacheInner>);

struct MemoryCacheInner {
full_manifests: DashMap<String, Arc<FullManifest>>,
versions_info: DashMap<String, Arc<VersionsInfo>>,
version_manifests: DashMap<String, Arc<CoreVersionManifest>>,
}

/// Global singleton. All `UnifiedRegistry` instances share the same cache.
static GLOBAL_MEMORY_CACHE: LazyLock<MemoryCache> = LazyLock::new(|| {
MemoryCache(Arc::new(MemoryCacheInner {
full_manifests: DashMap::new(),
versions_info: DashMap::new(),
version_manifests: DashMap::new(),
}))
});

impl Default for MemoryCache {
fn default() -> Self {
GLOBAL_MEMORY_CACHE.clone()
}
}

impl MemoryCache {
pub fn get_full_manifest(&self, name: &str) -> Option<Arc<FullManifest>> {
self.0.full_manifests.get(name).map(|v| v.clone())
}

pub fn set_full_manifest(&self, name: String, manifest: Arc<FullManifest>) {
self.0.full_manifests.insert(name, manifest);
}

pub fn get_versions(&self, name: &str) -> Option<Arc<VersionsInfo>> {
self.0.versions_info.get(name).map(|v| v.clone())
}

pub fn set_versions(&self, name: String, info: Arc<VersionsInfo>) {
self.0.versions_info.insert(name, info);
}

pub fn get_version_manifest(
&self,
name: &str,
version: &str,
) -> Option<Arc<CoreVersionManifest>> {
let key = format!("{name}@{version}");
self.0.version_manifests.get(&key).map(|v| v.clone())
}

pub fn set_version_manifest(
&self,
name: String,
version: String,
manifest: Arc<CoreVersionManifest>,
) {
let key = format!("{name}@{version}");
self.0.version_manifests.insert(key, manifest);
}

pub fn full_manifest_count(&self) -> usize {
self.0.full_manifests.len()
}

pub fn versions_count(&self) -> usize {
self.0.versions_info.len()
}

pub fn version_manifest_count(&self) -> usize {
self.0.version_manifests.len()
}

/// Export all version manifests for persistence into a project cache.
pub fn export_version_manifests(&self) -> Vec<(String, Arc<CoreVersionManifest>)> {
self.0
.version_manifests
.iter()
.map(|kv| (kv.key().clone(), kv.value().clone()))
.collect()
}

/// Get cache statistics.
pub fn stats(&self) -> CacheStats {
CacheStats {
full_manifest_count: self.full_manifest_count(),
versions_count: self.versions_count(),
version_manifest_count: self.version_manifest_count(),
}
}
}

/// Cache statistics.
#[derive(Debug, Clone)]
pub struct CacheStats {
pub full_manifest_count: usize,
pub versions_count: usize,
pub version_manifest_count: usize,
}

/// Alias kept so call sites that pre-date the disk-cache split can continue
/// to spell the in-memory cache as `PackageCache` without churn.
pub type PackageCache = MemoryCache;

// ============================================================================
// Project-level cache (per-project resolved packages)
// ============================================================================

/// Project-level cache data.
///
/// Stores resolved package information for a specific project. Hosts persist
Expand All @@ -211,62 +63,3 @@ pub struct ProjectPackageCache {
#[serde(default)]
pub manifests: HashMap<String, CoreVersionManifest>,
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_memory_cache_full_manifest() {
let cache = MemoryCache::default();

let manifest = FullManifest {
name: "test".to_string(),
..Default::default()
};

cache.set_full_manifest("test".to_string(), Arc::new(manifest));

let retrieved = cache.get_full_manifest("test").unwrap();
assert_eq!(retrieved.name, "test");
assert!(cache.full_manifest_count() >= 1);
}

#[test]
fn test_memory_cache_versions() {
let cache = MemoryCache::default();

let info = VersionsInfo {
versions: Versions {
version_list: vec!["1.0.0".to_string()],
dist_tags: HashMap::new(),
},
etag: Some("abc".to_string()),
last_updated: 12345,
};

cache.set_versions("test".to_string(), Arc::new(info));

let retrieved = cache.get_versions("test").unwrap();
assert_eq!(retrieved.versions.version_list, vec!["1.0.0"]);
assert!(cache.versions_count() >= 1);
}

#[test]
fn test_memory_cache_version_manifest() {
let cache = MemoryCache::default();

let manifest = CoreVersionManifest {
name: "test".to_string(),
version: "1.0.0".to_string(),
..Default::default()
};

cache.set_version_manifest("test".to_string(), "1.0.0".to_string(), Arc::new(manifest));

let retrieved = cache.get_version_manifest("test", "1.0.0").unwrap();
assert_eq!(retrieved.name, "test");
assert_eq!(retrieved.version, "1.0.0");
assert!(cache.version_manifest_count() >= 1);
}
}
31 changes: 26 additions & 5 deletions crates/ruborist/src/service/manifest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,32 @@ fn parse_full_manifest_with_core_sync(
.map_err(|e| anyhow!("JSON parse error: {e}"))?;
manifest.raw = raw_bytes;

let speculative = spec.and_then(|spec| {
resolve_target_version((&manifest).into(), &spec)
.ok()
.and_then(|version| manifest.get_core_version(&version).map(|core| (spec, core)))
});
let speculative = match spec {
Some(spec) => match resolve_target_version((&manifest).into(), &spec) {
Ok(version) => match manifest.get_core_version(&version) {
Some(core) => Some((spec, core)),
None => {
tracing::trace!(
package = %manifest.name,
spec = %spec,
version = %version,
"speculative manifest extract missed resolved version"
);
None
}
},
Err(error) => {
tracing::trace!(
package = %manifest.name,
spec = %spec,
error = %error,
"speculative manifest version resolution failed"
);
None
}
},
None => None,
};

Ok((manifest, speculative))
}
Expand Down
5 changes: 1 addition & 4 deletions crates/ruborist/src/service/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,7 @@ mod registry;
mod store;

pub use api::{BuildDepsOptions, BuildDepsOutput, build_deps};
pub use cache::{
CacheStats, MemoryCache, PackageCache, ProjectCacheData, ProjectPackageCache, Versions,
VersionsInfo,
};
pub use cache::{ProjectCacheData, ProjectPackageCache, Versions, VersionsInfo};
pub use fs::{Glob, NoopGlob, exists, read_to_string};
pub use http::client_builder;
pub use manifest::{
Expand Down
5 changes: 4 additions & 1 deletion crates/ruborist/src/service/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//!
//! The demand BFS loop owns per-run cache, waiters, and inflight de-duplication.
//! A provider only executes one manifest job and hides whether it satisfied the
//! job from memory, persistent storage, or the network.
//! job from memory, disk/OPFS, or the network.

use std::sync::Arc;

Expand Down Expand Up @@ -73,6 +73,9 @@ pub enum ManifestJobDone {
}

/// Lower-level manifest provider used by the demand BFS loop.
///
/// Resolver workers clone the provider before spawning jobs, so implementors
/// should keep `Clone` cheap (for example by storing shared state behind `Arc`).
#[cfg_attr(target_arch = "wasm32", async_trait(?Send))]
#[cfg_attr(not(target_arch = "wasm32"), async_trait)]
pub trait ManifestProvider: RegistryClient + Clone + Send + Sync + 'static {
Expand Down
Loading
Loading