diff --git a/.gitignore b/.gitignore index fedf98b..c849745 100644 --- a/.gitignore +++ b/.gitignore @@ -135,3 +135,5 @@ npm/ .cursor profile.json.gz + +fixtures/*afm*.lock diff --git a/Cargo.lock b/Cargo.lock index b739c16..abe2039 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -75,7 +75,7 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "berry" -version = "0.5.0" +version = "0.5.1-beta0-js.0" dependencies = [ "nom", "smallvec", @@ -83,7 +83,7 @@ dependencies = [ [[package]] name = "berry-bench" -version = "0.5.0" +version = "0.5.1-beta0-js.0" dependencies = [ "berry", "berry-test", @@ -93,7 +93,7 @@ dependencies = [ [[package]] name = "berry-bench-bin" -version = "0.5.0" +version = "0.5.1-beta0-js.0" dependencies = [ "berry", "berry-test", @@ -103,9 +103,20 @@ dependencies = [ "serde_json", ] +[[package]] +name = "berry-compare-bin" +version = "0.5.1-beta0-js.0" +dependencies = [ + "berry", + "berry-test", + "clap", + "serde", + "serde_json", +] + [[package]] name = "berry-dump-bin" -version = "0.5.0" +version = "0.5.1-beta0-js.0" dependencies = [ "berry", "clap", @@ -123,7 +134,7 @@ dependencies = [ [[package]] name = "berry-test" -version = "0.5.0" +version = "0.5.1-beta0-js.0" dependencies = [ "berry", "rstest", diff --git a/Cargo.toml b/Cargo.toml index 4fd6c29..c21a05a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ resolver = "2" edition = "2024" license = "MIT OR Apache-2.0" repository = "https://github.com/spanishpear/berry" - version = "0.5.0" + version = "0.5.1-beta0-js.0" [workspace.dependencies] berry = { path = "crates/berry-core" } diff --git a/crates/berry-bench-bin/src/main.rs b/crates/berry-bench-bin/src/main.rs index 69c7dd4..531d1f5 100644 --- a/crates/berry-bench-bin/src/main.rs +++ b/crates/berry-bench-bin/src/main.rs @@ -80,11 +80,10 @@ impl FixtureTarget { fn from_path(path: impl Into) -> Self { let path = path.into(); - let label = path - .file_name() - .and_then(|name| 
name.to_str()) - .map(std::string::ToString::to_string) - .unwrap_or_else(|| path.display().to_string()); + let label = path.file_name().and_then(|name| name.to_str()).map_or_else( + || path.display().to_string(), + std::string::ToString::to_string, + ); Self { label, source: FixtureSource::ArbitraryPath(path), @@ -185,10 +184,8 @@ fn benchmark_fixture( let fixture_str = fixture.as_str(); println!("Benchmarking {} ({file_size} bytes)...", target.label); - if verbose { - if let Some(path) = target.source_path() { - println!(" Source path: {}", path.display()); - } + if verbose && let Some(path) = target.source_path() { + println!(" Source path: {}", path.display()); } // Warmup runs diff --git a/crates/berry-compare-bin/Cargo.toml b/crates/berry-compare-bin/Cargo.toml new file mode 100644 index 0000000..add7302 --- /dev/null +++ b/crates/berry-compare-bin/Cargo.toml @@ -0,0 +1,19 @@ +[package] +authors.workspace = true +edition.workspace = true +license.workspace = true +name = "berry-compare-bin" +repository.workspace = true +version.workspace = true + +publish = false + +[dependencies] +berry = { workspace = true } +berry-test = { workspace = true } +clap = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } + +[lints] +workspace = true diff --git a/crates/berry-compare-bin/src/main.rs b/crates/berry-compare-bin/src/main.rs new file mode 100644 index 0000000..a63b0d4 --- /dev/null +++ b/crates/berry-compare-bin/src/main.rs @@ -0,0 +1,686 @@ +//! A tool to compare berry-core parsing with @yarnpkg/parsers +//! +//! Parses a lockfile with both parsers, compares the results, +//! and measures the performance of each. 
+ +use berry::lockfile::Entry; +use berry::parse::parse_lockfile; +use berry_test::load_fixture_from_path; +use clap::Parser; +use serde::Deserialize; +use std::collections::HashMap; +use std::fs; +use std::io::Write; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; +use std::time::Instant; + +#[derive(Parser)] +#[command(name = "berry-compare")] +#[command(about = "Compare berry-core and @yarnpkg/parsers lockfile parsing")] +struct Args { + /// Path to the lockfile to compare + #[arg(short, long)] + fixture: PathBuf, + + /// Number of iterations for timing + #[arg(short, long, default_value = "5")] + iterations: usize, +} + +/// A simplified representation of a parsed package for comparison +#[derive(Debug, Deserialize)] +struct CompareEntry { + descriptors: Vec, + version: Option, + resolution: Option, + language_name: String, + link_type: String, + checksum: Option, + conditions: Option, + dependencies: HashMap, + dependencies_meta: HashMap, + peer_dependencies: HashMap, + peer_dependencies_meta: HashMap, + bin: HashMap, +} + +/// Simplified dependency meta for comparison +#[derive(Debug, Deserialize, PartialEq, Eq)] +struct DependencyMetaCompare { + built: Option, + optional: Option, + unplugged: Option, +} + +/// Simplified peer dependency meta for comparison +#[derive(Debug, Deserialize, PartialEq, Eq)] +struct PeerDependencyMetaCompare { + optional: bool, +} + +/// Output from the Node.js parser script +#[derive(Debug, Deserialize)] +struct JsParserOutput { + entries: Vec, +} + +/// Convert a berry Entry to `CompareEntry` +fn entry_to_compare(entry: &Entry<'_>) -> CompareEntry { + let mut descriptors: Vec = entry + .descriptors + .iter() + .map(|d| { + let ident = d.ident(); + let name = ident.scope().map_or_else( + || ident.name().to_string(), + |scope| format!("{}/{}", scope, ident.name()), + ); + format!("{}@{}", name, d.range()) + }) + .collect(); + descriptors.sort(); + + let dependencies: HashMap = entry + .package + .dependencies 
+ .iter() + .map(|(ident, desc)| { + let name = ident.scope().map_or_else( + || ident.name().to_string(), + |scope| format!("{}/{}", scope, ident.name()), + ); + (name, desc.range().to_string()) + }) + .collect(); + + let peer_dependencies: HashMap = entry + .package + .peer_dependencies + .iter() + .map(|(ident, desc)| { + let name = ident.scope().map_or_else( + || ident.name().to_string(), + |scope| format!("{}/{}", scope, ident.name()), + ); + (name, desc.range().to_string()) + }) + .collect(); + + let bin: HashMap = entry + .package + .bin + .iter() + .map(|(k, v)| ((*k).to_string(), (*v).to_string())) + .collect(); + + let dependencies_meta: HashMap = entry + .package + .dependencies_meta + .iter() + .filter_map(|(ident, meta)| { + meta.as_ref().map(|m| { + let name = ident.scope().map_or_else( + || ident.name().to_string(), + |scope| format!("{}/{}", scope, ident.name()), + ); + ( + name, + DependencyMetaCompare { + built: m.built, + optional: m.optional, + unplugged: m.unplugged, + }, + ) + }) + }) + .collect(); + + let peer_dependencies_meta: HashMap = entry + .package + .peer_dependencies_meta + .iter() + .map(|(ident, meta)| { + let name = ident.scope().map_or_else( + || ident.name().to_string(), + |scope| format!("{}/{}", scope, ident.name()), + ); + ( + name, + PeerDependencyMetaCompare { + optional: meta.optional, + }, + ) + }) + .collect(); + + CompareEntry { + descriptors, + version: entry.package.version.map(String::from), + resolution: entry.package.resolution.map(String::from), + language_name: entry.package.language_name.to_string(), + link_type: match entry.package.link_type { + berry::package::LinkType::Hard => "hard".to_string(), + berry::package::LinkType::Soft => "soft".to_string(), + }, + checksum: entry.package.checksum.map(String::from), + conditions: entry.package.conditions.map(String::from), + dependencies, + dependencies_meta, + peer_dependencies, + peer_dependencies_meta, + bin, + } +} + +/// Generate a Node.js script to parse the 
given lockfile +fn generate_js_script(fixture_path: &Path) -> String { + let path_str = fixture_path.display(); + format!( + r"const fs = require('fs'); + +let parseSyml; +try {{ + parseSyml = require('@yarnpkg/parsers').parseSyml; +}} catch (e) {{ + console.error('Error: @yarnpkg/parsers not installed. Run: npm install -g @yarnpkg/parsers'); + process.exit(1); +}} + +const content = fs.readFileSync('{path_str}', 'utf8'); +const parsed = parseSyml(content); + +const entries = []; + +for (const [key, value] of Object.entries(parsed)) {{ + if (key === '__metadata') continue; + + const descriptors = key.split(', ').map(d => d.trim()).sort(); + + const dependencies = {{}}; + if (value.dependencies) {{ + for (const [name, range] of Object.entries(value.dependencies)) {{ + dependencies[name] = String(range); + }} + }} + + const peerDependencies = {{}}; + if (value.peerDependencies) {{ + for (const [name, range] of Object.entries(value.peerDependencies)) {{ + peerDependencies[name] = String(range); + }} + }} + + const bin = {{}}; + if (value.bin) {{ + for (const [name, path] of Object.entries(value.bin)) {{ + bin[name] = String(path); + }} + }} + + const dependenciesMeta = {{}}; + if (value.dependenciesMeta) {{ + for (const [name, meta] of Object.entries(value.dependenciesMeta)) {{ + // Handle string true/false or boolean values + const toBoolOrNull = (v) => {{ + if (v === undefined) return null; + if (typeof v === 'string') return v === 'true'; + return v; + }}; + dependenciesMeta[name] = {{ + built: toBoolOrNull(meta.built), + optional: toBoolOrNull(meta.optional), + unplugged: toBoolOrNull(meta.unplugged), + }}; + }} + }} + + const peerDependenciesMeta = {{}}; + if (value.peerDependenciesMeta) {{ + for (const [name, meta] of Object.entries(value.peerDependenciesMeta)) {{ + // Handle string true/false or boolean values + let optional = meta.optional; + if (typeof optional === 'string') {{ + optional = optional === 'true'; + }} + peerDependenciesMeta[name] = {{ + 
optional: optional || false, + }}; + }} + }} + + entries.push({{ + descriptors, + version: value.version ? String(value.version) : null, + resolution: value.resolution ? String(value.resolution) : null, + language_name: value.languageName || 'unknown', + link_type: value.linkType || 'hard', + checksum: value.checksum ? String(value.checksum) : null, + conditions: value.conditions ? String(value.conditions) : null, + dependencies, + dependencies_meta: dependenciesMeta, + peer_dependencies: peerDependencies, + peer_dependencies_meta: peerDependenciesMeta, + bin, + }}); +}} + +console.log(JSON.stringify({{ entries }})); +" + ) +} + +fn get_npm_global_prefix() -> Option { + let output = Command::new("npm") + .args(["config", "get", "prefix"]) + .output() + .ok()?; + if output.status.success() { + Some(String::from_utf8_lossy(&output.stdout).trim().to_string()) + } else { + None + } +} + +fn run_js_parser(fixture_path: &Path) -> Result { + let script = generate_js_script(fixture_path); + let script_path = std::env::temp_dir().join("berry-compare-parser.js"); + fs::write(&script_path, &script).map_err(|e| format!("Failed to write temp script: {e}"))?; + + let mut cmd = Command::new("node"); + cmd + .arg(&script_path) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + + // Set NODE_PATH to include global node_modules + if let Some(prefix) = get_npm_global_prefix() { + let node_path = format!("{prefix}/lib/node_modules"); + cmd.env("NODE_PATH", node_path); + } + + let child = cmd + .spawn() + .map_err(|e| format!("Failed to spawn node: {e}"))?; + + let output = child + .wait_with_output() + .map_err(|e| format!("Failed to wait on node: {e}"))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(format!("Node script failed: {stderr}")); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + serde_json::from_str(&stdout).map_err(|e| format!("Failed to parse JSON: {e}\nOutput: {stdout}")) +} + +fn 
time_rust_parser(contents: &str, iterations: usize) -> (Vec>, f64) { + let mut total_time = 0.0; + let mut entries = Vec::new(); + + for _ in 0..iterations { + let start = Instant::now(); + let result = parse_lockfile(contents).expect("Failed to parse lockfile"); + total_time += start.elapsed().as_secs_f64() * 1000.0; + entries = result.1.entries; + } + + let avg_time = total_time / iterations as f64; + (entries, avg_time) +} + +fn time_js_parser(fixture_path: &Path, iterations: usize) -> (JsParserOutput, f64) { + let mut total_time = 0.0; + let mut output = None; + + for _ in 0..iterations { + let start = Instant::now(); + let result = run_js_parser(fixture_path).expect("Failed to run JS parser"); + total_time += start.elapsed().as_secs_f64() * 1000.0; + output = Some(result); + } + + let avg_time = total_time / iterations as f64; + (output.unwrap(), avg_time) +} + +#[derive(Debug)] +struct Difference { + resolution: String, + field: String, + rust_value: String, + js_value: String, +} + +#[allow(clippy::too_many_lines)] +fn compare_entries(rust_entries: &[CompareEntry], js_entries: &[CompareEntry]) -> Vec { + let mut differences = Vec::new(); + + // Build lookup by resolution + let rust_by_res: HashMap<&str, &CompareEntry> = rust_entries + .iter() + .filter_map(|e| e.resolution.as_ref().map(|r| (r.as_str(), e))) + .collect(); + + let js_by_res: HashMap<&str, &CompareEntry> = js_entries + .iter() + .filter_map(|e| e.resolution.as_ref().map(|r| (r.as_str(), e))) + .collect(); + + // Check rust entries against JS + for (resolution, rust_entry) in &rust_by_res { + if let Some(js_entry) = js_by_res.get(resolution) { + // Compare descriptors + if rust_entry.descriptors != js_entry.descriptors { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: "descriptors".to_string(), + rust_value: format!("{:?}", rust_entry.descriptors), + js_value: format!("{:?}", js_entry.descriptors), + }); + } + + // Compare fields + if rust_entry.version != 
js_entry.version { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: "version".to_string(), + rust_value: format!("{:?}", rust_entry.version), + js_value: format!("{:?}", js_entry.version), + }); + } + + if rust_entry.language_name != js_entry.language_name { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: "languageName".to_string(), + rust_value: rust_entry.language_name.clone(), + js_value: js_entry.language_name.clone(), + }); + } + + if rust_entry.link_type != js_entry.link_type { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: "linkType".to_string(), + rust_value: rust_entry.link_type.clone(), + js_value: js_entry.link_type.clone(), + }); + } + + if rust_entry.checksum != js_entry.checksum { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: "checksum".to_string(), + rust_value: format!("{:?}", rust_entry.checksum), + js_value: format!("{:?}", js_entry.checksum), + }); + } + + // Compare dependencies + for (dep_name, rust_range) in &rust_entry.dependencies { + if let Some(js_range) = js_entry.dependencies.get(dep_name) { + if rust_range != js_range { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: format!("dependencies[{dep_name}]"), + rust_value: rust_range.clone(), + js_value: js_range.clone(), + }); + } + } else { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: format!("dependencies[{dep_name}]"), + rust_value: rust_range.clone(), + js_value: "(missing)".to_string(), + }); + } + } + + for (dep_name, js_range) in &js_entry.dependencies { + if !rust_entry.dependencies.contains_key(dep_name) { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: format!("dependencies[{dep_name}]"), + rust_value: "(missing)".to_string(), + js_value: js_range.clone(), + }); + } + } + + // Compare peer dependencies + for (dep_name, rust_range) in 
&rust_entry.peer_dependencies { + if let Some(js_range) = js_entry.peer_dependencies.get(dep_name) { + if rust_range != js_range { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: format!("peerDependencies[{dep_name}]"), + rust_value: rust_range.clone(), + js_value: js_range.clone(), + }); + } + } else { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: format!("peerDependencies[{dep_name}]"), + rust_value: rust_range.clone(), + js_value: "(missing)".to_string(), + }); + } + } + + for (dep_name, js_range) in &js_entry.peer_dependencies { + if !rust_entry.peer_dependencies.contains_key(dep_name) { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: format!("peerDependencies[{dep_name}]"), + rust_value: "(missing)".to_string(), + js_value: js_range.clone(), + }); + } + } + + // Compare bin entries + for (bin_name, rust_path) in &rust_entry.bin { + if let Some(js_path) = js_entry.bin.get(bin_name) { + if rust_path != js_path { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: format!("bin[{bin_name}]"), + rust_value: rust_path.clone(), + js_value: js_path.clone(), + }); + } + } else { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: format!("bin[{bin_name}]"), + rust_value: rust_path.clone(), + js_value: "(missing)".to_string(), + }); + } + } + + for (bin_name, js_path) in &js_entry.bin { + if !rust_entry.bin.contains_key(bin_name) { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: format!("bin[{bin_name}]"), + rust_value: "(missing)".to_string(), + js_value: js_path.clone(), + }); + } + } + + // Compare conditions + if rust_entry.conditions != js_entry.conditions { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: "conditions".to_string(), + rust_value: format!("{:?}", rust_entry.conditions), + js_value: format!("{:?}", js_entry.conditions), + 
}); + } + + // Compare dependencies_meta + for (dep_name, rust_meta) in &rust_entry.dependencies_meta { + if let Some(js_meta) = js_entry.dependencies_meta.get(dep_name) { + if rust_meta != js_meta { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: format!("dependenciesMeta[{dep_name}]"), + rust_value: format!("{rust_meta:?}"), + js_value: format!("{js_meta:?}"), + }); + } + } else { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: format!("dependenciesMeta[{dep_name}]"), + rust_value: format!("{rust_meta:?}"), + js_value: "(missing)".to_string(), + }); + } + } + + for (dep_name, js_meta) in &js_entry.dependencies_meta { + if !rust_entry.dependencies_meta.contains_key(dep_name) { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: format!("dependenciesMeta[{dep_name}]"), + rust_value: "(missing)".to_string(), + js_value: format!("{js_meta:?}"), + }); + } + } + + // Compare peer_dependencies_meta + for (dep_name, rust_meta) in &rust_entry.peer_dependencies_meta { + if let Some(js_meta) = js_entry.peer_dependencies_meta.get(dep_name) { + if rust_meta != js_meta { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: format!("peerDependenciesMeta[{dep_name}]"), + rust_value: format!("{rust_meta:?}"), + js_value: format!("{js_meta:?}"), + }); + } + } else { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: format!("peerDependenciesMeta[{dep_name}]"), + rust_value: format!("{rust_meta:?}"), + js_value: "(missing)".to_string(), + }); + } + } + + for (dep_name, js_meta) in &js_entry.peer_dependencies_meta { + if !rust_entry.peer_dependencies_meta.contains_key(dep_name) { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: format!("peerDependenciesMeta[{dep_name}]"), + rust_value: "(missing)".to_string(), + js_value: format!("{js_meta:?}"), + }); + } + } + } else { + 
differences.push(Difference { + resolution: (*resolution).to_string(), + field: "(entry)".to_string(), + rust_value: "present".to_string(), + js_value: "(missing)".to_string(), + }); + } + } + + // Check for entries in JS but not in Rust + for resolution in js_by_res.keys() { + if !rust_by_res.contains_key(resolution) { + differences.push(Difference { + resolution: (*resolution).to_string(), + field: "(entry)".to_string(), + rust_value: "(missing)".to_string(), + js_value: "present".to_string(), + }); + } + } + + differences +} + +fn main() { + let args = Args::parse(); + + let fixture_path = args.fixture.canonicalize().unwrap_or_else(|e| { + eprintln!( + "Error: Cannot find fixture file {}: {}", + args.fixture.display(), + e + ); + std::process::exit(1); + }); + + let contents = load_fixture_from_path(&fixture_path); + let file_size = contents.len(); + + println!("Comparing parsers for: {}", fixture_path.display()); + println!("File size: {file_size} bytes"); + println!("Iterations: {}", args.iterations); + println!(); + + // Time Rust parser + print!("Running Rust parser... "); + std::io::stdout().flush().unwrap(); + let (rust_entries, rust_time) = time_rust_parser(&contents, args.iterations); + println!("done"); + + // Time JS parser + print!("Running JS parser... 
"); + std::io::stdout().flush().unwrap(); + let (js_output, js_time) = time_js_parser(&fixture_path, args.iterations); + println!("done"); + + // Convert rust entries for comparison + let rust_compare: Vec = rust_entries.iter().map(entry_to_compare).collect(); + + println!(); + println!("=== Performance ==="); + println!("Rust: {rust_time:.3} ms avg"); + println!("JS: {js_time:.3} ms avg"); + let speedup = js_time / rust_time; + println!("Speedup: {speedup:.2}x"); + + println!(); + println!("=== Entry Counts ==="); + println!("Rust: {} entries", rust_compare.len()); + println!("JS: {} entries", js_output.entries.len()); + + // Compare entries + let differences = compare_entries(&rust_compare, &js_output.entries); + + println!(); + println!("=== Differences ==="); + if differences.is_empty() { + println!("No differences found"); + } else { + println!("{} differences found:", differences.len()); + for (i, diff) in differences.iter().enumerate().take(20) { + println!( + " {}. [{}] {}: rust={}, js={}", + i + 1, + diff.resolution, + diff.field, + diff.rust_value, + diff.js_value + ); + } + if differences.len() > 20 { + println!(" ... and {} more", differences.len() - 20); + } + } +} diff --git a/crates/berry-core/Cargo.toml b/crates/berry-core/Cargo.toml index 065e262..360bec8 100644 --- a/crates/berry-core/Cargo.toml +++ b/crates/berry-core/Cargo.toml @@ -6,7 +6,7 @@ edition.workspace = true license.workspace = true name = "berry" repository.workspace = true -version = "0.5.0" +version = "0.5.1-beta0-js.0" [dependencies] nom = { workspace = true } diff --git a/crates/berry-core/src/ident.rs b/crates/berry-core/src/ident.rs index 1ebbca1..beea19c 100644 --- a/crates/berry-core/src/ident.rs +++ b/crates/berry-core/src/ident.rs @@ -59,10 +59,9 @@ impl<'a> Range<'a> { /// Returns the selector part (e.g., "^1.2.3", "packages/a", or the full raw when no protocol). 
pub fn selector(&self) -> &'a str { - match self.protocol_sep_index { - Some(i) => &self.raw[i + 1..], - None => self.raw, - } + self + .protocol_sep_index + .map_or(self.raw, |i| &self.raw[i + 1..]) } } diff --git a/crates/berry-core/src/lockfile.rs b/crates/berry-core/src/lockfile.rs index 368bfd1..2617f6f 100644 --- a/crates/berry-core/src/lockfile.rs +++ b/crates/berry-core/src/lockfile.rs @@ -27,7 +27,7 @@ pub struct Lockfile<'a> { /// A single lockfile entry is a mapping of one or more descriptors to a single package #[derive(Debug)] pub struct Entry<'a> { - /// The descriptors of the entry (using SmallVec for stack allocation in common case) + /// The descriptors of the entry (using `SmallVec` for stack allocation in common case) pub descriptors: SmallVec<[Descriptor<'a>; 4]>, /// The package of the entry pub package: Package<'a>, diff --git a/crates/berry-core/src/parse.rs b/crates/berry-core/src/parse.rs index d2d08d7..b010dd2 100644 --- a/crates/berry-core/src/parse.rs +++ b/crates/berry-core/src/parse.rs @@ -95,7 +95,7 @@ pub fn parse_package_entry( } /// Parse a package descriptor line like: "debug@npm:1.0.0":, eslint-config-turbo@latest:, or ? "conditional@npm:1.0.0": -/// Uses SmallVec<[Descriptor; 4]> since most descriptor lines have 1-3 descriptors +/// Uses `SmallVec<[Descriptor; 4]>` since most descriptor lines have 1-3 descriptors pub fn parse_descriptor_line(input: &str) -> IResult<&str, SmallVec<[Descriptor<'_>; 4]>> { // Check for optional '? ' prefix for wrapped-line descriptors let (rest, has_line_wrap_marker) = opt(tag("? ")).parse(input)?; @@ -164,109 +164,28 @@ pub fn parse_descriptor_line(input: &str) -> IResult<&str, SmallVec<[Descriptor< /// Convert parsed descriptor data (borrowed strings) to Descriptor /// All strings remain borrowed from the input - zero allocation! 
#[inline] -fn convert_to_descriptor<'a>( - (name_part, protocol, range): (&'a str, &'a str, &'a str), -) -> Descriptor<'a> { +fn convert_to_descriptor<'a>((name_part, full_range): (&'a str, &'a str)) -> Descriptor<'a> { let ident = parse_name_to_ident(name_part); - // For the range, we need to find or construct the full range string - // Since we're borrowing, we need to handle this carefully - // The range is either just `range` (no protocol) or we need to find it in the original - // Actually, the original descriptor_string contains the full range, so we can slice it - // But for now, let's use the parsed parts - // If protocol is empty, range is the full range - // If protocol is not empty, we need to reconstruct or find original - // - // For zero-copy, we need to work with the original string - // The simplest approach: find the @ in name_part and take everything after - // But that's already done by parse_single_descriptor - // - // The issue: Descriptor::new takes range_raw: &'a str - // We have protocol and range separately, need to combine them - // But we can't allocate! 
So we need to find the original substring - // - // Actually, looking at how parse_single_descriptor works, the range returned - // is the full range including protocol:selector or just selector - // Let me check - no, it returns (name_part, protocol, range) separately - // - // For true zero-copy, we'd need to restructure to keep the original range string - // For now, let's construct it - this is ONE allocation per descriptor - // Still much better than before - let full_range = if protocol.is_empty() { - range - } else { - // We need to find the original range in the input - // This is tricky - for now fall back to looking at the original - // The name_part ends where @ is, then protocol:range follows - // But we don't have access to the original here - // - // Actually we can reconstruct from the input the parse_single_descriptor receives - // But we don't have it here either - // - // For now: use a workaround - the range includes protocol:selector in the original - // Let's look at parse_single_descriptor more carefully... - // It returns protocol and range separately after parsing - // But the original string has them together! 
- // - // We need to change parse_single_descriptor to return the full range - // For now, let's return just the range part and reconstruct - // This means we'll need ONE allocation still for protocol:range case - // - // TODO: Optimize this to avoid allocation by returning full range slice - range // For now, just use the selector part; we'll fix this - }; Descriptor::new(ident, full_range) } /// Parse a single descriptor string like "debug@npm:1.0.0", "c@*", or "is-odd@patch:is-odd@npm%3A3.0.1#~/.yarn/patches/is-odd-npm-3.0.1-93c3c3f41b.patch" /// Returns borrowed strings to avoid allocations during parsing -/// Returns (name_part, full_range) where full_range includes protocol if present -fn parse_single_descriptor(input: &str) -> IResult<&str, (&str, &str, &str)> { - // Find the @ that separates name from range - // For scoped packages like @babel/core@npm:1.0.0, we need the LAST @ before range - // Strategy: find package name first, then take rest as range - - // Try patch protocol format first (e.g., patch:is-odd@npm%3A3.0.1#~/.yarn/patches/...) 
- if let Ok((remaining, (name_part, _, protocol, _, patch_range))) = ( - parse_package_name, - char('@'), - parse_protocol, - char(':'), - parse_patch_range, - ) - .parse(input) - && protocol == "patch" - { - return Ok((remaining, (name_part, protocol, patch_range))); - } +/// Returns (`name_part`, `full_range`) where `full_range` includes protocol if present (e.g., "npm:^1.0.0") +fn parse_single_descriptor(input: &str) -> IResult<&str, (&str, &str)> { + // Parse package name first + let (after_name, name_part) = parse_package_name(input)?; - // Try protocol:range format (e.g., npm:1.0.0) - if let Ok((remaining, (name_part, _, protocol, _, range))) = ( - parse_package_name, - char('@'), - parse_protocol, - char(':'), - take_while1(|c: char| c != ',' && c != '"'), - ) - .parse(input) - { - return Ok((remaining, (name_part, protocol, range))); - } + // Parse @ separator + let (after_at, _) = char('@').parse(after_name)?; - // Try simple range format (e.g., * for c@*) - if let Ok((remaining, (name_part, _, range))) = ( - parse_package_name, - char('@'), - take_while1(|c: char| c != ',' && c != '"'), - ) - .parse(input) - { - return Ok((remaining, (name_part, "", range))); - } + // Everything after @ until comma or quote is the full range (including protocol if present) + let (remaining, full_range) = take_while1(|c: char| c != ',' && c != '"').parse(after_at)?; - Err(nom::Err::Error(nom::error::Error::new( - input, - nom::error::ErrorKind::Alt, - ))) + // Trim trailing whitespace from range to match JS parser behavior (e.g., "npm:^1.0.4 " -> "npm:^1.0.4") + let full_range = full_range.trim_end(); + + Ok((remaining, (name_part, full_range))) } /// Helper function to parse name part into Ident (zero-copy) @@ -308,19 +227,6 @@ fn parse_package_name(input: &str) -> IResult<&str, &str> { .parse(input) } -/// Parse protocol part like npm, workspace, git, etc. 
-fn parse_protocol(input: &str) -> IResult<&str, &str> { - // Support common protocol tokens including git+ssh - take_while1(|c: char| c.is_alphanumeric() || c == '-' || c == '_' || c == '+').parse(input) -} - -/// Parse patch range which can be complex like: -/// - "is-odd@npm%3A3.0.1#~/.yarn/patches/is-odd-npm-3.0.1-93c3c3f41b.patch" -/// - "typescript@npm%3A^5.8.3#optional!builtin" -fn parse_patch_range(input: &str) -> IResult<&str, &str> { - take_while1(|c: char| c != ',' && c != '"').parse(input) -} - /// Parse indented key-value properties for a package /// perf: use `fold_many0` to build the Package directly without intermediate Vec allocation pub fn parse_package_properties(input: &str) -> IResult<&str, Package<'_>> { @@ -416,7 +322,7 @@ pub fn parse_package_properties(input: &str) -> IResult<&str, Package<'_>> { .parse(input)?; // Consume any trailing whitespace and blank lines - let (rest, _) = fold_many0( + let (rest, ()) = fold_many0( alt((tag("\n"), tag(" "), tag("\t"), tag("\r"))), || (), |(), _| (), @@ -979,4 +885,119 @@ __metadata: assert_eq!(meta.len(), 2); assert!(remaining.is_empty()); } + + #[test] + fn test_descriptor_range_includes_npm_protocol() { + let input = r#""debug@npm:^4.3.0":"#; + let result = parse_descriptor_line(input); + assert!(result.is_ok()); + let (_, descriptors) = result.unwrap(); + assert_eq!(descriptors.len(), 1); + + let descriptor = &descriptors[0]; + assert_eq!(descriptor.ident().name(), "debug"); + assert_eq!(descriptor.range(), "npm:^4.3.0"); + } + + #[test] + fn test_descriptor_range_includes_npm_protocol_scoped() { + let input = r#""@babel/core@npm:^7.0.0":"#; + let result = parse_descriptor_line(input); + assert!(result.is_ok()); + let (_, descriptors) = result.unwrap(); + assert_eq!(descriptors.len(), 1); + + let descriptor = &descriptors[0]; + assert_eq!(descriptor.ident().name(), "core"); + assert_eq!(descriptor.ident().scope(), Some("@babel")); + assert_eq!(descriptor.range(), "npm:^7.0.0"); + } + + #[test] + 
fn test_descriptor_range_no_protocol() { + let input = r#""c@*":"#; + let result = parse_descriptor_line(input); + assert!(result.is_ok()); + let (_, descriptors) = result.unwrap(); + assert_eq!(descriptors.len(), 1); + + let descriptor = &descriptors[0]; + assert_eq!(descriptor.ident().name(), "c"); + assert_eq!(descriptor.range(), "*"); + } + + #[test] + fn test_descriptor_range_workspace_protocol() { + let input = r#""my-package@workspace:packages/my-package":"#; + let result = parse_descriptor_line(input); + assert!(result.is_ok()); + let (_, descriptors) = result.unwrap(); + assert_eq!(descriptors.len(), 1); + + let descriptor = &descriptors[0]; + assert_eq!(descriptor.ident().name(), "my-package"); + assert_eq!(descriptor.range(), "workspace:packages/my-package"); + } + + #[test] + fn test_descriptor_range_multiple_with_npm_protocol() { + let input = r#""prompts@npm:^2.0.1, prompts@npm:^2.4.2":"#; + let result = parse_descriptor_line(input); + assert!(result.is_ok()); + let (_, descriptors) = result.unwrap(); + assert_eq!(descriptors.len(), 2); + + assert_eq!(descriptors[0].ident().name(), "prompts"); + assert_eq!(descriptors[0].range(), "npm:^2.0.1"); + + assert_eq!(descriptors[1].ident().name(), "prompts"); + assert_eq!(descriptors[1].range(), "npm:^2.4.2"); + } + + #[test] + fn test_descriptor_range_mixed_protocols() { + let input = r#""c@*, c@workspace:packages/c":"#; + let result = parse_descriptor_line(input); + assert!(result.is_ok()); + let (_, descriptors) = result.unwrap(); + assert_eq!(descriptors.len(), 2); + + assert_eq!(descriptors[0].ident().name(), "c"); + assert_eq!(descriptors[0].range(), "*"); + + assert_eq!(descriptors[1].ident().name(), "c"); + assert_eq!(descriptors[1].range(), "workspace:packages/c"); + } + + #[test] + fn test_descriptor_range_patch_protocol() { + let input = r#""is-odd@patch:is-odd@npm%3A3.0.1#~/.yarn/patches/is-odd.patch":"#; + let result = parse_descriptor_line(input); + assert!(result.is_ok()); + let (_, 
descriptors) = result.unwrap();
+        assert_eq!(descriptors.len(), 1);
+
+        let descriptor = &descriptors[0];
+        assert_eq!(descriptor.ident().name(), "is-odd");
+        assert_eq!(
+            descriptor.range(),
+            "patch:is-odd@npm%3A3.0.1#~/.yarn/patches/is-odd.patch"
+        );
+    }
+
+    #[test]
+    fn test_descriptor_range_trims_trailing_whitespace() {
+        // Some lockfiles have trailing spaces in descriptor ranges (e.g., "@oclif/screen@npm:^1.0.4 ")
+        let input = r#""@oclif/screen@npm:^1.0.4 ":"#;
+        let result = parse_descriptor_line(input);
+        assert!(result.is_ok());
+        let (_, descriptors) = result.unwrap();
+        assert_eq!(descriptors.len(), 1);
+
+        let descriptor = &descriptors[0];
+        assert_eq!(descriptor.ident().name(), "screen");
+        assert_eq!(descriptor.ident().scope(), Some("@oclif"));
+        // Trailing whitespace should be trimmed to match JS parser behavior
+        assert_eq!(descriptor.range(), "npm:^1.0.4");
+    }
 }
diff --git a/fixtures/auxiliary-packages.yarn.lock b/fixtures/auxiliary-packages.yarn.lock
index 1d1c957..9569055 100644
--- a/fixtures/auxiliary-packages.yarn.lock
+++ b/fixtures/auxiliary-packages.yarn.lock
@@ -1181,4 +1181,3 @@ __metadata:
   checksum: f77b3d8d00310def622123df93d4ee654fc6a0096182af8bd60679ddcdfb3474c56c6c7190817c84a2785648cdee9d721c0154eb45698c62176c322fb46fc700
   languageName: node
   linkType: hard
-`;
diff --git a/fixtures/mixed-keys.yarn.lock b/fixtures/mixed-keys.yarn.lock
index fcecc5f..98b9dc2 100644
--- a/fixtures/mixed-keys.yarn.lock
+++ b/fixtures/mixed-keys.yarn.lock
@@ -174,4 +174,3 @@ __metadata:
   checksum: 371733296dc2d616900ce15a0049dca0ef67597d6394c57347ba334393599e800bab03c41d4d45221b6bc967b8c453ec3ae4749eff3894202d16800fdfe0e238
   languageName: node
   linkType: hard
-`
diff --git a/fixtures/resolutions-patches.yarn.lock b/fixtures/resolutions-patches.yarn.lock
index 921a4d2..cd5410c 100644
--- a/fixtures/resolutions-patches.yarn.lock
+++ b/fixtures/resolutions-patches.yarn.lock
@@ -54521,4 +54521,3 @@ __metadata:
  checksum: 
10/00c4636f10556174858bddd67758ef1e3046d005e9890a208fa5dd9296ec69a3e93ce8300a54d84cf72deea188503d8c5aa67c515daf79da40de378786832807 languageName: node linkType: hard -`; diff --git a/fixtures/workspaces.yarn.lock b/fixtures/workspaces.yarn.lock index 01e9b07..17e9e65 100644 --- a/fixtures/workspaces.yarn.lock +++ b/fixtures/workspaces.yarn.lock @@ -64,4 +64,3 @@ __metadata: react: ^17.0.0 languageName: unknown linkType: soft -`;