Skip to content

fix: Improve renamed package detection #575

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions fixtures/icu-rename/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[package]
name = "icu-rename-test"
version = "0.1.0"
edition = "2021"

[dependencies]
icu_locid = "1.0.0" # This is the renamed package we want to test
65 changes: 65 additions & 0 deletions fixtures/icu-rename/after/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions fixtures/icu-rename/after/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[package]
name = "icu"
version = "1.0.0"
authors = ["Unicode Team <[email protected]>"]
description = "Unicode ICU locale identifier APIs"
edition = "2021"
repository = "https://github.com/unicode-org/icu4x"
license = "MIT OR Apache-2.0"

[dependencies]
serde = { version = "1.0", features = ["derive"], optional = true }
4 changes: 4 additions & 0 deletions fixtures/icu-rename/after/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Dummy file for the fixture
pub fn hello() -> &'static str {
"Hello from icu"
}
11 changes: 11 additions & 0 deletions fixtures/icu-rename/before/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[package]
name = "icu_locid"
version = "1.0.0"
authors = ["Unicode Team <[email protected]>"]
description = "Unicode ICU locale identifier APIs"
edition = "2021"
repository = "https://github.com/unicode-org/icu4x"
license = "MIT OR Apache-2.0"

[dependencies]
serde = { version = "1.0", features = ["derive"], optional = true }
4 changes: 4 additions & 0 deletions fixtures/icu-rename/before/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Dummy file for the fixture
pub fn hello() -> &'static str {
"Hello from icu_locid"
}
6 changes: 6 additions & 0 deletions fixtures/icu-rename/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Simple usage of the dependency
extern crate icu_locid;

pub fn get_crate_name() -> &'static str {
"icu_locid"
}
160 changes: 144 additions & 16 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,10 @@ impl<'a> From<&'a Dependency> for DepReq<'a> {

#[macro_export]
macro_rules! warn {
($fmt:expr, $($arg:tt)*) => {
(
$fmt:expr,
$($arg:tt)*
) => {
if $crate::opts::get().no_warnings {
log::debug!($fmt, $($arg)*);
} else {
Expand Down Expand Up @@ -220,10 +223,16 @@ thread_local! {
// smoelius: A reason for having the former is the following. Multiple packages map to the same
// url, and multiple urls map to the same shortened url. Thus, a cache keyed by url has a
// greater chance of a cache hit.
static GENERAL_STATUS_CACHE: RefCell<HashMap<Url<'static>, RepoStatus<'static, ()>>> = RefCell::new(HashMap::new());
static GENERAL_STATUS_CACHE: RefCell<
HashMap<Url<'static>, RepoStatus<'static, ()>>
> = RefCell::new(HashMap::new());
static LATEST_VERSION_CACHE: RefCell<HashMap<String, Version>> = RefCell::new(HashMap::new());
static TIMESTAMP_CACHE: RefCell<HashMap<Url<'static>, RepoStatus<'static, SystemTime>>> = RefCell::new(HashMap::new());
static REPOSITORY_CACHE: RefCell<HashMap<Url<'static>, RepoStatus<'static, PathBuf>>> = RefCell::new(HashMap::new());
static TIMESTAMP_CACHE: RefCell<
HashMap<Url<'static>, RepoStatus<'static, SystemTime>>
> = RefCell::new(HashMap::new());
static REPOSITORY_CACHE: RefCell<
HashMap<Url<'static>, RepoStatus<'static, PathBuf>>
> = RefCell::new(HashMap::new());
}

static TOKEN_FOUND: AtomicBool = AtomicBool::new(false);
Expand Down Expand Up @@ -280,8 +289,9 @@ fn unmaintained() -> Result<bool> {
);

if std::io::stderr().is_terminal() && !opts::get().verbose {
PROGRESS
.with_borrow_mut(|progress| *progress = Some(progress::Progress::new(packages.len())));
PROGRESS.with_borrow_mut(|progress| {
*progress = Some(progress::Progress::new(packages.len()));
});
}

for pkg in packages {
Expand Down Expand Up @@ -554,11 +564,11 @@ fn general_status(name: &str, url: Url) -> Result<RepoStatus<'static, ()>> {
};
verbose::wrap!(
|| {
let repo_status = if use_github_api {
let repo_status = (if use_github_api {
Github::archival_status(url)
} else {
curl::existence(url)
}
})
.unwrap_or_else(|error| {
warn!("failed to determine `{}` {}: {}", name, what, error);
RepoStatus::Success(url, ())
Expand Down Expand Up @@ -671,7 +681,7 @@ fn latest_version(name: &str) -> Result<Version> {
},
ToString::to_string,
"latest version of `{}` using crates.io index",
name,
name
)
})
}
Expand Down Expand Up @@ -768,7 +778,7 @@ fn timestamp_from_clone(pkg: &Package) -> Result<RepoStatus<'_, SystemTime>> {
}

#[cfg_attr(dylint_lib = "general", allow(non_local_effect_before_error_return))]
#[cfg_attr(dylint_lib = "supplementary", allow(commented_code))]
#[cfg_attr(dylint_lib = "supplementary", allow(commented_out_code))]
fn clone_repository(pkg: &Package) -> Result<RepoStatus<PathBuf>> {
let repo_status = REPOSITORY_CACHE.with_borrow_mut(|repository_cache| -> Result<_> {
on_disk_cache::with_cache(|cache| -> Result<_> {
Expand All @@ -780,10 +790,6 @@ fn clone_repository(pkg: &Package) -> Result<RepoStatus<PathBuf>> {
}
// smoelius: To make verbose printing easier, "membership" is printed regardless of the
// check's purpose, and the `Purpose` type was removed.
/* let what = match purpose {
Purpose::Membership => "membership",
Purpose::Timestamp => "timestamp",
}; */
verbose::wrap!(
|| {
let url_and_dir = cache.clone_repository(pkg);
Expand Down Expand Up @@ -865,7 +871,7 @@ fn membership_in_clone(pkg: &Package, repo_dir: &Path) -> Result<bool> {
continue;
}
let contents = show(repo_dir, path)?;
let Ok(table) = contents.parse::<Table>()
let Ok(table) = contents.parse::<Table>() else
/* smoelius: This "failed to parse" warning is a little too noisy.
.map_err(|error| {
warn!(
Expand All @@ -874,9 +880,11 @@ fn membership_in_clone(pkg: &Package, repo_dir: &Path) -> Result<bool> {
error.to_string().trim_end()
);
}) */
else {
{
continue;
};

// First check exact name match (existing behavior)
if table
.get("package")
.and_then(Value::as_table)
Expand All @@ -886,11 +894,131 @@ fn membership_in_clone(pkg: &Package, repo_dir: &Path) -> Result<bool> {
{
return Ok(true);
}

// If name doesn't match, check if it might be a renamed package
if is_same_package_except_name(pkg, &table) {
return Ok(true);
}
}

Ok(false)
}

// Macro for comparing fields that implement Deserialize, PartialEq, and Default.
macro_rules! check_field {
($pkg:expr, $pkg_table:expr, $field:ident, $FieldType:ty) => {{
let field_name_str = stringify!($field);
let original_value: &$FieldType = &$pkg.$field;

match $pkg_table.get(field_name_str) {
Some(value) => {
let deserializer = value.clone().into_deserializer();
match <$FieldType as serde::Deserialize>::deserialize(deserializer) {
Ok(candidate_value) => {
let typed_candidate: $FieldType = candidate_value;
if original_value != &typed_candidate {
return false;
}
}
Err(e) => {
warn!(
"Failed to deserialize field '{}' for package comparison: {}. \
Skipping check.",
field_name_str, e
);
return false;
}
}
}
None => {
let default_value: $FieldType = <$FieldType as Default>::default();
if original_value != &default_value {
return false;
}
}
}
}};
}

/// Checks if a package is the same as one defined in a Cargo.toml table, except for its name.
/// This helps identify renamed packages.
fn is_same_package_except_name(pkg: &Package, cargo_toml: &Table) -> bool {
use cargo_metadata::Edition;
use cargo_metadata::semver::Version;
use serde::de::IntoDeserializer;

let Some(pkg_table) = cargo_toml.get("package").and_then(Value::as_table) else {
return false;
};

// --- Version Check (Direct) --- (No Default impl)
let field_name = "version";
match pkg_table.get(field_name) {
Some(value) => {
let deserializer = value.clone().into_deserializer();
match <Version as serde::Deserialize>::deserialize(deserializer) {
Ok(candidate_value) => {
if pkg.version != candidate_value {
return false;
}
}
Err(e) => {
warn!(
"Failed to deserialize field '{}' for package comparison: {}. Skipping \
check.",
field_name, e
);
}
}
}
None => {
return false;
} // Missing version is a mismatch
}

// --- Fields checked with macro (Implement Default) ---
check_field!(pkg, pkg_table, authors, Vec<String>);
check_field!(pkg, pkg_table, description, Option<String>);
check_field!(pkg, pkg_table, repository, Option<String>);
check_field!(pkg, pkg_table, homepage, Option<String>);
check_field!(pkg, pkg_table, documentation, Option<String>);
check_field!(pkg, pkg_table, keywords, Vec<String>);
check_field!(pkg, pkg_table, categories, Vec<String>);
check_field!(pkg, pkg_table, license, Option<String>);
check_field!(pkg, pkg_table, edition, Edition);
// check_field!(pkg, pkg_table, license_file, Option<String>); // Requires Utf8PathBuf ->
// Default? check_field!(pkg, pkg_table, publish); // Type: Option<Vec<String>> or bool?

// --- Rust Version Check (Direct) --- (Option<Version>, No Default impl for Version)
let field_name = "rust_version";
match pkg_table.get(field_name) {
Some(value) => {
let deserializer = value.clone().into_deserializer();
match <Option<Version> as serde::Deserialize>::deserialize(deserializer) {
Ok(candidate_value) => {
if pkg.rust_version != candidate_value {
return false;
}
}
Err(e) => {
warn!(
"Failed to deserialize field '{}' for package comparison: {}. Skipping \
check.",
field_name, e
);
return false;
}
}
}
None => {
if pkg.rust_version.is_some() {
return false;
} // Compare original pkg.rust_version to None
}
}
true
}

fn show(repo_dir: &Path, path: &Path) -> Result<String> {
let mut command = Command::new("git");
command.args(["show", &format!("HEAD:{}", path.display())]);
Expand Down
Loading
Loading