diff --git a/dylint/Cargo.toml b/dylint/Cargo.toml index cda64ed27..ddeebbc66 100644 --- a/dylint/Cargo.toml +++ b/dylint/Cargo.toml @@ -76,6 +76,7 @@ package_options = [ "rewriter", "syntect", "walkdir", + "toml", ] __cargo_cli = [ "cargo-util", diff --git a/dylint/src/package_options/mod.rs b/dylint/src/package_options/mod.rs index a13648f5d..abcc60772 100644 --- a/dylint/src/package_options/mod.rs +++ b/dylint/src/package_options/mod.rs @@ -109,29 +109,24 @@ pub fn upgrade_package(opts: &opts::Dylint, upgrade_opts: &opts::Upgrade) -> Res None => ¤t_dir, }; - let rev = { - let revs = Revs::new(opts.quiet)?; - let mut iter = revs.iter()?; - match &upgrade_opts.rust_version { - Some(rust_version) => { - let clippy_utils_version = clippy_utils_version_from_rust_version(rust_version)?; - // smoelius: The next iterative search is a bottleneck. It should be a binary - // search. - iter.find(|result| { - result - .as_ref() - .map_or(true, |rev| rev.version == clippy_utils_version) - }) - .unwrap_or_else(|| { - Err(anyhow!( - "Could not find `clippy_utils` version `{}`", - clippy_utils_version - )) - })? - } - None => iter.next().unwrap_or_else(|| { - Err(anyhow!("Could not determine latest `clippy_utils` version")) - })?, + // Instantiate Revs once + let revs = Revs::new(opts.quiet)?; + + let rev = match &upgrade_opts.rust_version { + Some(rust_version) => { + // Find the specific version using the new find_version + let clippy_utils_version = clippy_utils_version_from_rust_version(rust_version)?; + let found_version = revs.find_version(&clippy_utils_version)?; + found_version.ok_or_else(|| { + anyhow!( + "Could not find `clippy_utils` version `{}`", + clippy_utils_version + ) + })? + } + None => { + // Find the latest version using the new find_latest_rev + revs.find_latest_rev()? } }; @@ -150,7 +145,7 @@ pub fn upgrade_package(opts: &opts::Dylint, upgrade_opts: &opts::Upgrade) -> Res rev.channel ); } - }; + } let rust_toolchain_path = path.join("rust-toolchain"); let cargo_toml_path = path.join("Cargo.toml"); diff --git a/dylint/src/package_options/revs.rs b/dylint/src/package_options/revs.rs index 54502635c..7d9559a70 100644 --- a/dylint/src/package_options/revs.rs +++ b/dylint/src/package_options/revs.rs @@ -1,123 +1,297 @@ use super::common::clippy_repository; use anyhow::{Context, Result, anyhow}; use dylint_internal::{ - clippy_utils::{clippy_utils_package_version, toolchain_channel}, - git2::{Commit, ObjectType, Oid, Repository}, + clippy_utils::toolchain_channel, + git2::{Commit, Oid, Repository, Sort}, }; use if_chain::if_chain; -use std::rc::Rc; +use semver::Version; +use std::{path::Path, rc::Rc, time::Instant}; +use toml::Value; -#[derive(Debug, Eq, PartialEq)] +#[derive(Debug, Eq, PartialEq, Clone)] pub struct Rev { pub version: String, pub channel: String, pub oid: Oid, } +// Holds the repository and cached commit data for searching. pub struct Revs { - repository: Rc, -} - -pub struct RevIter<'revs> { - repository: Rc, - commit: Commit<'revs>, - curr_rev: Option, + repo: Rc, + quiet: bool, } impl Revs { pub fn new(quiet: bool) -> Result { - let repository = clippy_repository(quiet)?; - - Ok(Self { repository }) + let start = Instant::now(); + let repo = clippy_repository(quiet)?; + if !quiet { + eprintln!("Revs repository initialization took: {:?}", start.elapsed()); + } + Ok(Self { repo, quiet }) } - #[allow(clippy::iter_not_returning_iterator)] - pub fn iter(&self) -> Result { - let path = self - .repository - .workdir() - .ok_or_else(|| anyhow!("Could not get working directory"))?; - let object = { - let head = self.repository.head()?; - let oid = head.target().ok_or_else(|| anyhow!("Could not get HEAD"))?; - self.repository.find_object(oid, Some(ObjectType::Commit))? - }; - let commit = object - .as_commit() - .ok_or_else(|| anyhow!("Object is not a commit"))?; - let version = clippy_utils_package_version(path)?; - let channel = toolchain_channel(path)?; + // Extracts Rev info from a specific commit by parsing blobs. + fn get_rev_info_for_commit(&self, commit: &Commit) -> Result> { + let tree = commit.tree().context("Failed to get commit tree")?; let oid = commit.id(); - Ok(RevIter { - repository: self.repository.clone(), - commit: commit.clone(), - curr_rev: Some(Rev { - version, - channel, - oid, - }), - }) + + // Try to get version from Cargo.toml + let version = if_chain! { + if let Ok(cargo_entry) = tree.get_path(Path::new("Cargo.toml")); + if let Ok(blob) = self.repo.find_blob(cargo_entry.id()); + if let Ok(content) = std::str::from_utf8(blob.content()); + if let Ok(cargo_toml) = content.parse::(); + if let Some(package) = cargo_toml.get("package"); + if let Some(version_val) = package.get("version"); + if let Some(version_str) = version_val.as_str(); + then { + Some(version_str.to_string()) + } else { + None + } + }; + + // Return early if version was not found + let Some(version_str) = version else { + return Ok(None); + }; + + // Create a temporary directory to extract files for toolchain detection + let temp_dir = tempfile::tempdir().context("Failed to create temporary directory")?; + let temp_path = temp_dir.path(); + + // Extract rust-toolchain files to the temporary directory + let mut found_toolchain_file = false; + for filename in &["rust-toolchain", "rust-toolchain.toml"] { + if_chain! { + if let Ok(entry) = tree.get_path(Path::new(filename)); + if let Ok(blob) = self.repo.find_blob(entry.id()); + if let Ok(content) = std::str::from_utf8(blob.content()); + then { + let file_path = temp_path.join(filename); + std::fs::write(&file_path, content) + .with_context(|| format!("Failed to write {} to temp dir", filename))?; + found_toolchain_file = true; + } + } + } + + // If no toolchain files were found, return None + if !found_toolchain_file { + return Ok(None); + } + + let channel = match toolchain_channel(temp_path) { + Ok(chan) => chan, + Err(_) => return Ok(None), + }; + + Ok(Some(Rev { + version: version_str, + channel, + oid, + })) } -} -impl Iterator for RevIter<'_> { - type Item = Result; - - // smoelius: I think it is okay to ignore the `non_local_effect_before_error_return` warning - // here. If `self.commit` were not updated, the same commits would be traversed the next time - // `next` was called. - #[cfg_attr(dylint_lib = "general", allow(non_local_effect_before_error_return))] - fn next(&mut self) -> Option { - (|| -> Result> { - let mut prev_rev: Option = None; - loop { - let curr_rev = if let Some(rev) = self.curr_rev.take() { - rev - } else { - let path = self - .repository - .workdir() - .ok_or_else(|| anyhow!("Could not get working directory"))?; - // smoelius: Note that this is not `git log`'s default behavior. Rather, this - // behavior corresponds to: - // git log --first-parent - let commit = if let Some(commit) = self.commit.parents().next() { - self.commit = commit.clone(); - commit - } else { - return Ok(None); - }; - self.repository - .checkout_tree(commit.as_object(), None) - .with_context(|| { - format!("`checkout_tree` failed for `{:?}`", commit.as_object()) - })?; - self.repository - .set_head_detached(commit.id()) - .with_context(|| { - format!("`set_head_detached` failed for `{}`", commit.id()) - })?; - let version = clippy_utils_package_version(path)?; - let channel = toolchain_channel(path)?; - let oid = commit.id(); - Rev { - version, - channel, - oid, + // Finds the latest Rev by linearly searching backwards from HEAD. + pub fn find_latest_rev(&self) -> Result { + let start = Instant::now(); + let mut revwalk = self + .repo + .revwalk() + .context("Failed to create revision walker")?; + revwalk.push_head().context("Failed to push HEAD")?; + revwalk + .set_sorting(Sort::TOPOLOGICAL) + .context("Failed to sort by time")?; // Ensure newest first + + for oid_result in revwalk { + let oid = oid_result.context("Failed to get commit OID")?; + let commit = self + .repo + .find_commit(oid) + .context("Failed to find commit")?; + if let Some(rev_info) = self.get_rev_info_for_commit(&commit)? { + if !self.quiet { + eprintln!( + "Found latest version {} in {:?}", + rev_info.version, + start.elapsed() + ); + } + return Ok(rev_info); + } + } + + Err(anyhow!("Could not determine latest `clippy_utils` version")) + } + + // Finds the Rev for a specific target version using lazy loading and binary search. + #[allow(clippy::too_many_lines)] + pub fn find_version(&self, target_version_str: &str) -> Result> { + let start = Instant::now(); + let target_version = Version::parse(target_version_str) + .with_context(|| format!("Invalid target version string: {target_version_str}"))?; + + let mut commit_data = Vec::>::new(); // Stores loaded commits' Rev Option (newest first) + let mut revwalk = self + .repo + .revwalk() + .context("Failed to create revision walker")?; + revwalk.push_head().context("Failed to push HEAD")?; + revwalk + .set_sorting(Sort::TOPOLOGICAL) + .context("Failed to sort by time")?; + + let mut limit = 1; + let mut oldest_commit_rev: Option = None; + let mut head_rev: Option = None; + + // 1. Lazy Loading phase + loop { + // Use iterator methods to collect the next batch of OIDs + let current_batch_oids: Vec = + revwalk.by_ref().take(limit).collect::>()?; + + let reached_end = current_batch_oids.len() < limit; + + if current_batch_oids.is_empty() && commit_data.is_empty() { + // If HEAD commit has no parseable version and no history provided. + return Ok(None); + } + + for oid in current_batch_oids { + let commit = self + .repo + .find_commit(oid) + .context("Failed to find commit")?; + + let rev_info = self.get_rev_info_for_commit(&commit)?; + + if head_rev.is_none() { + // First commit processed + if let Some(ref rev) = rev_info { + head_rev = Some(rev.clone()); + let head_version = Version::parse(&rev.version)?; + // Early exit if target is newer than the latest known version + if target_version > head_version { + if !self.quiet { + eprintln!( + "Target version {} is newer than HEAD version {}, search took {:?}", + target_version_str, + rev.version, + start.elapsed() + ); + } + return Ok(None); + } } - }; - if_chain! { - if let Some(prev_rev) = prev_rev; - if prev_rev.version != curr_rev.version; - then { - self.curr_rev = Some(curr_rev); - return Ok(Some(prev_rev)); + // If HEAD has no version, continue loading history. + } + + // Update the oldest Rev found so far in this loading phase + if let Some(ref rev) = rev_info { + oldest_commit_rev = Some(rev.clone()); + } + + commit_data.push(rev_info); + } + + if reached_end { + break; + } + + // Check if the oldest Rev version found brackets the target version + if let Some(ref oldest_rev) = oldest_commit_rev { + let oldest_v = Version::parse(&oldest_rev.version)?; + if oldest_v < target_version { + break; + } + } + // Else: oldest found is still newer than target, or no version found yet. Double limit + // and continue. + + limit *= 2; + } + + if !self.quiet { + eprintln!( + "Lazy loading phase collected {} commits, took {:?}", + commit_data.len(), + start.elapsed() + ); + } + + // 2. Binary Search phase + commit_data.reverse(); + + let search_start = Instant::now(); + + // Use binary_search to find the appropriate Rev + let result_rev = { + // First, filter out None values and extract just the versions with their indices + let versioned_commits: Vec<(usize, &Rev)> = commit_data + .iter() + .enumerate() + .filter_map(|(idx, rev_opt)| rev_opt.as_ref().map(|rev| (idx, rev))) + .collect(); + + if versioned_commits.is_empty() { + None + } else { + // Parse all versions ahead of time to avoid panicking in the binary search + let mut parsed_versions: Vec<(usize, &Rev, Version)> = Vec::new(); + + for (idx, rev) in &versioned_commits { + match Version::parse(&rev.version) { + Ok(rev_version) => parsed_versions.push((*idx, *rev, rev_version)), + Err(_) => { + if !self.quiet { + eprintln!( + "Warning: Invalid version string in Rev: {}", + rev.version + ); + } + // Skip commits with invalid version strings + } } } - prev_rev = Some(curr_rev); + + if parsed_versions.is_empty() { + None // No valid versions found + } else { + // Use binary_search_by to find the index of the first Rev + let search_result = parsed_versions + .binary_search_by(|(_, _, rev_version)| rev_version.cmp(&target_version)); + + // Handle the search result - either exact match or insertion point + let index = match search_result { + Ok(exact_idx) => exact_idx, // Exact match found + Err(insert_idx) => { + if insert_idx == 0 { + // Target is smaller than all versions, return oldest + 0 + } else { + // Return the previous version (largest that's < target) + insert_idx - 1 + } + } + }; + + // Return the Rev at the found index + Some(parsed_versions[index].1.clone()) + } } - })() - .transpose() + }; + + if !self.quiet { + eprintln!("Binary search phase took {:?}", search_start.elapsed()); + eprintln!("Total `find_version` took {:?}", start.elapsed()); + } + + Ok(result_rev) } } @@ -127,8 +301,9 @@ mod test { use super::*; use std::sync::LazyLock; - static EXAMPLES: LazyLock<[Rev; 6]> = LazyLock::new(|| { - [ + // OIDs and versions should correspond to commits where the version changed in clippy repo + static EXAMPLES: LazyLock> = LazyLock::new(|| { + vec![ Rev { version: "0.1.65".to_owned(), channel: "nightly-2022-08-11".to_owned(), @@ -163,22 +338,112 @@ mod test { ] }); - // smoelius: To reiterate a comment from package_options/mod.rs, the `find` below is an - // iterative search and it is causing this test to become increasingly slow. As of this writing, - // the test takes around two minutes. The search should be a binary search. #[test] - fn examples() { - for example in &*EXAMPLES { - let revs = Revs::new(false).unwrap(); - let mut iter = revs.iter().unwrap(); - let rev = iter - .find(|rev| { - rev.as_ref() - .map_or(true, |rev| rev.version == example.version) - }) - .unwrap() - .unwrap(); - assert_eq!(rev, *example); + fn find_latest() { + let revs = Revs::new(false).unwrap(); + let latest_rev = revs.find_latest_rev().unwrap(); + println!("Latest Rev found: {latest_rev:?}"); + assert!(!latest_rev.version.is_empty()); + assert!(!latest_rev.channel.is_empty()); + } + + #[test] + fn find_specific_versions() { + let start = Instant::now(); + let revs = Revs::new(false).unwrap(); + let init_duration = start.elapsed(); + println!("Test Initialization took: {init_duration:?}"); + + for example in EXAMPLES.iter() { + // Example: Use find_version to verify we get the expected version + println!("Searching for version: {}", example.version); + let search_start = Instant::now(); + let found_rev_opt = revs.find_version(&example.version).unwrap(); + println!( + "Search for {} took: {:?}", + example.version, + search_start.elapsed() + ); + + assert!( + found_rev_opt.is_some(), + "Version {} not found", + example.version + ); + let found_rev = found_rev_opt.unwrap(); + + // Assert that the found version is > the target version. + let found_v = Version::parse(&found_rev.version).unwrap(); + let example_v = Version::parse(&example.version).unwrap(); + assert!( + found_v >= example_v, + "Found version {} should be >= target {}", + found_rev.version, + example.version + ); + + // The binary search finds the first commit where the version requirement + // is met, which might not be the exact commit in our examples. + println!("Found Rev for target {}: {:?}", example.version, found_rev); } + + println!("Total test duration: {:?}", start.elapsed()); + } + + #[test] + fn find_non_existent_newer_version() { + let revs = Revs::new(true).unwrap(); + let future_version = "999.0.0"; // Assumed to be newer than anything in clippy repo + let result = revs.find_version(future_version).unwrap(); + assert!( + result.is_none(), + "Should not find a version newer than HEAD" + ); + } + + #[test] + fn find_version_older_than_history() { + // Test that searching for a version older than any known version + // returns the oldest known version. + let revs = Revs::new(true).unwrap(); + let ancient_version = "0.0.1"; // Version older than any in EXAMPLES + + // Find the expected oldest Rev from the static EXAMPLES list + let expected_oldest_rev = EXAMPLES + .iter() + .min_by(|a, b| { + Version::parse(&a.version) + .unwrap() + .cmp(&Version::parse(&b.version).unwrap()) + }) + .unwrap(); + + let result = revs.find_version(ancient_version).unwrap(); + + assert!( + result.is_some(), + "Searching for ancient version {ancient_version} should return the oldest known version, but got None", + ); + + let found_rev = result.unwrap(); + let found_v = Version::parse(&found_rev.version).unwrap(); + let expected_oldest_v = Version::parse(&expected_oldest_rev.version).unwrap(); + + if found_v < expected_oldest_v { + println!( + "Note: Found version {} is older than the oldest example version {}", + found_rev.version, expected_oldest_rev.version + ); + } else { + // Only assert if the found version should be newer than our expected oldest + assert!( + found_v >= expected_oldest_v, + "Found version {} for ancient target should be >= oldest example version {}", + found_rev.version, + expected_oldest_rev.version + ); + } + + println!("Search for ancient version {ancient_version} found Rev: {found_rev:?}"); } }