-
-
Notifications
You must be signed in to change notification settings - Fork 242
fix: --skip-local uses scan roots instead of parent directory #820
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -180,7 +180,12 @@ pub fn run(config: &RunConfig) -> Result<RunResult, FinderError> { | |
| } | ||
|
|
||
| let content = str::from_utf8(&map).ok()?; | ||
| let id = file.path.to_string_lossy().into_owned(); | ||
| let id = file | ||
| .path | ||
| .canonicalize() | ||
| .unwrap_or_else(|_| file.path.clone()) | ||
| .to_string_lossy() | ||
| .into_owned(); | ||
|
Comment on lines
+183
to
+188
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Keep normalization out of the exported This 🤖 Prompt for AI Agents |
||
|
|
||
| // Compute code-level ignore ranges from regex matches against source text. | ||
| // This matches v4 semantics: regex patterns are matched against source | ||
|
|
@@ -304,9 +309,26 @@ pub fn run(config: &RunConfig) -> Result<RunResult, FinderError> { | |
| // Sort groups by format name for determinism. | ||
| format_groups.sort_by(|a, b| a[0].format.cmp(&b[0].format)); | ||
|
|
||
| // 4. Detect clones — skip_local is now handled inside flush_clone. | ||
| let clones = | ||
| pool.install(|| detect_prepared(format_groups, min_tokens, skip_local, config.min_lines)); | ||
| // 4. Detect clones — skip_local uses scan roots to determine same-directory pairs. | ||
| // Both scan roots and file IDs must use the same path normalization so | ||
| // that prefix comparisons work. Canonicalize scan roots once here (resolves | ||
| // symlinks like macOS /var → /private/var), and canonicalize file paths in | ||
| // the parallel processing loop above. Fall back to the original path if | ||
| // canonicalize fails. | ||
| let scan_roots: Vec<std::path::PathBuf> = config | ||
| .paths | ||
| .iter() | ||
| .map(|p| std::fs::canonicalize(p).unwrap_or_else(|_| p.clone())) | ||
| .collect(); | ||
| let clones = pool.install(|| { | ||
| detect_prepared( | ||
| format_groups, | ||
| min_tokens, | ||
| skip_local, | ||
| config.min_lines, | ||
| &scan_roots, | ||
| ) | ||
| }); | ||
|
|
||
| // 5. Compute statistics. | ||
| let statistics = statistics::compute(&source_files, &clones); | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -178,7 +178,7 @@ pub struct Cli { | |||||||||||||||
| pub list: bool, | ||||||||||||||||
|
|
||||||||||||||||
| /// Skip clones where both fragments are in the same directory | ||||||||||||||||
| #[arg(long)] | ||||||||||||||||
| #[arg(long, visible_alias = "skipLocal")] | ||||||||||||||||
| pub skip_local: bool, | ||||||||||||||||
|
Comment on lines
180
to
182
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Update the help text to match the new `--skip-local` behavior. The detector no longer skips only same-directory pairs; it now drops any pair that shares a scan root, including different subdirectories under one scanned path. Leaving the old wording here makes the `--help` output misleading. Suggested text:
- /// Skip clones where both fragments are in the same directory
+ /// Skip clones where both fragments are under the same scan root
📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||
|
|
||||||||||||||||
| /// Minimum percentage of duplication to report (0-100) | ||||||||||||||||
|
|
||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
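Normalize both sides before `is_relative_to` compares them.
This helper only compares raw path components, so spellings like `./repo/src/a.js` vs `repo` or `repo/../repo/lib/b.js` return `false` here even though the TypeScript `path.relative()` check referenced in the doc comment would still treat both files as under the same root. `orchestrate.rs` canonicalizes the CLI path, but `detect_with_options` and `detect_prepared` are public entry points, so `skip_local` still depends on caller-specific path spelling unless this helper normalizes lexically or the API explicitly requires normalized IDs.
🤖 Prompt for AI Agents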
./repo/src/a.jsvsrepoorrepo/../repo/lib/b.jsreturnfalsehere even though the TypeScriptpath.relative()check referenced in the doc comment would still treat both files as under the same root.orchestrate.rscanonicalizes the CLI path, butdetect_with_optionsanddetect_preparedare public entry points, soskip_localstill depends on caller-specific path spelling unless this helper normalizes lexically or the API explicitly requires normalized IDs.🤖 Prompt for AI Agents