Skip to content

Commit 8688195

Browse files
darktorresclaude
and committed
Fix prehash cache bypass causing full hash scan on all files in second scan
Cached prehash entries were appended to pre_checked_map unconditionally, skipping the collision filter that freshly-computed entries go through. On a second scan where all files are cached, every file was forwarded to full hashing regardless of prehash result. Also fixed a latent bug where a cached file and a freshly-computed file sharing the same prehash were never detected as a collision, since each loop only checked within its own set. Both loops are now replaced with a single merge-then-filter pass over a combined (size -> hash -> files) map. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 8aa765f commit 8688195

File tree

1 file changed

+23
-10
lines changed
  • czkawka_core/src/tools/duplicate

1 file changed

+23
-10
lines changed

czkawka_core/src/tools/duplicate/core.rs

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -474,19 +474,32 @@ impl DuplicateFinder {
474474
// Saving into cache
475475
let progress_handler = prepare_thread_handler_common(progress_sender, CurrentStage::DuplicatePreHashCacheSaving, 0, self.get_test_type(), 0);
476476

477-
// Add data from cache
478-
for (size, mut vec_file_entry) in records_already_cached {
479-
pre_checked_map.entry(size).or_default().append(&mut vec_file_entry);
480-
}
481-
482-
// Check results
483-
for (size, hash_map, errors) in &pre_hash_results {
477+
// Collect errors from freshly-computed results
478+
for (_size, _hash_map, errors) in &pre_hash_results {
484479
if !errors.is_empty() {
485480
self.common_data.text_messages.warnings.append(&mut errors.clone());
486481
}
487-
for vec_file_entry in hash_map.values() {
488-
if vec_file_entry.len() > 1 {
489-
pre_checked_map.entry(*size).or_default().append(&mut vec_file_entry.clone());
482+
}
483+
484+
// Merge cached and freshly-computed entries into (size -> hash -> files) groups,
485+
// then only pass groups with >1 file to full hashing. Merging is required so that
486+
// a cached file and a freshly-computed file sharing the same prehash are recognised
487+
// as a collision even when neither set alone has more than one entry for that hash.
488+
let mut combined: BTreeMap<u64, BTreeMap<String, Vec<DuplicateEntry>>> = BTreeMap::new();
489+
for (size, vec_file_entry) in records_already_cached {
490+
for file_entry in vec_file_entry {
491+
combined.entry(size).or_default().entry(file_entry.hash.clone()).or_default().push(file_entry);
492+
}
493+
}
494+
for (size, hash_map, _errors) in &pre_hash_results {
495+
for (hash, vec_file_entry) in hash_map {
496+
combined.entry(*size).or_default().entry(hash.clone()).or_default().extend(vec_file_entry.iter().cloned());
497+
}
498+
}
499+
for (size, hash_groups) in combined {
500+
for group in hash_groups.into_values() {
501+
if group.len() > 1 {
502+
pre_checked_map.entry(size).or_default().extend(group);
490503
}
491504
}
492505
}

0 commit comments

Comments
 (0)