Skip to content

Commit fed20ad

Browse files
committed
set.intersection(* is always nice
1 parent 9dbc1ca commit fed20ad

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

xklb/media/dedupe.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -389,12 +389,12 @@ def get_fs_duplicates(args) -> List[dict]:
389389
args.db["media"].upsert(path_media_map[path], pk=["path"], alter=True) # save sample-hash back to db
390390
media = [path_media_map[d['path']] for d in media if d['path'] in path_media_map]
391391

392-
sample_hash_groups = defaultdict(list)
392+
sample_hash_groups = defaultdict(set)
393393
for m in media:
394-
sample_hash_groups[m["hash"]].append(m)
394+
sample_hash_groups[m["hash"]].add(m['path'])
395395
sample_hash_groups = [l for l in sample_hash_groups.values() if len(l) > 1]
396396

397-
sample_hash_paths = {d["path"] for g in sample_hash_groups for d in g}
397+
sample_hash_paths = set.intersection(*sample_hash_groups)
398398
log.info(
399399
"Got %s sample-hash duplicates (%s groups). Doing full hash comparison...",
400400
len(sample_hash_paths),

0 commit comments

Comments
 (0)