Skip to content

Commit 57854bf

Browse files
committed
dedupe sample_hash
1 parent 2aee3e3 commit 57854bf

File tree

2 files changed

+10
-3
lines changed

2 files changed

+10
-3
lines changed

xklb/media/dedupe.py

+5 -2
Original file line number | Diff line number | Diff line change
@@ -378,8 +378,11 @@ def get_fs_duplicates(args) -> List[dict]:
378378
for path, hash in zip(sample_hash_paths, hash_results):
379379
for m in media:
380380
if m['path'] == path:
381-
m['hash'] = hash
382-
args.db["media"].upsert(m, pk=["path"], alter=True) # save sample-hash back to db
381+
if hash is None:
382+
del m
383+
else:
384+
m['hash'] = hash
385+
args.db["media"].upsert(m, pk=["path"], alter=True) # save sample-hash back to db
383386

384387
sample_hash_groups = defaultdict(list)
385388
for m in media:

xklb/scripts/sample_hash.py

+5 -1
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,11 @@ def threadpool_read(path, segments, chunk_size, max_workers=10):
2929

3030

3131
def sample_hash_file(path, threads=1, gap=0.1, chunk_size=None):
32-
file_stats = Path(path).stat()
32+
try:
33+
file_stats = Path(path).stat()
34+
except FileNotFoundError:
35+
return None
36+
3337
disk_usage = (
3438
file_stats.st_blocks * 512
3539
) # https://github.com/python/cpython/blob/main/Doc/library/os.rst#files-and-directories

0 commit comments

Comments
 (0)