Skip to content

Commit 725d5b0

Browse files
committed
Add task for catching missing sha256 checksums
1 parent b09fde7 commit 725d5b0

File tree

2 files changed

+25
-3
lines changed

2 files changed

+25
-3
lines changed

dandiapi/api/tasks/__init__.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from __future__ import annotations
22

3+
from typing import TYPE_CHECKING
4+
35
from celery import shared_task
46
from celery.utils.log import get_task_logger
57

@@ -13,6 +15,9 @@
1315
)
1416
from dandiapi.api.models import Asset, AssetBlob, Version
1517

18+
if TYPE_CHECKING:
19+
from uuid import UUID
20+
1621
logger = get_task_logger(__name__)
1722

1823

@@ -25,9 +30,9 @@ def remove_asset_blob_embargoed_tag_task(blob_id: str) -> None:
2530

2631

2732
@shared_task(queue='calculate_sha256', soft_time_limit=86_400)
28-
def calculate_sha256(blob_id: str) -> None:
33+
def calculate_sha256(blob_id: str | UUID) -> None:
2934
asset_blob = AssetBlob.objects.get(blob_id=blob_id)
30-
logger.info('Found AssetBlob %s', blob_id)
35+
logger.info('Calculating sha256 checksum for asset blob %s', blob_id)
3136
sha256 = asset_blob.blob.storage.sha256_checksum(asset_blob.blob.name)
3237

3338
# TODO: Run dandi-cli validation

dandiapi/api/tasks/scheduled.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,12 @@
2222
from dandiapi.analytics.tasks import collect_s3_log_records_task
2323
from dandiapi.api.mail import send_dandisets_to_unembargo_message, send_pending_users_message
2424
from dandiapi.api.models import UserMetadata, Version
25-
from dandiapi.api.models.asset import Asset
25+
from dandiapi.api.models.asset import Asset, AssetBlob
2626
from dandiapi.api.models.dandiset import Dandiset
2727
from dandiapi.api.services.metadata import version_aggregate_assets_summary
2828
from dandiapi.api.services.metadata.exceptions import VersionMetadataConcurrentlyModifiedError
2929
from dandiapi.api.tasks import (
30+
calculate_sha256,
3031
validate_asset_metadata_task,
3132
validate_version_metadata_task,
3233
write_manifest_files,
@@ -63,6 +64,19 @@ def aggregate_assets_summary_task(version_id: int):
6364
version_aggregate_assets_summary(version)
6465

6566

67+
@shared_task(soft_time_limit=30)
68+
def recalculate_missing_sha256_checksums():
69+
blobs_missing = AssetBlob.objects.filter(sha256__isnull=True).values_list('blob_id', flat=True)
70+
num_missing = blobs_missing.count()
71+
if num_missing == 0:
72+
logger.info('No blobs with missing checksums found')
73+
return
74+
75+
logger.info('Found %s blobs with missing checksums', num_missing)
76+
for blob_id in blobs_missing:
77+
calculate_sha256.delay(blob_id)
78+
79+
6680
@shared_task(soft_time_limit=30)
6781
def validate_pending_asset_metadata():
6882
validatable_assets = (
@@ -156,3 +170,6 @@ def register_scheduled_tasks(sender: Celery, **kwargs):
156170

157171
# Process new S3 logs every hour
158172
sender.add_periodic_task(timedelta(hours=1), collect_s3_log_records_task.s())
173+
174+
# Check for asset blobs with missing sha256 checksums once a day
175+
sender.add_periodic_task(crontab(hour=3, minute=0), recalculate_missing_sha256_checksums.s())

0 commit comments

Comments
 (0)