Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions services/datalad/datalad_service/handlers/drop.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import falcon

from datalad_service.tasks.publish import annex_drop
from datalad_service.tasks.publish import export_backup_and_drop


class DropResource:
Expand All @@ -11,6 +11,6 @@ def __init__(self, store):

async def on_post(self, req, resp, dataset):
dataset_path = self.store.get_dataset_path(dataset)
await annex_drop.kiq(dataset_path)
await export_backup_and_drop.kiq(dataset_path)
resp.media = {}
resp.status = falcon.HTTP_OK
29 changes: 28 additions & 1 deletion services/datalad/datalad_service/tasks/publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,30 @@ def create_remotes(dataset_path):
github_sibling(dataset_path, dataset)


async def export_backup_and_drop(dataset_path):
    """
    Export dataset to S3 backup, verify s3-PUBLIC, and drop local data.

    For a repository with no tags this is a no-op (nothing has been
    released, so there is nothing to back up or drop).
    """
    repo = pygit2.Repository(dataset_path)
    # NOTE(review): sort is lexicographic on tag name — confirm tag naming
    # guarantees this matches release order.
    release_tags = sorted(git_tag(repo), key=lambda t: t.name)
    if not release_tags:
        return
    # Push annexed content to the S3 backup remote before any local drop.
    s3_backup_push(dataset_path)
    for release_tag in release_tags:
        # fsck the remote copy first; only then drop local annexed files.
        verify_then_drop = Pipeline(broker, git_annex_fsck_remote).call_next(
            annex_drop,
            dataset_path=dataset_path,
            branch=release_tag,
            remote=get_s3_remote(),
        )
        # The arguments below are delivered to git_annex_fsck_remote,
        # checking history from the tag that was just exported.
        await verify_then_drop.kiq(
            dataset_path,
            branch=release_tag,
            remote=get_s3_remote(),
        )


@broker.task
async def export_dataset(
dataset_path,
Expand Down Expand Up @@ -111,7 +135,10 @@ async def export_dataset(
clear_dataset_cache(dataset_id)
# Check and clean local annexed files once export is complete
pipeline = Pipeline(broker, git_annex_fsck_remote).call_next(
annex_drop, dataset_path=dataset_path, branch=new_tag
annex_drop,
dataset_path=dataset_path,
branch=new_tag,
remote=get_s3_remote(),
)
# Call the pipeline (arguments for git_annex_fsck_remote)
await pipeline.kiq(
Expand Down