diff --git a/services/datalad/datalad_service/common/asyncio.py b/services/datalad/datalad_service/common/asyncio.py
index af70b871d..91bdc292a 100644
--- a/services/datalad/datalad_service/common/asyncio.py
+++ b/services/datalad/datalad_service/common/asyncio.py
@@ -2,14 +2,23 @@
 import subprocess
 
 
-async def run_check(command, dataset_path):
+async def run_check(command, dataset_path, env=None):
     """Helper to run an async command and check for failure"""
-    process = await asyncio.create_subprocess_exec(
-        *command,
-        cwd=dataset_path,
-        stdout=asyncio.subprocess.PIPE,
-        stderr=asyncio.subprocess.PIPE,
-    )
+    if env:
+        process = await asyncio.create_subprocess_exec(
+            *command,
+            cwd=dataset_path,
+            env=env,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+    else:
+        process = await asyncio.create_subprocess_exec(
+            *command,
+            cwd=dataset_path,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
     stdout, stderr = await process.communicate()
     if process.returncode != 0:
         raise subprocess.CalledProcessError(process.returncode, command, stdout, stderr)
diff --git a/services/datalad/datalad_service/common/s3.py b/services/datalad/datalad_service/common/s3.py
index 3c5f66ee1..89278f41c 100644
--- a/services/datalad/datalad_service/common/s3.py
+++ b/services/datalad/datalad_service/common/s3.py
@@ -6,6 +6,7 @@
 
 import datalad_service.config
 from datalad_service.common.annex import annex_initremote, is_git_annex_remote
+from datalad_service.common.asyncio import run_check
 
 
 class S3ConfigException(Exception):
@@ -118,6 +119,9 @@ def setup_s3_backup_sibling_workaround(dataset_path):
         Key=f'{dataset_id}/annex-uuid',
         Body=uuid.encode('utf-8'),
     )
+    # Create the creds file
+    with open(os.path.join(dataset_path, '.git', 'annex', 'creds', uuid), 'w') as f:
+        f.write(f'{aws_access_key_id}\n{aws_secret_access_key}\n')
     # Enableremote after
     subprocess.run(
         ['git-annex', 'enableremote', get_s3_backup_remote()],
@@ -179,17 +183,18 @@ def validate_s3_config(dataset_path):
     return True
 
 
-def s3_export(dataset_path, target, treeish):
+async def s3_export(dataset_path, target, treeish):
     """Perform an S3 export on a git-annex repo."""
-    subprocess.check_call(
-        ['git-annex', 'export', treeish, '--to', target], cwd=dataset_path
+    await run_check(
+        ['git-annex', 'export', treeish, '--to', target],
+        dataset_path,
     )
 
 
-def s3_backup_push(dataset_path):
+async def s3_backup_push(dataset_path):
     """Perform an S3 push to the backup remote on a git-annex repo."""
-    subprocess.check_call(
+    await run_check(
         ['git-annex', 'push', get_s3_backup_remote()],
-        cwd=dataset_path,
+        dataset_path,
         env=backup_remote_env(),
     )
diff --git a/services/datalad/datalad_service/handlers/exports.py b/services/datalad/datalad_service/handlers/exports.py
new file mode 100644
index 000000000..0f61eaf7e
--- /dev/null
+++ b/services/datalad/datalad_service/handlers/exports.py
@@ -0,0 +1,17 @@
+import logging
+
+import falcon
+
+from datalad_service.tasks.files import remove_annex_object
+
+
+class ExportsResource:
+    """Handler to report status for exports."""
+
+    def __init__(self, store):
+        self.store = store
+        self.logger = logging.getLogger('datalad_service.' + __name__)
+
+    async def on_get(self, req, resp, dataset, remote):
+        """Report status for exports"""
+        dataset_path = self.store.get_dataset_path(dataset)
diff --git a/services/datalad/datalad_service/tasks/publish.py b/services/datalad/datalad_service/tasks/publish.py
index aae7320ed..5d8d52dd9 100644
--- a/services/datalad/datalad_service/tasks/publish.py
+++ b/services/datalad/datalad_service/tasks/publish.py
@@ -86,7 +86,7 @@ async def export_backup_and_drop(dataset_path):
     repo = pygit2.Repository(dataset_path)
     tags = sorted(git_tag(repo), key=lambda tag: tag.name)
     if tags:
-        s3_backup_push(dataset_path)
+        await s3_backup_push(dataset_path)
     for tag in tags:
         # Check and clean local annexed files once export is complete
         pipeline = Pipeline(broker, git_annex_fsck_remote).call_next(
@@ -125,8 +125,8 @@ async def export_dataset(
     # Push the most recent tag
     if tags:
         new_tag = tags[-1].name
-        s3_export(dataset_path, get_s3_remote(), new_tag)
-        s3_backup_push(dataset_path)
+        await s3_export(dataset_path, get_s3_remote(), new_tag)
+        await s3_backup_push(dataset_path)
         # Once all S3 tags are exported, update GitHub
         if github_enabled:
             # Perform all GitHub export steps
@@ -138,7 +138,6 @@ async def export_dataset(
                 annex_drop,
                 dataset_path=dataset_path,
                 branch=new_tag,
-                remote=get_s3_remote(),
             )
             # Call the pipeline (arguments for git_annex_fsck_remote)
             await pipeline.kiq(
@@ -268,4 +267,10 @@ async def annex_drop(fsck_success, dataset_path, branch):
         pass
     # Drop will only drop successfully exported files present on both remotes
    if fsck_success:
-        await run_check(['git-annex', 'drop', '--branch', branch], dataset_path)
+        env = os.environ.copy()
+        # Force git-annex to use cached credentials for this
+        del env['AWS_ACCESS_KEY_ID']
+        del env['AWS_SECRET_ACCESS_KEY']
+        await run_check(
+            ['git-annex', 'drop', '--branch', branch], dataset_path, env=env
+        )
diff --git a/services/datalad/tests/test_publish.py b/services/datalad/tests/test_publish.py
index 0f21d71c3..43c67d3d1 100644
--- a/services/datalad/tests/test_publish.py
+++ b/services/datalad/tests/test_publish.py
@@ -1,6 +1,6 @@
 import json
 import os
-from unittest.mock import Mock, call
+from unittest.mock import AsyncMock, Mock, call
 
 import falcon
 
@@ -50,7 +50,7 @@ async def test_export_snapshots(no_init_remote, client, new_dataset):
     # Make it public
     create_remotes(new_dataset.path)
     # Export
-    s3_export_mock = Mock()
+    s3_export_mock = AsyncMock()
     github_export_mock = Mock()
     update_s3_sibling_mock = Mock()
     await export_dataset(
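
Editorial note, not part of the patch above: `asyncio.create_subprocess_exec` treats `env=None` the same as omitting the argument (the child inherits the parent's environment), so the two branches added to `run_check` could likely be collapsed into a single call. A minimal sketch under that assumption, keeping the signature introduced by this patch:

```python
import asyncio
import subprocess


async def run_check(command, dataset_path, env=None):
    """Helper to run an async command and check for failure.

    env=None inherits the parent environment (matching the env-less branch in
    the patch); a populated dict restricts the child to exactly that mapping.
    """
    process = await asyncio.create_subprocess_exec(
        *command,
        cwd=dataset_path,
        env=env,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await process.communicate()
    if process.returncode != 0:
        raise subprocess.CalledProcessError(process.returncode, command, stdout, stderr)
```

The only behavioral difference is for a caller passing an empty dict, which the patch treats as "inherit the parent environment" and this sketch as "run with an empty environment"; the call sites touched here pass either a populated mapping or no `env` at all.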