Merged
23 changes: 16 additions & 7 deletions services/datalad/datalad_service/common/asyncio.py
@@ -2,14 +2,23 @@
 import subprocess
 
 
-async def run_check(command, dataset_path):
+async def run_check(command, dataset_path, env=None):
     """Helper to run an async command and check for failure"""
-    process = await asyncio.create_subprocess_exec(
-        *command,
-        cwd=dataset_path,
-        stdout=asyncio.subprocess.PIPE,
-        stderr=asyncio.subprocess.PIPE,
-    )
+    if env:
+        process = await asyncio.create_subprocess_exec(
+            *command,
+            cwd=dataset_path,
+            env=env,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+    else:
+        process = await asyncio.create_subprocess_exec(
+            *command,
+            cwd=dataset_path,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
Comment on lines +7 to +21
Contributor

The default value of env is already None: https://docs.python.org/3/library/subprocess.html#subprocess.Popen

Suggested change
-    if env:
-        process = await asyncio.create_subprocess_exec(
-            *command,
-            cwd=dataset_path,
-            env=env,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE,
-        )
-    else:
-        process = await asyncio.create_subprocess_exec(
-            *command,
-            cwd=dataset_path,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE,
-        )
+    process = await asyncio.create_subprocess_exec(
+        *command,
+        cwd=dataset_path,
+        env=env,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )

Did you find this made a difference in practice?

     stdout, stderr = await process.communicate()
     if process.returncode != 0:
         raise subprocess.CalledProcessError(process.returncode, command, stdout, stderr)
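To see the reviewer's point in isolation: asyncio.create_subprocess_exec forwards env to the underlying subprocess machinery, where env=None means "inherit the parent environment", exactly as if the keyword were omitted. A minimal sketch (assuming a POSIX system with printenv on the PATH):

    import asyncio
    import os


    async def main():
        os.environ['RUN_CHECK_DEMO'] = 'inherited'
        # env=None behaves the same as leaving env out: the child sees the
        # parent's environment, including the variable set above.
        process = await asyncio.create_subprocess_exec(
            'printenv', 'RUN_CHECK_DEMO',
            env=None,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, _ = await process.communicate()
        print(stdout.decode().strip())  # prints: inherited


    asyncio.run(main())

So the if/else in run_check can collapse to a single call that passes env through unconditionally, as the suggestion shows.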
17 changes: 11 additions & 6 deletions services/datalad/datalad_service/common/s3.py
@@ -6,6 +6,7 @@

 import datalad_service.config
 from datalad_service.common.annex import annex_initremote, is_git_annex_remote
+from datalad_service.common.asyncio import run_check
 
 
 class S3ConfigException(Exception):
@@ -118,6 +119,9 @@ def setup_s3_backup_sibling_workaround(dataset_path):
         Key=f'{dataset_id}/annex-uuid',
         Body=uuid.encode('utf-8'),
     )
+    # Create the creds file
+    with open(os.path.join(dataset_path, '.git', 'annex', 'creds', uuid), 'w') as f:
+        f.write(f'{aws_access_key_id}\n{aws_secret_access_key}\n')
     # Enableremote after
     subprocess.run(
         ['git-annex', 'enableremote', get_s3_backup_remote()],
@@ -179,17 +183,18 @@ def validate_s3_config(dataset_path):
     return True
 
 
-def s3_export(dataset_path, target, treeish):
+async def s3_export(dataset_path, target, treeish):
     """Perform an S3 export on a git-annex repo."""
-    subprocess.check_call(
-        ['git-annex', 'export', treeish, '--to', target], cwd=dataset_path
+    await run_check(
+        ['git-annex', 'export', treeish, '--to', target],
+        dataset_path,
     )
 
 
-def s3_backup_push(dataset_path):
+async def s3_backup_push(dataset_path):
     """Perform an S3 push to the backup remote on a git-annex repo."""
-    subprocess.check_call(
+    await run_check(
         ['git-annex', 'push', get_s3_backup_remote()],
-        cwd=dataset_path,
+        dataset_path,
+        env=backup_remote_env(),
     )
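Because s3_export and s3_backup_push are now coroutine functions, any synchronous caller has to enter an event loop; inside the async task pipeline they are simply awaited, as the publish.py changes below show. A hypothetical one-off invocation, with a made-up dataset path, remote name, and tag:

    import asyncio

    from datalad_service.common.s3 import s3_export

    # All three arguments here are placeholders for illustration only.
    asyncio.run(s3_export('/datasets/ds000001', 's3-PUBLIC', '1.0.0'))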
17 changes: 17 additions & 0 deletions services/datalad/datalad_service/handlers/exports.py
@@ -0,0 +1,17 @@
+import logging
+
+import falcon
+
+from datalad_service.tasks.files import remove_annex_object
+
+
+class ExportsResource:
+    """Handler to report status for exports."""
+
+    def __init__(self, store):
+        self.store = store
+        self.logger = logging.getLogger('datalad_service.' + __name__)
+
+    async def on_get(self, req, resp, dataset, remote):
+        """Report status for exports"""
+        dataset_path = self.store.get_dataset_path(dataset)
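As added here, on_get resolves the dataset path from the store. Wiring the resource into the service would look roughly like the sketch below; the URL template and the create_app shape are assumptions for illustration, not taken from this PR:

    import falcon.asgi

    from datalad_service.handlers.exports import ExportsResource


    def create_app(store):
        # Falcon fills on_get's dataset and remote parameters from the
        # matching segments of the (assumed) URL template below.
        app = falcon.asgi.App()
        app.add_route('/datasets/{dataset}/exports/{remote}', ExportsResource(store))
        return app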
15 changes: 10 additions & 5 deletions services/datalad/datalad_service/tasks/publish.py
@@ -86,7 +86,7 @@ async def export_backup_and_drop(dataset_path):
     repo = pygit2.Repository(dataset_path)
     tags = sorted(git_tag(repo), key=lambda tag: tag.name)
     if tags:
-        s3_backup_push(dataset_path)
+        await s3_backup_push(dataset_path)
         for tag in tags:
             # Check and clean local annexed files once export is complete
             pipeline = Pipeline(broker, git_annex_fsck_remote).call_next(
@@ -125,8 +125,8 @@ async def export_dataset(
     # Push the most recent tag
     if tags:
         new_tag = tags[-1].name
-        s3_export(dataset_path, get_s3_remote(), new_tag)
-        s3_backup_push(dataset_path)
+        await s3_export(dataset_path, get_s3_remote(), new_tag)
+        await s3_backup_push(dataset_path)
         # Once all S3 tags are exported, update GitHub
         if github_enabled:
             # Perform all GitHub export steps
@@ -138,7 +138,6 @@
             annex_drop,
             dataset_path=dataset_path,
             branch=new_tag,
-            remote=get_s3_remote(),
         )
         # Call the pipeline (arguments for git_annex_fsck_remote)
         await pipeline.kiq(
@@ -268,4 +267,10 @@ async def annex_drop(fsck_success, dataset_path, branch):
         pass
     # Drop will only drop successfully exported files present on both remotes
     if fsck_success:
-        await run_check(['git-annex', 'drop', '--branch', branch], dataset_path)
+        env = os.environ.copy()
+        # Force git-annex to use cached credentials for this
+        del env['AWS_ACCESS_KEY_ID']
+        del env['AWS_SECRET_ACCESS_KEY']
+        await run_check(
+            ['git-annex', 'drop', '--branch', branch], dataset_path, env=env
+        )
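A note on the credential scrubbing above: del raises KeyError when a variable is absent, so this assumes both AWS variables are always set in the worker's environment. A more defensive variant of the same idea (a sketch, not what the PR ships) would pop with a default:

    import os


    def env_without_aws_credentials():
        # Copy the parent environment and strip the AWS variables if present,
        # so git-annex falls back to its cached creds file for the remote.
        env = os.environ.copy()
        env.pop('AWS_ACCESS_KEY_ID', None)
        env.pop('AWS_SECRET_ACCESS_KEY', None)
        return env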
4 changes: 2 additions & 2 deletions services/datalad/tests/test_publish.py
@@ -1,6 +1,6 @@
 import json
 import os
-from unittest.mock import Mock, call
+from unittest.mock import AsyncMock, Mock, call
 
 import falcon
 
@@ -50,7 +50,7 @@ async def test_export_snapshots(no_init_remote, client, new_dataset):
     # Make it public
     create_remotes(new_dataset.path)
     # Export
-    s3_export_mock = Mock()
+    s3_export_mock = AsyncMock()
     github_export_mock = Mock()
     update_s3_sibling_mock = Mock()
     await export_dataset(
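The AsyncMock swap follows from the s3_export signature change above: awaiting the return value of a plain Mock raises TypeError, while calling an AsyncMock yields an awaitable. A self-contained illustration:

    import asyncio
    from unittest.mock import AsyncMock, Mock


    async def demo():
        # Calling an AsyncMock yields an awaitable, like the real coroutine.
        await AsyncMock()()
        # Calling a plain Mock yields another Mock, which cannot be awaited.
        try:
            await Mock()()
        except TypeError as err:
            print(err)  # object Mock can't be used in 'await' expression


    asyncio.run(demo())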