Merged
23 changes: 16 additions & 7 deletions services/datalad/datalad_service/common/asyncio.py
@@ -2,14 +2,23 @@
 import subprocess
 
 
-async def run_check(command, dataset_path):
+async def run_check(command, dataset_path, env=None):
     """Helper to run an async command and check for failure"""
-    process = await asyncio.create_subprocess_exec(
-        *command,
-        cwd=dataset_path,
-        stdout=asyncio.subprocess.PIPE,
-        stderr=asyncio.subprocess.PIPE,
-    )
+    if env:
+        process = await asyncio.create_subprocess_exec(
+            *command,
+            cwd=dataset_path,
+            env=env,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+    else:
+        process = await asyncio.create_subprocess_exec(
+            *command,
+            cwd=dataset_path,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
Comment on lines +7 to +21
Contributor

The default value of env is already None: https://docs.python.org/3/library/subprocess.html#subprocess.Popen

Suggested change
-    if env:
-        process = await asyncio.create_subprocess_exec(
-            *command,
-            cwd=dataset_path,
-            env=env,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE,
-        )
-    else:
-        process = await asyncio.create_subprocess_exec(
-            *command,
-            cwd=dataset_path,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE,
-        )
+    process = await asyncio.create_subprocess_exec(
+        *command,
+        cwd=dataset_path,
+        env=env,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )

Did you find this made a difference in practice?

     stdout, stderr = await process.communicate()
     if process.returncode != 0:
         raise subprocess.CalledProcessError(process.returncode, command, stdout, stderr)
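To see the reviewer's point in isolation: asyncio.create_subprocess_exec forwards env to the underlying subprocess machinery, where env=None means "inherit the parent environment", exactly as if the keyword were omitted. A minimal sketch (assuming a POSIX system with printenv on the PATH):

    import asyncio
    import os


    async def main():
        os.environ['RUN_CHECK_DEMO'] = 'inherited'
        # env=None behaves the same as leaving env out: the child sees the
        # parent's environment, including the variable set above.
        process = await asyncio.create_subprocess_exec(
            'printenv', 'RUN_CHECK_DEMO',
            env=None,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, _ = await process.communicate()
        print(stdout.decode().strip())  # prints: inherited


    asyncio.run(main())

So the if/else in run_check can collapse to a single call that passes env through unconditionally, as the suggestion shows.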
17 changes: 11 additions & 6 deletions services/datalad/datalad_service/common/s3.py
@@ -6,6 +6,7 @@

 import datalad_service.config
 from datalad_service.common.annex import annex_initremote, is_git_annex_remote
+from datalad_service.common.asyncio import run_check
 
 
 class S3ConfigException(Exception):
@@ -118,6 +119,9 @@ def setup_s3_backup_sibling_workaround(dataset_path):
         Key=f'{dataset_id}/annex-uuid',
         Body=uuid.encode('utf-8'),
     )
+    # Create the creds file
+    with open(os.path.join(dataset_path, '.git', 'annex', 'creds', uuid), 'w') as f:
+        f.write(f'{aws_access_key_id}\n{aws_secret_access_key}\n')
     # Enableremote after
     subprocess.run(
         ['git-annex', 'enableremote', get_s3_backup_remote()],
@@ -179,17 +183,18 @@ def validate_s3_config(dataset_path):
     return True
 
 
-def s3_export(dataset_path, target, treeish):
+async def s3_export(dataset_path, target, treeish):
     """Perform an S3 export on a git-annex repo."""
-    subprocess.check_call(
-        ['git-annex', 'export', treeish, '--to', target], cwd=dataset_path
+    await run_check(
+        ['git-annex', 'export', treeish, '--to', target],
+        dataset_path,
     )
 
 
-def s3_backup_push(dataset_path):
+async def s3_backup_push(dataset_path):
     """Perform an S3 push to the backup remote on a git-annex repo."""
-    subprocess.check_call(
+    await run_check(
         ['git-annex', 'push', get_s3_backup_remote()],
-        cwd=dataset_path,
+        dataset_path,
+        env=backup_remote_env(),
     )
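Because s3_export and s3_backup_push are now coroutine functions, any synchronous caller has to enter an event loop; inside the async task pipeline they are simply awaited, as the publish.py changes below show. A hypothetical one-off invocation, with a made-up dataset path, remote name, and tag:

    import asyncio

    from datalad_service.common.s3 import s3_export

    # All three arguments here are placeholders for illustration only.
    asyncio.run(s3_export('/datasets/ds000001', 's3-PUBLIC', '1.0.0'))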
17 changes: 17 additions & 0 deletions services/datalad/datalad_service/handlers/exports.py
@@ -0,0 +1,17 @@
+import logging
+
+import falcon
+
+from datalad_service.tasks.files import remove_annex_object
+
+
+class ExportsResource:
+    """Handler to report status for exports."""
+
+    def __init__(self, store):
+        self.store = store
+        self.logger = logging.getLogger('datalad_service.' + __name__)
+
+    async def on_get(self, req, resp, dataset, remote):
+        """Report status for exports"""
+        dataset_path = self.store.get_dataset_path(dataset)
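As added here, on_get resolves the dataset path from the store. Wiring the resource into the service would look roughly like the sketch below; the URL template and the create_app shape are assumptions for illustration, not taken from this PR:

    import falcon.asgi

    from datalad_service.handlers.exports import ExportsResource


    def create_app(store):
        # Falcon fills on_get's dataset and remote parameters from the
        # matching segments of the (assumed) URL template below.
        app = falcon.asgi.App()
        app.add_route('/datasets/{dataset}/exports/{remote}', ExportsResource(store))
        return app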
15 changes: 10 additions & 5 deletions services/datalad/datalad_service/tasks/publish.py
@@ -86,7 +86,7 @@ async def export_backup_and_drop(dataset_path):
     repo = pygit2.Repository(dataset_path)
     tags = sorted(git_tag(repo), key=lambda tag: tag.name)
     if tags:
-        s3_backup_push(dataset_path)
+        await s3_backup_push(dataset_path)
         for tag in tags:
             # Check and clean local annexed files once export is complete
             pipeline = Pipeline(broker, git_annex_fsck_remote).call_next(
@@ -125,8 +125,8 @@ async def export_dataset(
     # Push the most recent tag
     if tags:
         new_tag = tags[-1].name
-        s3_export(dataset_path, get_s3_remote(), new_tag)
-        s3_backup_push(dataset_path)
+        await s3_export(dataset_path, get_s3_remote(), new_tag)
+        await s3_backup_push(dataset_path)
         # Once all S3 tags are exported, update GitHub
         if github_enabled:
             # Perform all GitHub export steps
@@ -138,7 +138,6 @@
             annex_drop,
             dataset_path=dataset_path,
             branch=new_tag,
-            remote=get_s3_remote(),
         )
         # Call the pipeline (arguments for git_annex_fsck_remote)
         await pipeline.kiq(
@@ -268,4 +267,10 @@ async def annex_drop(fsck_success, dataset_path, branch):
         pass
     # Drop will only drop successfully exported files present on both remotes
     if fsck_success:
-        await run_check(['git-annex', 'drop', '--branch', branch], dataset_path)
+        env = os.environ.copy()
+        # Force git-annex to use cached credentials for this
+        del env['AWS_ACCESS_KEY_ID']
+        del env['AWS_SECRET_ACCESS_KEY']
+        await run_check(
+            ['git-annex', 'drop', '--branch', branch], dataset_path, env=env
+        )
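A note on the credential scrubbing above: del raises KeyError when a variable is absent, so this assumes both AWS variables are always set in the worker's environment. A more defensive variant of the same idea (a sketch, not what the PR ships) would pop with a default:

    import os


    def env_without_aws_credentials():
        # Copy the parent environment and strip the AWS variables if present,
        # so git-annex falls back to its cached creds file for the remote.
        env = os.environ.copy()
        env.pop('AWS_ACCESS_KEY_ID', None)
        env.pop('AWS_SECRET_ACCESS_KEY', None)
        return env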
4 changes: 2 additions & 2 deletions services/datalad/tests/test_publish.py
@@ -1,6 +1,6 @@
 import json
 import os
-from unittest.mock import Mock, call
+from unittest.mock import AsyncMock, Mock, call
 
 import falcon
 
@@ -50,7 +50,7 @@ async def test_export_snapshots(no_init_remote, client, new_dataset):
     # Make it public
     create_remotes(new_dataset.path)
     # Export
-    s3_export_mock = Mock()
+    s3_export_mock = AsyncMock()
     github_export_mock = Mock()
     update_s3_sibling_mock = Mock()
     await export_dataset(
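The AsyncMock swap follows from the s3_export signature change above: awaiting the return value of a plain Mock raises TypeError, while calling an AsyncMock yields an awaitable. A self-contained illustration:

    import asyncio
    from unittest.mock import AsyncMock, Mock


    async def demo():
        # Calling an AsyncMock yields an awaitable, like the real coroutine.
        await AsyncMock()()
        # Calling a plain Mock yields another Mock, which cannot be awaited.
        try:
            await Mock()()
        except TypeError as err:
            print(err)  # object Mock can't be used in 'await' expression


    asyncio.run(demo())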