Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions services/datalad/datalad_service/common/annex.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import datalad_service.config
from datalad_service.common.git import git_show
from datalad_service.common.s3_client import presign_remote_url, get_s3_remote

SERVICE_EMAIL = '[email protected]'
SERVICE_USER = 'Git Worker'
Expand All @@ -21,6 +22,7 @@
'openneuro-dev-datalad-public',
'openneuro-derivatives',
'bobsrepository',
'openneuro-datalad-public-nell-test',
]


Expand Down Expand Up @@ -147,8 +149,17 @@ def parse_rmet_line(remote, rmetLine):
remoteContext, remoteData = rmetLine.split('V +')
slash = '' if remote['url'][-1] == '/' else '/'
s3version, path = remoteData.split('#')
return '{}{}{}?versionId={}'.format(remote['url'], slash, path, s3version)
if remote['name'] == get_s3_remote():
# Presigned via OpenNeuro's credentials
url = presign_remote_url(path, s3version)
return url
else:
# Anonymous access for any other buckets
return encode_remote_url(
'{}{}{}?versionId={}'.format(remote['url'], slash, path, s3version)
)
except:
raise
return None


Expand Down Expand Up @@ -260,8 +271,7 @@ def get_repo_urls(path, files):
for path in rmetPaths:
url = read_rmet_file(remote, catFile)
if url:
encoded_url = encode_remote_url(url)
rmetFiles[path]['urls'].append(encoded_url)
rmetFiles[path]['urls'].append(url)
return files


Expand Down Expand Up @@ -378,9 +388,9 @@ def test_key_remote(dataset_path, key, remote_name='s3-PUBLIC'):
remote_log = git_show(repo, 'git-annex', 'remote.log')
except KeyError:
return None
for line in remote_log:
for line in remote_log.splitlines():
remote = parse_remote_line(line)
if remote['name'] == remote_name:
if remote and remote['name'] == remote_name:
rmet_path = compute_rmet(key)
try:
rmet = git_show(repo, 'git-annex', rmet_path)
Expand Down
22 changes: 6 additions & 16 deletions services/datalad/datalad_service/common/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,18 @@
import datalad_service.config
from datalad_service.common.annex import annex_initremote, is_git_annex_remote
from datalad_service.common.asyncio import run_check
from datalad_service.common.s3_client import (
get_s3_bucket,
get_s3_remote,
get_s3_backup_bucket,
get_s3_backup_remote,
)


class S3ConfigException(Exception):
pass


def get_s3_remote():
return 's3-PUBLIC'


def get_s3_backup_remote():
return 's3-BACKUP'


def get_s3_bucket():
return getattr(datalad_service.config, 'AWS_S3_PUBLIC_BUCKET')


def get_s3_backup_bucket():
return getattr(datalad_service.config, 'GCP_S3_BACKUP_BUCKET')


def generate_s3_annex_options(dataset_path, backup=False):
dataset_id = os.path.basename(dataset_path)
annex_options = [
Expand Down
52 changes: 52 additions & 0 deletions services/datalad/datalad_service/common/s3_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import boto3
from botocore.client import Config

import datalad_service.config

boto3_session = None
boto3_s3_client = None


def get_s3_client():
    """Return a shared boto3 S3 client, creating it on first use.

    Both the session and the client are cached in module-level globals,
    so every caller reuses the same credentials and connection pool.
    Credentials are read from the service configuration module.
    """
    global boto3_session, boto3_s3_client
    if not boto3_session:
        # Build the session once from the configured AWS credentials.
        boto3_session = boto3.session.Session(
            getattr(datalad_service.config, 'AWS_ACCESS_KEY_ID'),
            getattr(datalad_service.config, 'AWS_SECRET_ACCESS_KEY'),
        )
    if not boto3_s3_client:
        # Path-style addressing keeps generated URLs in the
        # https://s3.amazonaws.com/<bucket>/<key> form.
        boto3_s3_client = boto3_session.client(
            's3', config=Config(s3={'addressing_style': 'path'})
        )
    return boto3_s3_client


def presign_remote_url(key, version, expiration=604800):
    """Generate a presigned GET URL for an object in the public S3 bucket.

    :param key: object key within the public bucket
    :param version: S3 version id identifying the exact object revision
    :param expiration: link lifetime in seconds (default 604800 = one week)
    :returns: a time-limited URL signed with the service credentials
    """
    return get_s3_client().generate_presigned_url(
        ClientMethod='get_object',
        Params={
            'Bucket': get_s3_bucket(),
            'Key': key,
            'VersionId': version,
        },
        ExpiresIn=expiration,
    )


def get_s3_remote():
    """Return the name of the git-annex special remote for the public bucket."""
    return 's3-PUBLIC'


def get_s3_backup_remote():
    """Return the name of the git-annex special remote for the backup bucket."""
    return 's3-BACKUP'


def get_s3_bucket():
    """Return the public S3 bucket name from the service configuration."""
    return datalad_service.config.AWS_S3_PUBLIC_BUCKET


def get_s3_backup_bucket():
    """Return the GCP backup bucket name from the service configuration."""
    return datalad_service.config.GCP_S3_BACKUP_BUCKET
10 changes: 8 additions & 2 deletions services/datalad/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,10 +200,12 @@ def no_publish(monkeypatch):
)


@pytest.fixture
@pytest.fixture(autouse=True)
def s3_creds(monkeypatch):
monkeypatch.setenv('AWS_S3_PUBLIC_BUCKET', 'a-fake-test-public-bucket')
monkeypatch.setenv('AWS_S3_PRIVATE_BUCKET', 'a-fake-test-private-bucket')
monkeypatch.setattr(
datalad_service.config, 'AWS_S3_PUBLIC_BUCKET', 'a-fake-test-public-bucket'
)


@pytest.fixture(autouse=True)
Expand All @@ -212,6 +214,10 @@ def access_keys(monkeypatch):
monkeypatch.setenv('AWS_SECRET_ACCESS_KEY', 'aws-secret')
monkeypatch.setenv('GCP_ACCESS_KEY_ID', 'gcp-id')
monkeypatch.setenv('GCP_SECRET_ACCESS_KEY', 'gcp-secret')
monkeypatch.setattr(datalad_service.config, 'AWS_ACCESS_KEY_ID', 'aws-id')
monkeypatch.setattr(datalad_service.config, 'AWS_SECRET_ACCESS_KEY', 'aws-secret')
monkeypatch.setattr(datalad_service.config, 'GCP_ACCESS_KEY_ID', 'gcp-id')
monkeypatch.setattr(datalad_service.config, 'GCP_SECRET_ACCESS_KEY', 'gcp-secret')


@pytest.fixture(autouse=True)
Expand Down
18 changes: 12 additions & 6 deletions services/datalad/tests/test_annex.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ def test_parse_remote_line():

def test_parse_rmet_line():
remote = {
'name': 's3-PUBLIC',
'url': 'http://openneuro.org.s3.amazonaws.com/',
'uuid': '57894849-d0c8-4c62-8418-3627be18a196',
}
Expand All @@ -164,13 +165,15 @@ def test_parse_rmet_line():
"""1590213748.042921433s 57894849-d0c8-4c62-8418-3627be18a196:V +iVcEk18e3J2WQys4zr_ANaTPfpUufW4Y#ds002778/dataset_description.json""",
)
assert (
url
== 'http://openneuro.org.s3.amazonaws.com/ds002778/dataset_description.json?versionId=iVcEk18e3J2WQys4zr_ANaTPfpUufW4Y'
'https://s3.amazonaws.com/a-fake-test-public-bucket/ds002778/dataset_description.json?versionId=iVcEk18e3J2WQys4zr_ANaTPfpUufW4Y&AWSAccessKeyId=aws-id'
in url
)
assert 'Signature=' in url


def test_parse_rmet_line_https():
remote = {
'name': 's3-PUBLIC',
'url': 'https://s3.amazonaws.com/openneuro.org',
'uuid': '57894849-d0c8-4c62-8418-3627be18a196',
}
Expand All @@ -179,23 +182,26 @@ def test_parse_rmet_line_https():
"""1590213748.042921433s 57894849-d0c8-4c62-8418-3627be18a196:V +iVcEk18e3J2WQys4zr_ANaTPfpUufW4Y#ds002778/dataset_description.json""",
)
assert (
url
== 'https://s3.amazonaws.com/openneuro.org/ds002778/dataset_description.json?versionId=iVcEk18e3J2WQys4zr_ANaTPfpUufW4Y'
'https://s3.amazonaws.com/a-fake-test-public-bucket/ds002778/dataset_description.json?versionId=iVcEk18e3J2WQys4zr_ANaTPfpUufW4Y&AWSAccessKeyId=aws-id'
in url
)
assert 'Signature=' in url


def test_read_rmet_file():
remote = {
'name': 's3-PUBLIC',
'url': 'http://openneuro.org.s3.amazonaws.com/',
'uuid': '57894849-d0c8-4c62-8418-3627be18a196',
}
catFile = io.StringIO(""":::99fe93bfea62c16a10488593da870df25d09be81
1590213748.042921433s 57894849-d0c8-4c62-8418-3627be18a196:V +iVcEk18e3J2WQys4zr_ANaTPfpUufW4Y#ds002778/dataset_description.json""")
url = read_rmet_file(remote, catFile)
assert (
url
== 'http://openneuro.org.s3.amazonaws.com/ds002778/dataset_description.json?versionId=iVcEk18e3J2WQys4zr_ANaTPfpUufW4Y'
'https://s3.amazonaws.com/a-fake-test-public-bucket/ds002778/dataset_description.json?versionId=iVcEk18e3J2WQys4zr_ANaTPfpUufW4Y&AWSAccessKeyId=aws-id'
in url
)
assert 'Signature=' in url


def test_remote_url_encoding():
Expand Down