|
21 | 21 | from datalad_service.config import GCP_ACCESS_KEY_ID |
22 | 22 | from datalad_service.config import GCP_SECRET_ACCESS_KEY |
23 | 23 | from datalad_service.common.annex import get_tag_info, is_git_annex_remote |
24 | | -from datalad_service.common.openneuro import clear_dataset_cache |
| 24 | +from datalad_service.common.openneuro import clear_dataset_cache, is_public_dataset |
25 | 25 | from datalad_service.common.git import git_show, git_tag, git_tag_tree |
26 | 26 | from datalad_service.common.github import github_export |
27 | 27 | from datalad_service.common.s3 import ( |
@@ -126,6 +126,8 @@ async def export_dataset( |
126 | 126 | if tags: |
127 | 127 | new_tag = tags[-1].name |
128 | 128 | await s3_export(dataset_path, get_s3_remote(), new_tag) |
| 129 | + if not is_public_dataset(dataset_id): |
| 130 | + await set_s3_access_tag(dataset_id, 'private') |
129 | 131 | await s3_backup_push(dataset_path) |
130 | 132 | # Once all S3 tags are exported, update GitHub |
131 | 133 | if github_enabled: |
@@ -274,3 +276,48 @@ async def annex_drop(fsck_success, dataset_path, branch): |
274 | 276 | await run_check( |
275 | 277 | ['git-annex', 'drop', '--branch', branch], dataset_path, env=env |
276 | 278 | ) |
| 279 | + |
| 280 | + |
async def set_remote_public(dataset):
    """Mark every S3 object for *dataset* as publicly accessible.

    Delegates to set_s3_access_tag with the 'public' value, which rewrites
    the `access` object tag on all file versions.
    """
    # NOTE: git-annex cannot manage object tags today. If it gains support,
    # replace the per-object tagging below with something like:
    #   git-annex enableremote <remote> x-amz-tagging=access=public
    await set_s3_access_tag(dataset, 'public')
| 289 | + |
| 290 | + |
@broker.task
async def set_s3_access_tag(dataset, value='private'):
    """Set the `access` object tag on all versions of all files in a dataset.

    :param dataset: Dataset accession number, used as the S3 key prefix.
    :param value: Tag value to apply, e.g. 'private' or 'public'.
    """
    client = boto3.client(
        's3',
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    )
    s3_bucket = get_s3_bucket()
    # list_object_versions paginates; iterate every version of every key
    # under the dataset prefix so historical snapshots are covered too.
    paginator = client.get_paginator('list_object_versions')
    for page in paginator.paginate(Bucket=s3_bucket, Prefix=f'{dataset}/'):
        for version in page.get('Versions', []):
            key = version['Key']
            version_id = version['VersionId']
            try:
                response = client.get_object_tagging(
                    Bucket=s3_bucket, Key=key, VersionId=version_id
                )
                tag_set = response.get('TagSet', [])
            except client.exceptions.ClientError as e:
                # Some S3 implementations raise NoSuchTagSet for untagged
                # objects instead of returning an empty TagSet.
                if e.response['Error']['Code'] == 'NoSuchTagSet':
                    tag_set = []
                else:
                    raise
            # Skip the write when the access tag already has the desired
            # value — PutObjectTagging replaces the whole tag set and costs
            # one request per version, so re-runs stay cheap and idempotent.
            if any(
                tag['Key'] == 'access' and tag['Value'] == value
                for tag in tag_set
            ):
                continue
            # Remove any existing access tag and add the new one
            new_tags = [tag for tag in tag_set if tag['Key'] != 'access']
            new_tags.append({'Key': 'access', 'Value': value})
            client.put_object_tagging(
                Bucket=s3_bucket,
                Key=key,
                VersionId=version_id,
                Tagging={'TagSet': new_tags},
            )
0 commit comments