Skip to content

Commit c9afcd2

Browse files
committed
fix(worker): Skip re-export on drop if remote contains all files for a branch or tag
1 parent 871f977 commit c9afcd2

File tree

2 files changed

+20
-2
lines changed

2 files changed

+20
-2
lines changed

services/datalad/datalad_service/common/asyncio.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,3 +22,4 @@ async def run_check(command, dataset_path, env=None):
2222
stdout, stderr = await process.communicate()
2323
if process.returncode != 0:
2424
raise subprocess.CalledProcessError(process.returncode, command, stdout, stderr)
25+
return stdout.decode('utf-8')

services/datalad/datalad_service/tasks/publish.py

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -105,8 +105,12 @@ async def export_backup_and_drop(dataset_path):
105105
await s3_backup_push(dataset_path)
106106
for tag in tags:
107107
logger.info(f'Exporting/dropping tag {dataset_id}@{tag.name}')
108-
# Private datasets need to export each tag for this step
109-
if not public_dataset:
108+
export_ran = False
109+
if await find_in_remote(dataset_path, tag.name, get_s3_remote()):
110+
export_ran = True
111+
await s3_export(dataset_path, get_s3_remote(), tag.name)
112+
if tag == tags[-1] and export_ran:
113+
# Always export the most recent tag again if any export ran
110114
await s3_export(dataset_path, get_s3_remote(), tag.name)
111115
await fsck_and_drop(dataset_path, tag.name)
112116
logger.info(f'Exporting/dropping tag {dataset_id}@{tag.name} complete')
@@ -116,6 +120,19 @@ async def export_backup_and_drop(dataset_path):
116120
logger.info(f'{dataset_id} export_backup_and_drop complete')
117121

118122

123+
async def find_in_remote(dataset_path, tag, remote):
    """Report whether `remote` already holds every annexed key of `tag`.

    Runs ``git-annex find --branch=<tag> --not --in=<remote>`` inside
    ``dataset_path`` through ``run_check`` and inspects the text it returns.

    Args:
        dataset_path: Dataset directory handed to ``run_check``.
        tag: Branch or tag name substituted into ``--branch=``.
        remote: Remote name substituted into ``--in=``.

    Returns:
        True when the command prints nothing (no key is reported missing
        from the remote); False when it prints anything (at least one key
        is missing).

    NOTE(review): True means "nothing left to export". A caller that uses
    this result to decide whether an export is required should confirm it
    tests the negation.
    """
    annex_find_command = [
        'git-annex',
        'find',
        f'--branch={tag}',
        '--not',
        f'--in={remote}',
    ]
    missing_listing = await run_check(annex_find_command, dataset_path)
    # Any output at all means some key was listed as absent from the remote.
    return not missing_listing
134+
135+
119136
@broker.task
120137
async def export_dataset(
121138
dataset_path,

0 commit comments

Comments
 (0)