@@ -79,6 +79,30 @@ def create_remotes(dataset_path):
7979 github_sibling (dataset_path , dataset )
8080
8181
82+ async def export_backup_and_drop (dataset_path ):
83+ """
84+ Export dataset to S3 backup, verify s3-PUBLIC, and drop local data.
85+ """
86+ repo = pygit2 .Repository (dataset_path )
87+ tags = sorted (git_tag (repo ), key = lambda tag : tag .name )
88+ if tags :
89+ s3_backup_push (dataset_path )
90+ for tag in tags :
91+ # Check and clean local annexed files once export is complete
92+ pipeline = Pipeline (broker , git_annex_fsck_remote ).call_next (
93+ annex_drop ,
94+ dataset_path = dataset_path ,
95+ branch = tag ,
96+ remote = get_s3_remote (),
97+ )
98+ # Call the pipeline (arguments for git_annex_fsck_remote)
99+ await pipeline .kiq (
100+ dataset_path ,
101+ branch = tag , # Check the history from the new tag just exported
102+ remote = get_s3_remote (),
103+ )
104+
105+
82106@broker .task
83107async def export_dataset (
84108 dataset_path ,
@@ -111,7 +135,10 @@ async def export_dataset(
111135 clear_dataset_cache (dataset_id )
112136 # Check and clean local annexed files once export is complete
113137 pipeline = Pipeline (broker , git_annex_fsck_remote ).call_next (
114- annex_drop , dataset_path = dataset_path , branch = new_tag
138+ annex_drop ,
139+ dataset_path = dataset_path ,
140+ branch = new_tag ,
141+ remote = get_s3_remote (),
115142 )
116143 # Call the pipeline (arguments for git_annex_fsck_remote)
117144 await pipeline .kiq (
0 commit comments