@@ -12,43 +12,88 @@ def get_s3_remote():
1212 return 's3-PUBLIC'
1313
1414
def get_s3_backup_remote():
    """Return the git-annex special remote name used for the backup bucket."""
    return 's3-BACKUP'
17+
18+
def get_s3_bucket():
    """Return the public AWS S3 bucket name from the service configuration.

    Raises AttributeError if ``AWS_S3_PUBLIC_BUCKET`` is not configured.
    """
    # Plain attribute access — equivalent to the two-arg getattr form.
    return datalad_service.config.AWS_S3_PUBLIC_BUCKET
1721
1822
19- def generate_s3_annex_options (dataset_path ):
def get_s3_backup_bucket():
    """Return the GCP backup bucket name from the service configuration.

    Raises AttributeError if ``GCP_S3_BACKUP_BUCKET`` is not configured.
    """
    # Plain attribute access — equivalent to the two-arg getattr form.
    return datalad_service.config.GCP_S3_BACKUP_BUCKET
25+
26+
def generate_s3_annex_options(dataset_path, backup=False):
    """Build the option list for a git-annex S3 special remote.

    :param dataset_path: dataset directory; its basename becomes the
        key prefix (``fileprefix``) inside the bucket
    :param backup: when True, target the GCP backup bucket (S3-compatible
        endpoint) instead of the public AWS bucket
    :return: list of ``key=value`` strings for initremote/enableremote
    """
    dataset_id = os.path.basename(dataset_path)
    options = [
        'type=S3',
        'partsize=1GiB',
        'encryption=none',
        f'fileprefix={dataset_id}/',
        'public=no',
    ]
    if backup:
        # GCP Cloud Storage via its S3-compatible endpoint; the high cost
        # value steers git-annex away from this remote for retrieval.
        extra = [
            f'bucket={get_s3_backup_bucket()}',
            'cost=400',
            'host=storage.googleapis.com',
            'storageclass=ARCHIVE',
        ]
    else:
        extra = [
            'exporttree=yes',
            'versioning=yes',
            f'bucket={get_s3_bucket()}',
            'autoenable=true',
            f'publicurl=https://s3.amazonaws.com/{get_s3_bucket()}',
        ]
    return options + extra
3452
3553
def backup_remote_env():
    """Copy and modify the environment for setup/modification of backup remote settings.

    Returns a detached copy of ``os.environ``; the caller's environment is
    not mutated. Raises KeyError if the GCP HMAC credentials are unset.
    """
    env = dict(os.environ)
    # git-annex's S3 remote only reads AWS_* variables, so point them at
    # the GCP HMAC credentials when talking to the backup endpoint.
    env['AWS_ACCESS_KEY_ID'] = env['GCP_ACCESS_KEY_ID']
    env['AWS_SECRET_ACCESS_KEY'] = env['GCP_SECRET_ACCESS_KEY']
    return env
63+
64+
def setup_s3_sibling(dataset_path):
    """Add a sibling for an S3 bucket publish.

    Initializes two git-annex special remotes: the public AWS remote and the
    GCP backup remote (which needs the remapped credential environment).
    """
    # NOTE(review): neither initremote call passes check=True, so a failed
    # setup is silently ignored — presumably tolerated because initremote
    # errors when the remote already exists; confirm this is intentional.
    public_cmd = ['git-annex', 'initremote', get_s3_remote()]
    subprocess.run(
        public_cmd + generate_s3_annex_options(dataset_path),
        cwd=dataset_path,
    )
    backup_cmd = ['git-annex', 'initremote', get_s3_backup_remote()]
    subprocess.run(
        backup_cmd + generate_s3_annex_options(dataset_path, backup=True),
        cwd=dataset_path,
        env=backup_remote_env(),
    )
4280
4381
def update_s3_sibling(dataset_path):
    """Update S3 remote with latest config.

    Re-applies the generated options to both the public and backup remotes.
    Raises subprocess.CalledProcessError if git-annex exits non-zero.
    """
    # note: enableremote command will only upsert config options, none are deleted
    base = ['git-annex', 'enableremote']
    subprocess.run(
        base + [get_s3_remote()] + generate_s3_annex_options(dataset_path),
        check=True,
        cwd=dataset_path,
    )
    # The backup remote needs the GCP credentials mapped into AWS_* vars.
    subprocess.run(
        base
        + [get_s3_backup_remote()]
        + generate_s3_annex_options(dataset_path, backup=True),
        check=True,
        cwd=dataset_path,
        env=backup_remote_env(),
    )
5398
5499
@@ -87,3 +132,13 @@ def s3_export(dataset_path, target, treeish):
87132 subprocess .check_call (
88133 ['git-annex' , 'export' , treeish , '--to' , target ], cwd = dataset_path
89134 )
135+
136+
def s3_backup_push(dataset_path):
    """Perform an S3 push to the backup remote on a git-annex repo.

    :param dataset_path: path to the git-annex repository to push from
    :raises subprocess.CalledProcessError: if git-annex exits non-zero
    """
    # Fix: removed the leftover `print(backup_remote_env())` — it dumped the
    # entire environment, including the AWS/GCP secret access keys, to
    # stdout/logs, which is a credential leak.
    subprocess.check_call(
        ['git-annex', 'push', get_s3_backup_remote()],
        cwd=dataset_path,
        env=backup_remote_env(),
    )
0 commit comments