Skip to content

Commit c05e694

Browse files
authored
Merge pull request #86 from GeoscienceAustralia/upgrades/s3_path
Improvements to S3 paths
2 parents 22f5529 + 341f751 commit c05e694

11 files changed

Lines changed: 209 additions & 222 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ tests/sar_pipeline/data/**/*.json
2121
!tests/sar_pipeline/data/**/*.tif
2222
!tests/sar_pipeline/data/**/*.vrt
2323

24+
# ignore created test data
25+
tests/sar_pipeline/data/isce3_rtc/results/TMP/*
26+
2427
# ignore secret files
2528
*.secret
2629

docs/workflows/aws.md

Lines changed: 84 additions & 75 deletions
Large diffs are not rendered by default.

env.secret.example

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
EARTHDATA_LOGIN=
2+
EARTHDATA_PASSWORD=
3+
AWS_ACCESS_KEY_ID=
4+
AWS_SECRET_ACCESS_KEY=
5+
AWS_DEFAULT_REGION=
6+
CDSE_LOGIN=
7+
CDSE_PASSWORD=

sar_pipeline/aws/cli.py

Lines changed: 22 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
from sar_pipeline.aws.metadata.stac import BurstH5toStacManager
2424
from sar_pipeline.aws.metadata.odc import (
2525
make_static_layer_base_url,
26-
get_collection_number,
2726
)
2827
from sar_pipeline.utils.s3upload import push_files_in_folder_to_s3
2928
from sar_pipeline.utils.general import log_timing
@@ -105,11 +104,10 @@
105104
help="project folder in the s3 bucket",
106105
)
107106
@click.option(
108-
"--collection",
107+
"--collection-number",
109108
required=True,
110-
type=str,
111-
help="collection associated with product. e.g. s1_rtc_c1. Must end in 'cX' where X is an "
112-
"integer number referring to the collection.",
109+
type=int,
110+
help="The collection number of the product.",
113111
)
114112
@click.option(
115113
"--download-folder",
@@ -149,7 +147,7 @@
149147
is_flag=True,
150148
default=False,
151149
help="If static layers should be linked to RTC_S1 products in the "
152-
"STAC metadata. A url to the static layer collection will be added"
150+
"STAC metadata. A url to the static layers will be added "
153151
"to the run config file.",
154152
)
155153
@click.option(
@@ -159,17 +157,16 @@
159157
help="S3 bucket containing the RTC_S1_STATIC data that will be linked to the RTC_S1 bursts.",
160158
)
161159
@click.option(
162-
"--linked-static-layers-collection",
160+
"--linked-static-layers-collection-number",
163161
required=False,
164162
type=str,
165-
help="Collection of RTC_S1_STATIC data that will be linked to the RTC_S1 bursts.",
163+
help="Collection number of RTC_S1_STATIC data that will be linked to the RTC_S1 bursts.",
166164
)
167165
@click.option(
168166
"--linked-static-layers-s3-project-folder",
169167
required=False,
170168
type=str,
171-
help="Project folder containing the RTC_S1_STATIC data that will be linked to the RTC_S1 bursts. "
172-
"Expected for linked files path is : s3_bucket/s3_project_folder/collection/burst_id/*files",
169+
help="Project folder containing the RTC_S1_STATIC data that will be linked to the RTC_S1 bursts. ",
173170
)
174171
@click.option(
175172
"--scene-data-source",
@@ -204,15 +201,15 @@ def get_data_for_scene_and_make_run_config(
204201
backscatter_convention,
205202
s3_bucket,
206203
s3_project_folder,
207-
collection,
204+
collection_number,
208205
download_folder,
209206
scratch_folder,
210207
out_folder,
211208
run_config_save_path,
212209
make_existing_products,
213210
link_static_layers,
214211
linked_static_layers_s3_bucket,
215-
linked_static_layers_collection,
212+
linked_static_layers_collection_number,
216213
linked_static_layers_s3_project_folder,
217214
scene_data_source,
218215
orbit_data_source,
@@ -242,10 +239,6 @@ def get_data_for_scene_and_make_run_config(
242239
if backscatter_convention not in ["gamma0", "sigma0", "beta0"]:
243240
raise ValueError("backscatter_convention must be one of gamma0, sigma0, beta0")
244241

245-
# ensure the collection ends with cX, where X is a positive integer.
246-
# Raise error for invalid naming
247-
_ = get_collection_number(collection)
248-
249242
# sub-folders for downloads
250243
orbit_folder = download_folder / "orbits"
251244
scene_folder = download_folder / "scenes"
@@ -305,7 +298,7 @@ def get_data_for_scene_and_make_run_config(
305298
burst_polarisations=burst_pols,
306299
s3_bucket=s3_bucket,
307300
s3_project_folder=s3_project_folder,
308-
collection=collection,
301+
collection_number=collection_number,
309302
make_existing_products=make_existing_products,
310303
early_exit=True,
311304
early_exit_code=100,
@@ -322,7 +315,7 @@ def get_data_for_scene_and_make_run_config(
322315
scene=scene,
323316
burst_id_list=burst_id_list_to_process,
324317
static_layers_s3_bucket=linked_static_layers_s3_bucket,
325-
static_layers_collection=linked_static_layers_collection,
318+
static_layers_collection_number=linked_static_layers_collection_number,
326319
static_layers_s3_project_folder=linked_static_layers_s3_project_folder,
327320
early_exit_code=101,
328321
)
@@ -493,7 +486,7 @@ def get_data_for_scene_and_make_run_config(
493486
# add the static layer base url
494487
static_layer_base_url = make_static_layer_base_url(
495488
linked_static_layers_s3_bucket,
496-
linked_static_layers_collection,
489+
linked_static_layers_collection_number,
497490
linked_static_layers_s3_project_folder,
498491
)
499492
logger.info(f"static layer base url : {static_layer_base_url}")
@@ -550,11 +543,10 @@ def get_data_for_scene_and_make_run_config(
550543
help="Backscatter convention of the product to be made (gamma0, sigma0 or beta0)",
551544
)
552545
@click.option(
553-
"--collection",
546+
"--collection-number",
554547
required=True,
555-
type=str,
556-
help="collection associated with product. e.g. s1_rtc_c1. Must end in 'cX' where X is an "
557-
"integer number referring to the collection.",
548+
type=int,
549+
help="The collection number of the product.",
558550
)
559551
@click.option(
560552
"--s3-bucket", required=True, type=str, help="The bucket to upload the files"
@@ -605,7 +597,7 @@ def make_rtc_opera_stac_and_upload_bursts(
605597
run_config_path,
606598
product,
607599
backscatter_convention,
608-
collection,
600+
collection_number,
609601
s3_bucket,
610602
s3_project_folder,
611603
skip_upload_to_s3,
@@ -615,7 +607,10 @@ def make_rtc_opera_stac_and_upload_bursts(
615607
):
616608
"""makes STAC metadata for opera-rtc and uploads them to a desired s3 bucket.
617609
The final path in S3 follows this pattern:
618-
s3_bucket/s3_folder/collection/burst_id/burst_year/burst_month/burst_day/*files
610+
product = RTC_S1:
611+
s3_bucket/s3_folder/odc_product_name/burst_id/burst_year/burst_month/burst_day/*files
612+
product = RTC_S1_STATIC:
613+
s3_bucket/s3_folder/odc_product_name/burst_id/*files
619614
"""
620615

621616
# iterate through the burst directory and create STAC metadata
@@ -641,7 +636,7 @@ def make_rtc_opera_stac_and_upload_bursts(
641636
h5_filepath=burst_h5_filepath,
642637
product=product,
643638
backscatter_convention=backscatter_convention,
644-
collection=collection,
639+
collection_number=collection_number,
645640
s3_bucket=s3_bucket,
646641
s3_project_folder=s3_project_folder,
647642
)
@@ -703,7 +698,7 @@ def make_rtc_opera_stac_and_upload_bursts(
703698
burst_polarisations=burst_stac_manager.polarisations,
704699
s3_bucket=s3_bucket,
705700
s3_project_folder=s3_project_folder,
706-
collection=collection,
701+
collection_number=collection_number,
707702
make_existing_products=make_existing_products,
708703
early_exit=False, # don't exit early, move to next burst
709704
)

sar_pipeline/aws/metadata/odc.py

Lines changed: 27 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,72 +1,39 @@
1-
import re
2-
3-
4-
def get_collection_number(collection: str) -> int:
5-
"""Get the collection number from the collection string
6-
7-
Parameters
8-
----------
9-
collection : str
10-
Collection of the product. e.g. s1_rtc_c1. The collection MUST end in cX where X
11-
is an integer associated with the collection. E.g. rtc_s1_c1.
12-
13-
Returns
14-
-------
15-
int
16-
number. e.g. s1_rtc_c1 -> 1
17-
18-
Raises
19-
------
20-
ValueError
21-
Invalid collection name.
22-
"""
23-
24-
# ensure the collection ends with cX, where X is a positive integer
25-
match = re.search(r"c(\d+)$", collection)
26-
if not match:
27-
raise ValueError(
28-
f"Invalid collection name. The collection MUST end in cX where X"
29-
" is an integer associated with the collection. E.g. rtc_s1_c1."
30-
)
31-
return int(match.group(1))
32-
33-
341
def get_odc_product_name(product, collection_number, polarisations):
35-
"""set the odc:product value. WARNING this must align with
2+
"""get the odc product name. WARNING this must align with
363
the DEA product name at indexing into the datacube.
374
These are hard-coded and set by the provided `collection_number`.
385
"""
396
if product == "RTC_S1":
407
if all([pol in polarisations for pol in ["VV", "VH"]]):
41-
return f"ga_s1_iw_vv_vh_c{collection_number}"
8+
return f"ga_s1_nrb_iw_vv_vh_c{collection_number}"
429
elif all([pol in polarisations for pol in ["HH", "HV"]]):
43-
return f"ga_s1_iw_hh_hv_c{collection_number}"
10+
return f"ga_s1_nrb_iw_hh_hv_c{collection_number}"
4411
elif polarisations == ["VV"]:
45-
return f"ga_s1_iw_vv_c{collection_number}"
12+
return f"ga_s1_nrb_iw_vv_c{collection_number}"
4613
elif polarisations == ["HH"]:
47-
return f"ga_s1_iw_hh_c{collection_number}"
14+
return f"ga_s1_nrb_iw_hh_c{collection_number}"
4815
elif product == "RTC_S1_STATIC":
49-
return f"ga_s1_iw_static_c{collection_number}"
16+
return f"ga_s1_nrb_iw_static_c{collection_number}"
5017

5118

5219
def make_rtc_s1_s3_subpath(
5320
s3_project_folder: str,
54-
collection: str,
21+
collection_number: int,
5522
burst_polarisations: list,
5623
burst_id: str,
5724
year: str,
5825
month: str,
5926
day: str,
6027
):
61-
"""Structure for the rtc_s1 product sub-folders. These include
28+
"""Structure for the RTC_S1 product sub-folders. These include
6229
information about when the burst was acquired.
6330
6431
Parameters
6532
----------
6633
s3_project_folder : str
6734
s3 project folder
68-
collection : str
69-
collection. e.g. rtc_s1_static_c1
35+
collection_number : int
36+
collection number as an integer
7037
burst_polarisations: list
7138
list of burst polarisations
7239
burst_id : str
@@ -82,18 +49,18 @@ def make_rtc_s1_s3_subpath(
8249
-------
8350
str
8451
path to the s3 bucket subfolder
85-
e.g. s3_project_folder/c1/s1_rtc_c1/ga_s1_iw_vv_c1/t028_059507_iw2/2022/01/01
52+
e.g. s3_project_folder/ga_s1_nrb_iw_vv_c1/t028_059507_iw2/2022/01/01
8653
"""
87-
# get collection name and number from input collection
88-
c_number = get_collection_number(collection)
89-
# get the odc product name which includes the collection
90-
odc_product_name = get_odc_product_name("RTC_S1", c_number, burst_polarisations)
91-
return f"{s3_project_folder}/c{c_number}/{collection}/{odc_product_name}/{burst_id}/{year}/{month}/{day}"
54+
# get the odc product name which includes the collection number
55+
odc_product_name = get_odc_product_name(
56+
"RTC_S1", collection_number, burst_polarisations
57+
)
58+
return f"{s3_project_folder}/{odc_product_name}/{burst_id}/{year}/{month}/{day}"
9259

9360

9461
def make_rtc_s1_static_s3_subpath(
9562
s3_project_folder: str,
96-
collection: str,
63+
collection_number: int,
9764
burst_id: str,
9865
) -> str:
9966
"""Structure for the bucket subpath for static layers
@@ -102,27 +69,25 @@ def make_rtc_s1_static_s3_subpath(
10269
----------
10370
s3_project_folder : str
10471
s3 project folder
105-
collection : str
106-
collection. e.g. rtc_s1_static_c1
72+
collection_number : int
73+
collection number as an integer
10774
burst_id : str
10875
burst_id. e.g. t028_059507_iw2
10976
11077
Returns
11178
-------
11279
str
11380
path to the s3 bucket subfolder
114-
e.g. s3_project_folder/c1/s1_rtc_static_c1/ga_s1_iw_static_c1/t028_059507_iw2
81+
e.g. s3_project_folder/ga_s1_nrb_iw_static_c1/t028_059507_iw2
11582
"""
116-
# get collection name and number from input collection
117-
c_number = get_collection_number(collection)
118-
# get the odc product name which includes the collection
119-
odc_product_name = get_odc_product_name("RTC_S1_STATIC", c_number, [])
120-
return f"{s3_project_folder}/c{c_number}/{collection}/{odc_product_name}/{burst_id}"
83+
# get the odc product name which includes the collection number
84+
odc_product_name = get_odc_product_name("RTC_S1_STATIC", collection_number, [])
85+
return f"{s3_project_folder}/{odc_product_name}/{burst_id}"
12186

12287

12388
def make_static_layer_base_url(
12489
static_layers_s3_bucket: str,
125-
static_layers_collection: str,
90+
static_layers_collection_number: int,
12691
static_layers_s3_project_folder: str,
12792
s3_region: str = "ap-southeast-2",
12893
) -> str:
@@ -132,8 +97,8 @@ def make_static_layer_base_url(
13297
----------
13398
static_layers_s3_bucket : str
13499
Bucket containing static layer
135-
static_layers_collection : str
136-
collection static layers belong to
100+
static_layers_collection_number : int
101+
collection number of the static layers
137102
static_layers_s3_project_folder : str
138103
project folder within bucket if exists
139104
s3_region : str, optional
@@ -147,7 +112,7 @@ def make_static_layer_base_url(
147112
"""
148113
root_static_layer_path = make_rtc_s1_static_s3_subpath(
149114
s3_project_folder=static_layers_s3_project_folder,
150-
collection=static_layers_collection,
115+
collection_number=static_layers_collection_number,
151116
burst_id="",
152117
)
153118
return (

0 commit comments

Comments
 (0)