Skip to content

Commit acfde1d

Browse files
authored
Merge pull request #247 from aodn/SrsGhrsstNew2
SrsGhrsstNew2
2 parents 22ef6ed + f0fb864 commit acfde1d

24 files changed

+1917
-434
lines changed

aodn_cloud_optimised/bin/generic_cloud_optimised_creation.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,9 @@ class ZarrSchemaTransformation(BaseModel):
478478
default_factory=dict,
479479
description="Global attributes to modify. Supports 'delete' and 'set' keys.",
480480
)
481+
var_template_shape: Union[str, List[str]] = Field(
482+
..., description="Variable name(s) used to define the template shape."
483+
)
481484
dimensions: Optional[dict[str, dict[str, Any]]] = None
482485
dataset_sort_by: Optional[list[str]] = None
483486
vars_incompatible_with_region: Optional[list[str]] = None
@@ -660,6 +663,31 @@ def validate_add_variables(cls, value):
660663

661664
return value
662665

666+
@model_validator(mode="after")
def validate_var_template_shape(self) -> "ZarrSchemaTransformation":
    """Validate ``var_template_shape`` against the dataset schema.

    The field may hold a single variable name or a list of names. It must
    be non-empty, and every name must be a key of ``dataset_schema``.

    Returns:
        The model instance itself, unmodified.

    Raises:
        ValueError: If ``var_template_shape`` is empty, or if one or more
            of the named variables is not a key of ``dataset_schema``.
    """
    template_vars = self.var_template_shape

    # Guard clause: an empty string and an empty list are both rejected.
    if not template_vars:
        raise ValueError("var_template_shape cannot be empty.")

    # A lone string is one variable name, not an iterable of characters.
    if isinstance(template_vars, str):
        template_vars = [template_vars]

    # Collect every requested name that the schema does not define, so the
    # error reports all of them at once.
    known_vars = self.dataset_schema.keys()
    unknown_vars = [name for name in template_vars if name not in known_vars]

    if unknown_vars:
        raise ValueError(
            f"var_template_shape variable(s) {unknown_vars} not found in 'schema'. "
            f"Available variables: {list(known_vars)}"
        )

    return self
690+
663691
# @model_validator(mode="after")
664692
# def validate_gattrs_to_variable_dimensions(self) -> "DatasetConfig":
665693
# if self.gattrs_to_variables:
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
generic_launcher.py
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
generic_launcher.py
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
generic_launcher.py

aodn_cloud_optimised/config/dataset/diver_photoquadrat_score_qc.json

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -192,14 +192,18 @@
192192
}
193193
},
194194
"aws_opendata_registry": {
195-
"Name": "FILL UP MANUALLY - CHECK DOCUMENTATION",
195+
"Name": "National Reef Monitoring Network - Global benthic cover data (photoquadrat image annotations)",
196196
"Description": "FILL UP MANUALLY - CHECK DOCUMENTATION",
197197
"Documentation": "FILL UP MANUALLY - CHECK DOCUMENTATION",
198198
"Contact": "FILL UP MANUALLY - CHECK DOCUMENTATION",
199199
"ManagedBy": "FILL UP MANUALLY - CHECK DOCUMENTATION",
200200
"UpdateFrequency": "FILL UP MANUALLY - CHECK DOCUMENTATION",
201201
"Tags": [
202-
"FILL UP MANUALLY - CHECK DOCUMENTATION"
202+
"oceans",
203+
"biology",
204+
"coral reef",
205+
"ecosystems",
206+
"geospatial"
203207
],
204208
"License": "FILL UP MANUALLY - CHECK DOCUMENTATION",
205209
"Resources": [
@@ -238,7 +242,8 @@
238242
"AuthorName": "FILL UP MANUALLY - CHECK DOCUMENTATION"
239243
}
240244
]
241-
}
245+
},
246+
"Citation": "The citation in a list of references is: \"Reef Life Survey (RLS); Integrated Marine Observing System (IMOS), [year-of-data-download], IMOS - National Reef Monitoring Network Sub-Facility - Global benthic cover data (photoquadrat image annotations), [data-access-URL], accessed [date-of-access].\""
242247
},
243248
"schema_transformation": {
244249
"drop_variables": [],

aodn_cloud_optimised/config/dataset/satellite_ghrsst_l3c_4hour_himawari8.json

Lines changed: 37 additions & 234 deletions
Original file line numberDiff line numberDiff line change
@@ -2,185 +2,44 @@
22
"dataset_name": "satellite_ghrsst_l3c_4hour_himawari8",
33
"logger_name": "satellite_ghrsst_l3c_4hour_himawari8",
44
"parent_config": "satellite_ghrsst_main.json",
5-
"metadata_uuid": "06d2fff4-8e2c-4bd7-b98f-cd98e588df6f",
6-
"schema": {
7-
"time": {
8-
"type": "timestamp[ns]",
9-
"long_name": "reference time of sst file",
10-
"standard_name": "time",
11-
"axis": "T",
12-
"comment": "A typical reference time for the data"
13-
},
14-
"lat": {
15-
"type": "float",
16-
"long_name": "latitude",
17-
"standard_name": "latitude",
18-
"axis": "Y",
19-
"comment": "Latitudes for locating data",
20-
"valid_min": -90.0,
21-
"valid_max": 90.0,
22-
"units": "degrees_north"
23-
},
24-
"lon": {
25-
"type": "float",
26-
"long_name": "longitude",
27-
"standard_name": "longitude",
28-
"axis": "X",
29-
"comment": "Longitudes for locating data",
30-
"valid_min": -180.0,
31-
"valid_max": 360.0,
32-
"units": "degrees_east"
33-
},
34-
"sea_surface_temperature": {
35-
"type": "double",
36-
"valid_min": -32765,
37-
"valid_max": 32765,
38-
"units": "kelvin",
39-
"long_name": "sea surface skin temperature",
40-
"standard_name": "sea_surface_skin_temperature",
41-
"comment": "The skin temperature of the ocean at a depth of approximately 10 microns. SSTs are retrieved by using the Radiative Transfer Model (RTTOV12.3) and Bayesian cloud clearing method based on the ESA CCI SST code developed at the University of Reading",
42-
"calendar": "Standard",
43-
"grid_mapping": "crs"
44-
},
45-
"sses_bias": {
46-
"type": "double",
47-
"valid_min": -127,
48-
"valid_max": 127,
49-
"clip_min": -0.7055229215770699,
50-
"clip_max": 0.7937132867742037,
51-
"units": "kelvin",
52-
"long_name": "SSES bias estimate",
53-
"comment": "Bias estimate derived from L2P bias,following the method described in http://imos.org.au/fileadmin/user_upload/shared/SRS/SST/GHRSST-DOC-basic-v1.0r1.pdf. Subtracting sses_bias from sea_surface_temperature produces a more accurate skin SST estimate",
54-
"grid_mapping": "crs"
55-
},
56-
"sses_standard_deviation": {
57-
"type": "double",
58-
"valid_min": -127,
59-
"valid_max": 127,
60-
"clip_min": 0.5,
61-
"clip_max": 1.5874823745387598,
62-
"units": "kelvin",
63-
"long_name": "SSES standard deviation estimate",
64-
"comment": "Standard deviation estimate derived from L2P standard deviation, following the method described in http://imos.org.au/fileadmin/user_upload/shared/SRS/SST/GHRSST-DOC-basic-v1.0r1.pdf.",
65-
"grid_mapping": "crs"
66-
},
67-
"quality_level": {
68-
"type": "float",
69-
"valid_min": -127,
70-
"valid_max": 127,
71-
"long_name": "quality level of SST pixel",
72-
"comment": "These are the overall quality indicators and are used for all GHRSST SSTs. Refer Merchant et al., 2019 (https://doi.org/10.1038/s41597-019-0236-x) for more details for logic and threshold for assigning pixel quality level with use of Bayesian cloud clearing method. For validation applications, please consider quality_level greater than or equal 4 with bias correction. For operational applications, please consider quality_level greater than or equal 4 with bias correction. For qualitative applications, please consider quality_level greater than or equal 3 with or without bias correction.",
73-
"flag_meanings": "no_data bad_data worst_quality low_quality acceptable_quality best_quality",
74-
"grid_mapping": "crs",
75-
"flag_values": [
76-
0,
77-
1,
78-
2,
79-
3,
80-
4,
81-
5
82-
]
83-
},
84-
"sst_dtime": {
85-
"type": "double",
86-
"valid_min": -32765,
87-
"valid_max": 32765,
88-
"units": "second",
89-
"long_name": "time difference from reference time",
90-
"comment": "time plus sst_dtime gives seconds after 00:00:00 UTC January 1, 1981.",
91-
"grid_mapping": "crs"
92-
},
93-
"l2p_flags": {
94-
"type": "float",
95-
"valid_min": -32765,
96-
"valid_max": 32765,
97-
"long_name": "L2P flags",
98-
"comment": "These flags are important to properly use the data. Data not flagged as microwave are sourced from an infrared sensor. The lake and river flags are currently not set, but defined in GDS2.0r4. Night flag indicates a night pixel. If night flag is not set then pixel is either daytime or within 5 degrees of a 90 degree solar zenith angle. The terminator flag indicates that the sun is near the horizon. The analysis flag indicates high difference from analysis temperatures (differences greater than Analysis Limit). The lowwind flag indicates regions of low wind speed (typically less than the low Wind Limit) per NWP model. The highwind flag indicates regions of high wind speed (typically greater than the high Wind Limit) per NWP model. Other flags may be populated and are for internal use and the definitions may change, so should not be relied on. Use flag_meanings to confirm the flag assignment that can be relied on. Flags greater than 64 only apply to non-land pixels.",
99-
"flag_meanings": "microwave land ice lake river reserved reserved analysis lowwind highwind night terminator reserved reserved reserved",
100-
"grid_mapping": "crs",
101-
"flag_masks": [
102-
1,
103-
2,
104-
4,
105-
8,
106-
16,
107-
32,
108-
64,
109-
128,
110-
256,
111-
512,
112-
1024,
113-
2048,
114-
4096,
115-
8192,
116-
16384
117-
]
118-
},
119-
"sses_count": {
120-
"type": "double",
121-
"valid_min": -127,
122-
"valid_max": 127,
123-
"clip_min": 0.0,
124-
"clip_max": 582.2729491442008,
125-
"units": "count",
126-
"long_name": "SSES count",
127-
"comment": "Weighted representative number of swath pixels, per https://imos.org.au/facilities/srs/sstproducts/sstdata0/sstdata-ghrsstfilefields. EXPERIMENTAL_FIELD",
128-
"grid_mapping": "crs"
129-
},
130-
"dt_analysis": {
131-
"type": "double",
132-
"valid_min": -127,
133-
"valid_max": 127,
134-
"units": "Kelvin",
135-
"long_name": "deviation from last SST analysis",
136-
"comment": "The difference between this SST and the previous day's L4 Foundation SST.",
137-
"source": "ABOM-L4LRfnd-GLOB-GAMSSA_28km",
138-
"grid_mapping": "crs"
139-
},
140-
"wind_speed": {
141-
"type": "double",
142-
"valid_min": -127,
143-
"valid_max": 127,
144-
"clip_min": 0.0,
145-
"clip_max": 17.645092003135954,
146-
"units": "m s-1",
147-
"long_name": "wind_speed",
148-
"standard_name": "wind_speed",
149-
"comment": "Typically represent surface winds (10 meters above the sea surface).",
150-
"source": "ACCESSG-ABOM-Forecast-WSP",
151-
"grid_mapping": "crs"
152-
},
153-
"satellite_zenith_angle": {
154-
"type": "double",
155-
"valid_min": -127,
156-
"valid_max": 127,
157-
"clip_min": 0.0,
158-
"clip_max": 69.9903600185546,
159-
"units": "angular_degree",
160-
"long_name": "satellite_zenith angle",
161-
"comment": "The satellite zenith angle at the time of the SST observations",
162-
"grid_mapping": "crs"
5+
"cloud_optimised_format": "zarr",
6+
"run_settings": {
7+
"paths": [
8+
{
9+
"type": "files",
10+
"s3_uri": "s3://imos-data/IMOS/SRS/SST/ghrsst/L3C-4h/h08/",
11+
"filter": [
12+
".*\\.nc$"
13+
],
14+
"year_range": [
15+
2015,
16+
2022
17+
]
18+
}
19+
],
20+
"cluster": {
21+
"mode": "coiled",
22+
"restart_every_path": true
16323
},
164-
"sea_ice_fraction": {
165-
"type": "double",
166-
"valid_min": -127,
167-
"valid_max": 127,
168-
"units": "1",
169-
"long_name": "sea_ice_fraction",
170-
"standard_name": "sea_ice_area_fraction",
171-
"comment": "Fractional sea ice cover (unitless) derived from near real-time UKMO OSTIA Daily 0.05 degree L4, an optimal interpolation of the operational near real-time EUMETSAT OSI-SAF SSMIS daily 10 km Level 3 sea ice concentration fields (Good et al., 2020, Remote Sensing, https://dx.doi.org/10.3390/rs12040720).",
172-
"source": "OSTIA-UKMO-L4-GLOB-v2.0",
173-
"grid_mapping": "crs"
24+
"clear_existing_data": true,
25+
"raise_error": false,
26+
"coiled_cluster_options": {
27+
"n_workers": [
28+
25,
29+
100
30+
],
31+
"scheduler_vm_types": "m7i-flex.large",
32+
"worker_vm_types": "m7i-flex.large",
33+
"allow_ingress_from": "me",
34+
"compute_purchase_option": "spot_with_fallback",
35+
"worker_options": {
36+
"nthreads": 4,
37+
"memory_limit": "32GB"
38+
}
17439
},
175-
"crs": {
176-
"type": "int32",
177-
"long_name": "coordinate reference system",
178-
"grid_mapping_name": "latitude_longitude",
179-
"semi_major_axis": 6379137.0,
180-
"inverse_flattening": 298.257223563,
181-
"epsg_code": "EPSG:4326"
182-
}
40+
"batch_size": 20
18341
},
42+
"metadata_uuid": "06d2fff4-8e2c-4bd7-b98f-cd98e588df6f",
18443
"aws_opendata_registry": {
18544
"Name": "Satellite - Sea surface temperature - Level 3 - Single sensor - Himawari-8 - 4 hour",
18645
"Description": "This is a regional GHRSST level 3 collated (L3C) dataset on 0.02-degree rectangular grid over the Australasian domain (70E to 190E, 70S to 20N) based on retrievals from the AHI imager on board Himawari-8 satellite. The Bureau of Meteorology (Bureau) produces Integrated Marine Observing System (IMOS) satellite SST products in the International Group for High Resolution SST (GHRSST) GDS2 file formats for Himawari-8 in real time and delayed mode. This product is composed of reprocessed multi-swath SSTskin retrievals obtained from compositing IMOS Himawari-8 hourly L3C files over the night (before dawn). \n\n\nEvery 10 minutes, the Himawari-8 full disk is processed to retrieve SSTs by using the Radiative Transfer Model (RTTOV12.3) and Bayesian cloud clearing method based on the ESA CCI SST code developed at the University of Reading. The hourly product on the native grid is then produced by compositing over multiple swaths for the previous 1 hour by selecting the highest quality data with priority given to the value closest in time to the product nominal hour. L3C-04hour SSTs are the latest 4-hour pixels with the highest quality taken from the hourly SST product. The L3C-04hour product on native grid is then remapped over the 0.02-degree IMOS grid to compose the IMOS L3C-04hour product (Govekar et al., 2021, https://www.foo.org.au/wp-content/uploads/2021/11/Govekar_FOO_2021.pdf). The product format is compliant with the GHRSST Data Specification (GDS) version 2.",
@@ -195,7 +54,7 @@
19554
"License": "http://creativecommons.org/licenses/by/4.0/",
19655
"Resources": [
19756
{
198-
"Description": "Cloud Optimised AODN dataset of IMOS - SRS - SST - L3C - Himawari-8 - 4 hour - Australia",
57+
"Description": "Cloud Optimised AODN dataset of IMOS - SRS - SST - L3C - Himawari-8 - 4 hour - Australia",
19958
"ARN": "arn:aws:s3:::aodn-cloud-optimised/satellite_ghrsst_l3c_4hour_himawari8.zarr",
20059
"Region": "ap-southeast-2",
20160
"Type": "S3 Bucket"
@@ -204,7 +63,7 @@
20463
"DataAtWork": {
20564
"Tutorials": [
20665
{
207-
"Title": "Accessing IMOS - SRS - SST - L3C - Himawari-8 - 4 hour - Australia",
66+
"Title": "Accessing IMOS - SRS - SST - L3C - Himawari-8 - 4 hour - Australia",
20867
"URL": "https://github.com/aodn/aodn_cloud_optimised/blob/main/notebooks/satellite_ghrsst_l3c_4hour_himawari8.ipynb",
20968
"NotebookURL": "https://githubtocolab.com/aodn/aodn_cloud_optimised/blob/main/notebooks/satellite_ghrsst_l3c_4hour_himawari8.ipynb",
21069
"AuthorName": "Laurent Besnard",
@@ -220,61 +79,5 @@
22079
]
22180
},
22281
"Citation": "The citation in a list of references is: \"IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access].\""
223-
},
224-
"run_settings": {
225-
"coiled_cluster_options": {
226-
"n_workers": [
227-
30,
228-
150
229-
],
230-
"scheduler_vm_types": "m7i.2xlarge",
231-
"worker_vm_types": "m7i.2xlarge",
232-
"allow_ingress_from": "me",
233-
"compute_purchase_option": "spot_with_fallback",
234-
"worker_options": {
235-
"nthreads": 16,
236-
"memory_limit": "64GB"
237-
}
238-
},
239-
"batch_size": 100,
240-
"cluster": {
241-
"mode": "coiled",
242-
"restart_every_path": true
243-
},
244-
"paths": [
245-
{
246-
"s3_uri": "s3://imos-data/IMOS/SRS/SST/ghrsst/L3C-4h/h08",
247-
"filter": [],
248-
"year_range": [
249-
2015,
250-
2024
251-
]
252-
}
253-
],
254-
"clear_existing_data": true,
255-
"raise_error": false
256-
},
257-
"schema_transformation": {
258-
"global_attributes": {
259-
"set": {
260-
"title": ""
261-
}
262-
},
263-
"dimensions": {
264-
"time": {
265-
"name": "time",
266-
"chunk": 100,
267-
"rechunk": true,
268-
"append_dim": true
269-
},
270-
"latitude": {
271-
"name": "lat",
272-
"chunk": 100
273-
},
274-
"longitude": {
275-
"name": "lon",
276-
"chunk": 100
277-
}
278-
}
27982
}
28083
}
Binary file not shown.

0 commit comments

Comments
 (0)