Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions aodn_cloud_optimised/bin/generic_cloud_optimised_creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,9 @@ class ZarrSchemaTransformation(BaseModel):
default_factory=dict,
description="Global attributes to modify. Supports 'delete' and 'set' keys.",
)
var_template_shape: Union[str, List[str]] = Field(
..., description="Variable name(s) used to define the template shape."
)
dimensions: Optional[dict[str, dict[str, Any]]] = None
dataset_sort_by: Optional[list[str]] = None
vars_incompatible_with_region: Optional[list[str]] = None
Expand Down Expand Up @@ -638,6 +641,31 @@ def validate_add_variables(cls, value):

return value

@model_validator(mode="after")
def validate_var_template_shape(self) -> "ZarrSchemaTransformation":
    """Check that ``var_template_shape`` names known schema variables.

    Runs as an "after" model validator. ``var_template_shape`` may be a
    single name or a list of names; every name must be a key of
    ``self.dataset_schema``.

    Returns:
        The model instance itself, unchanged, when validation passes.

    Raises:
        ValueError: If ``var_template_shape`` is falsy (empty string or
            empty list), or if any listed name is not a key of
            ``self.dataset_schema``.
    """
    template_vars = self.var_template_shape

    # Guard: an empty string or an empty list is rejected up front.
    if not template_vars:
        raise ValueError("var_template_shape cannot be empty.")

    # A lone name is wrapped so both accepted forms share one code path.
    if isinstance(template_vars, str):
        template_vars = [template_vars]

    # NOTE(review): the error text below calls this mapping 'schema';
    # presumably that is the config-facing name of dataset_schema — confirm.
    known_vars = self.dataset_schema.keys()

    # Collect every unknown name so the error reports them all at once.
    absent = []
    for name in template_vars:
        if name not in known_vars:
            absent.append(name)

    if not absent:
        return self

    raise ValueError(
        f"var_template_shape variable(s) {absent} not found in 'schema'. "
        f"Available variables: {list(known_vars)}"
    )

# @model_validator(mode="after")
# def validate_gattrs_to_variable_dimensions(self) -> "DatasetConfig":
# if self.gattrs_to_variables:
Expand Down
1 change: 1 addition & 0 deletions aodn_cloud_optimised/bin/satellite_sst_1day_aqua.py
1 change: 1 addition & 0 deletions aodn_cloud_optimised/bin/satellite_sst_1day_snpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,14 +192,18 @@
}
},
"aws_opendata_registry": {
"Name": "FILL UP MANUALLY - CHECK DOCUMENTATION",
"Name": "National Reef Monitoring Network - Global benthic cover data (photoquadrat image annotations)",
"Description": "FILL UP MANUALLY - CHECK DOCUMENTATION",
"Documentation": "FILL UP MANUALLY - CHECK DOCUMENTATION",
"Contact": "FILL UP MANUALLY - CHECK DOCUMENTATION",
"ManagedBy": "FILL UP MANUALLY - CHECK DOCUMENTATION",
"UpdateFrequency": "FILL UP MANUALLY - CHECK DOCUMENTATION",
"Tags": [
"FILL UP MANUALLY - CHECK DOCUMENTATION"
"oceans",
"biology",
"coral reef",
"ecosystems",
"geospatial"
],
"License": "FILL UP MANUALLY - CHECK DOCUMENTATION",
"Resources": [
Expand Down Expand Up @@ -238,7 +242,8 @@
"AuthorName": "FILL UP MANUALLY - CHECK DOCUMENTATION"
}
]
}
},
"Citation": "The citation in a list of references is: \"Reef Life Survey (RLS); Integrated Marine Observing System (IMOS), [year-of-data-download], IMOS - National Reef Monitoring Network Sub-Facility - Global benthic cover data (photoquadrat image annotations), [data-access-URL], accessed [date-of-access].\""
},
"schema_transformation": {
"drop_variables": [],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,185 +2,44 @@
"dataset_name": "satellite_ghrsst_l3c_4hour_himawari8",
"logger_name": "satellite_ghrsst_l3c_4hour_himawari8",
"parent_config": "satellite_ghrsst_main.json",
"metadata_uuid": "06d2fff4-8e2c-4bd7-b98f-cd98e588df6f",
"schema": {
"time": {
"type": "timestamp[ns]",
"long_name": "reference time of sst file",
"standard_name": "time",
"axis": "T",
"comment": "A typical reference time for the data"
},
"lat": {
"type": "float",
"long_name": "latitude",
"standard_name": "latitude",
"axis": "Y",
"comment": "Latitudes for locating data",
"valid_min": -90.0,
"valid_max": 90.0,
"units": "degrees_north"
},
"lon": {
"type": "float",
"long_name": "longitude",
"standard_name": "longitude",
"axis": "X",
"comment": "Longitudes for locating data",
"valid_min": -180.0,
"valid_max": 360.0,
"units": "degrees_east"
},
"sea_surface_temperature": {
"type": "double",
"valid_min": -32765,
"valid_max": 32765,
"units": "kelvin",
"long_name": "sea surface skin temperature",
"standard_name": "sea_surface_skin_temperature",
"comment": "The skin temperature of the ocean at a depth of approximately 10 microns. SSTs are retrieved by using the Radiative Transfer Model (RTTOV12.3) and Bayesian cloud clearing method based on the ESA CCI SST code developed at the University of Reading",
"calendar": "Standard",
"grid_mapping": "crs"
},
"sses_bias": {
"type": "double",
"valid_min": -127,
"valid_max": 127,
"clip_min": -0.7055229215770699,
"clip_max": 0.7937132867742037,
"units": "kelvin",
"long_name": "SSES bias estimate",
"comment": "Bias estimate derived from L2P bias,following the method described in http://imos.org.au/fileadmin/user_upload/shared/SRS/SST/GHRSST-DOC-basic-v1.0r1.pdf. Subtracting sses_bias from sea_surface_temperature produces a more accurate skin SST estimate",
"grid_mapping": "crs"
},
"sses_standard_deviation": {
"type": "double",
"valid_min": -127,
"valid_max": 127,
"clip_min": 0.5,
"clip_max": 1.5874823745387598,
"units": "kelvin",
"long_name": "SSES standard deviation estimate",
"comment": "Standard deviation estimate derived from L2P standard deviation, following the method described in http://imos.org.au/fileadmin/user_upload/shared/SRS/SST/GHRSST-DOC-basic-v1.0r1.pdf.",
"grid_mapping": "crs"
},
"quality_level": {
"type": "float",
"valid_min": -127,
"valid_max": 127,
"long_name": "quality level of SST pixel",
"comment": "These are the overall quality indicators and are used for all GHRSST SSTs. Refer Merchant et al., 2019 (https://doi.org/10.1038/s41597-019-0236-x) for more details for logic and threshold for assigning pixel quality level with use of Bayesian cloud clearing method. For validation applications, please consider quality_level greater than or equal 4 with bias correction. For operational applications, please consider quality_level greater than or equal 4 with bias correction. For qualitative applications, please consider quality_level greater than or equal 3 with or without bias correction.",
"flag_meanings": "no_data bad_data worst_quality low_quality acceptable_quality best_quality",
"grid_mapping": "crs",
"flag_values": [
0,
1,
2,
3,
4,
5
]
},
"sst_dtime": {
"type": "double",
"valid_min": -32765,
"valid_max": 32765,
"units": "second",
"long_name": "time difference from reference time",
"comment": "time plus sst_dtime gives seconds after 00:00:00 UTC January 1, 1981.",
"grid_mapping": "crs"
},
"l2p_flags": {
"type": "float",
"valid_min": -32765,
"valid_max": 32765,
"long_name": "L2P flags",
"comment": "These flags are important to properly use the data. Data not flagged as microwave are sourced from an infrared sensor. The lake and river flags are currently not set, but defined in GDS2.0r4. Night flag indicates a night pixel. If night flag is not set then pixel is either daytime or within 5 degrees of a 90 degree solar zenith angle. The terminator flag indicates that the sun is near the horizon. The analysis flag indicates high difference from analysis temperatures (differences greater than Analysis Limit). The lowwind flag indicates regions of low wind speed (typically less than the low Wind Limit) per NWP model. The highwind flag indicates regions of high wind speed (typically greater than the high Wind Limit) per NWP model. Other flags may be populated and are for internal use and the definitions may change, so should not be relied on. Use flag_meanings to confirm the flag assignment that can be relied on. Flags greater than 64 only apply to non-land pixels.",
"flag_meanings": "microwave land ice lake river reserved reserved analysis lowwind highwind night terminator reserved reserved reserved",
"grid_mapping": "crs",
"flag_masks": [
1,
2,
4,
8,
16,
32,
64,
128,
256,
512,
1024,
2048,
4096,
8192,
16384
]
},
"sses_count": {
"type": "double",
"valid_min": -127,
"valid_max": 127,
"clip_min": 0.0,
"clip_max": 582.2729491442008,
"units": "count",
"long_name": "SSES count",
"comment": "Weighted representative number of swath pixels, per https://imos.org.au/facilities/srs/sstproducts/sstdata0/sstdata-ghrsstfilefields. EXPERIMENTAL_FIELD",
"grid_mapping": "crs"
},
"dt_analysis": {
"type": "double",
"valid_min": -127,
"valid_max": 127,
"units": "Kelvin",
"long_name": "deviation from last SST analysis",
"comment": "The difference between this SST and the previous day's L4 Foundation SST.",
"source": "ABOM-L4LRfnd-GLOB-GAMSSA_28km",
"grid_mapping": "crs"
},
"wind_speed": {
"type": "double",
"valid_min": -127,
"valid_max": 127,
"clip_min": 0.0,
"clip_max": 17.645092003135954,
"units": "m s-1",
"long_name": "wind_speed",
"standard_name": "wind_speed",
"comment": "Typically represent surface winds (10 meters above the sea surface).",
"source": "ACCESSG-ABOM-Forecast-WSP",
"grid_mapping": "crs"
},
"satellite_zenith_angle": {
"type": "double",
"valid_min": -127,
"valid_max": 127,
"clip_min": 0.0,
"clip_max": 69.9903600185546,
"units": "angular_degree",
"long_name": "satellite_zenith angle",
"comment": "The satellite zenith angle at the time of the SST observations",
"grid_mapping": "crs"
"cloud_optimised_format": "zarr",
"run_settings": {
"paths": [
{
"type": "files",
"s3_uri": "s3://imos-data/IMOS/SRS/SST/ghrsst/L3C-4h/h08/",
"filter": [
".*\\.nc$"
],
"year_range": [
2015,
2022
]
}
],
"cluster": {
"mode": "coiled",
"restart_every_path": true
},
"sea_ice_fraction": {
"type": "double",
"valid_min": -127,
"valid_max": 127,
"units": "1",
"long_name": "sea_ice_fraction",
"standard_name": "sea_ice_area_fraction",
"comment": "Fractional sea ice cover (unitless) derived from near real-time UKMO OSTIA Daily 0.05 degree L4, an optimal interpolation of the operational near real-time EUMETSAT OSI-SAF SSMIS daily 10 km Level 3 sea ice concentration fields (Good et al., 2020, Remote Sensing, https://dx.doi.org/10.3390/rs12040720).",
"source": "OSTIA-UKMO-L4-GLOB-v2.0",
"grid_mapping": "crs"
"clear_existing_data": true,
"raise_error": false,
"coiled_cluster_options": {
"n_workers": [
25,
100
],
"scheduler_vm_types": "m7i-flex.large",
"worker_vm_types": "m7i-flex.large",
"allow_ingress_from": "me",
"compute_purchase_option": "spot_with_fallback",
"worker_options": {
"nthreads": 4,
"memory_limit": "32GB"
}
},
"crs": {
"type": "int32",
"long_name": "coordinate reference system",
"grid_mapping_name": "latitude_longitude",
"semi_major_axis": 6379137.0,
"inverse_flattening": 298.257223563,
"epsg_code": "EPSG:4326"
}
"batch_size": 20
},
"metadata_uuid": "06d2fff4-8e2c-4bd7-b98f-cd98e588df6f",
"aws_opendata_registry": {
"Name": "Satellite - Sea surface temperature - Level 3 - Single sensor - Himawari-8 - 4 hour",
"Description": "This is a regional GHRSST level 3 collated (L3C) dataset on 0.02-degree rectangular grid over the Australasian domain (70E to 190E, 70S to 20N) based on retrievals from the AHI imager on board Himawari-8 satellite. The Bureau of Meteorology (Bureau) produces Integrated Marine Observing System (IMOS) satellite SST products in the International Group for High Resolution SST (GHRSST) GDS2 file formats for Himawari-8 in real time and delayed mode. This product is composed of reprocessed multi-swath SSTskin retrievals obtained from compositing IMOS Himawari-8 hourly L3C files over the night (before dawn). \n\n\nEvery 10 minutes, the Himawari-8 full disk is processed to retrieve SSTs by using the Radiative Transfer Model (RTTOV12.3) and Bayesian cloud clearing method based on the ESA CCI SST code developed at the University of Reading. The hourly product on the native grid is then produced by compositing over multiple swaths for the previous 1 hour by selecting the highest quality data with priority given to the value closest in time to the product nominal hour. L3C-04hour SSTs are the latest 4-hour pixels with the highest quality taken from the hourly SST product. The L3C-04hour product on native grid is then remapped over the 0.02-degree IMOS grid to compose the IMOS L3C-04hour product (Govekar et al., 2021, https://www.foo.org.au/wp-content/uploads/2021/11/Govekar_FOO_2021.pdf). The product format is compliant with the GHRSST Data Specification (GDS) version 2.",
Expand All @@ -195,7 +54,7 @@
"License": "http://creativecommons.org/licenses/by/4.0/",
"Resources": [
{
"Description": "Cloud Optimised AODN dataset of IMOS - SRS - SST - L3C - Himawari-8 - 4 hour - Australia",
"Description": "Cloud Optimised AODN dataset of FILL UP MANUALLY - CHECK DOCUMENTATION",
"ARN": "arn:aws:s3:::aodn-cloud-optimised/satellite_ghrsst_l3c_4hour_himawari8.zarr",
"Region": "ap-southeast-2",
"Type": "S3 Bucket"
Expand All @@ -204,7 +63,7 @@
"DataAtWork": {
"Tutorials": [
{
"Title": "Accessing IMOS - SRS - SST - L3C - Himawari-8 - 4 hour - Australia",
"Title": "Accessing FILL UP MANUALLY - CHECK DOCUMENTATION",
"URL": "https://github.com/aodn/aodn_cloud_optimised/blob/main/notebooks/satellite_ghrsst_l3c_4hour_himawari8.ipynb",
"NotebookURL": "https://githubtocolab.com/aodn/aodn_cloud_optimised/blob/main/notebooks/satellite_ghrsst_l3c_4hour_himawari8.ipynb",
"AuthorName": "Laurent Besnard",
Expand All @@ -220,61 +79,5 @@
]
},
"Citation": "The citation in a list of references is: \"IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access].\""
},
"run_settings": {
"coiled_cluster_options": {
"n_workers": [
30,
150
],
"scheduler_vm_types": "m7i.2xlarge",
"worker_vm_types": "m7i.2xlarge",
"allow_ingress_from": "me",
"compute_purchase_option": "spot_with_fallback",
"worker_options": {
"nthreads": 16,
"memory_limit": "64GB"
}
},
"batch_size": 100,
"cluster": {
"mode": "coiled",
"restart_every_path": true
},
"paths": [
{
"s3_uri": "s3://imos-data/IMOS/SRS/SST/ghrsst/L3C-4h/h08",
"filter": [],
"year_range": [
2015,
2024
]
}
],
"clear_existing_data": true,
"raise_error": false
},
"schema_transformation": {
"global_attributes": {
"set": {
"title": ""
}
},
"dimensions": {
"time": {
"name": "time",
"chunk": 100,
"rechunk": true,
"append_dim": true
},
"latitude": {
"name": "lat",
"chunk": 100
},
"longitude": {
"name": "lon",
"chunk": 100
}
}
}
}
Binary file not shown.
Loading
Loading