Skip to content

Commit 0903ad3

Browse files
authored
Merge pull request #242 from aodn/ZarrL3SM_1month
Zarr l3 sm 1month
2 parents 5f6e33b + 85afa1d commit 0903ad3

22 files changed

+2170
-619
lines changed

README.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,14 @@ Visit the documentation on [ReadTheDocs](https://aodn-cloud-optimised.readthedoc
2020

2121
### Data Conversion
2222
- Convert **CSV** or **NetCDF** (single or multidimensional) to **Zarr** or **Parquet**.
23-
- **Dataset configuration:** YAML-based configuration with inheritance, allowing similar datasets to share settings.
23+
- **Dataset configuration:** YAML-based configuration with inheritance, allowing similar datasets to share settings.
2424
Example: [Radar ACORN](https://github.com/aodn/aodn_cloud_optimised/tree/main/aodn_cloud_optimised/config/dataset), [GHRSST](https://www.ghrsst.org/).
2525
- Semi-automatic creation of dataset configuration: [ReadTheDocs guide](https://aodn-cloud-optimised.readthedocs.io/en/latest/development/dataset-configuration.html#create-dataset-configuration-semi-automatic).
26-
- Generic handlers for standard datasets:
27-
[GenericParquetHandler](https://github.com/aodn/aodn_cloud_optimised/blob/main/aodn_cloud_optimised/lib/GenericParquetHandler.py),
26+
- Generic handlers for standard datasets:
27+
[GenericParquetHandler](https://github.com/aodn/aodn_cloud_optimised/blob/main/aodn_cloud_optimised/lib/GenericParquetHandler.py),
2828
[GenericZarrHandler](https://github.com/aodn/aodn_cloud_optimised/blob/main/aodn_cloud_optimised/lib/GenericZarrHandler.py)
29-
- Custom handlers can inherit from generic handlers:
30-
[Argo handler](https://github.com/aodn/aodn_cloud_optimised/blob/main/aodn_cloud_optimised/lib/ArgoHandler.py),
29+
- Custom handlers can inherit from generic handlers:
30+
[Argo handler](https://github.com/aodn/aodn_cloud_optimised/blob/main/aodn_cloud_optimised/lib/ArgoHandler.py),
3131
[Mooring Timeseries Handler](https://github.com/aodn/aodn_cloud_optimised/blob/main/aodn_cloud_optimised/lib/AnmnHourlyTsHandler.py)
3232

3333
### Clustering & Parallel Processing
@@ -39,8 +39,8 @@ Visit the documentation on [ReadTheDocs](https://aodn-cloud-optimised.readthedoc
3939
- Automatic restart of remote cluster upon Dask failure.
4040
- **Zarr:** Gridded datasets are processed in batch and in parallel using [`xarray.open_mfdataset`](https://xarray.pydata.org/en/stable/generated/xarray.open_mfdataset.html).
4141
- **Parquet:** Tabular files are processed in batch and in parallel as independent tasks, implemented with `concurrent.futures.Future`.
42-
- **S3 / S3-Compatible Storage Support:**
43-
Support for AWS S3 and S3-compatible endpoints (e.g., MinIO, LocalStack) with configurable input/output buckets and authentication via `s3fs` and `boto3`.
42+
- **S3 / S3-Compatible Storage Support:**
43+
Support for AWS S3 and S3-compatible endpoints (e.g., MinIO, LocalStack) with configurable input/output buckets and authentication via `s3fs` and `boto3`.
4444
### Reprocessing
4545
- **Zarr:** Reprocessing is achieved by writing to specific slices, including non-contiguous regions.
4646
- **Parquet:** Reprocessing uses PyArrow internal overwriting; can also be forced when input files change significantly.
@@ -56,7 +56,7 @@ See [doc](https://aodn-cloud-optimised.readthedocs.io/en/latest/development/data
5656
- variable attribute -> variable
5757
- filename part -> variable
5858
- ...
59-
59+
6060
### Metadata
6161
- **Parquet:** Metadata stored as a sidecar `_metadata.parquet` file for faster queries and schema discovery.
6262

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
generic_launcher.py
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
generic_launcher.py
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
generic_launcher.py

aodn_cloud_optimised/config/dataset/satellite_ghrsst_l3s_1day_nighttime_geopolar_multi_sensor_australia.json

Lines changed: 36 additions & 230 deletions
Original file line numberDiff line numberDiff line change
@@ -2,198 +2,44 @@
22
"dataset_name": "satellite_ghrsst_l3s_1day_nighttime_geopolar_multi_sensor_australia",
33
"logger_name": "satellite_ghrsst_l3s_1day_nighttime_geopolar_multi_sensor_australia",
44
"parent_config": "satellite_ghrsst_main.json",
5-
"metadata_uuid": "7d8d046b-6002-4897-b781-04e7324d6af8",
6-
"schema": {
7-
"lon": {
8-
"type": "float",
9-
"axis": "X",
10-
"comment": "Longitudes for locating data",
11-
"long_name": "longitude",
12-
"standard_name": "longitude",
13-
"units": "degrees_east",
14-
"valid_max": 360.0,
15-
"valid_min": -180.0
16-
},
17-
"lat": {
18-
"type": "float",
19-
"long_name": "latitude",
20-
"units": "degrees_north",
21-
"valid_min": -90.0,
22-
"valid_max": 90.0,
23-
"axis": "Y",
24-
"comment": "Latitudes for locating data",
25-
"standard_name": "latitude"
26-
},
27-
"time": {
28-
"type": "timestamp[ns]",
29-
"long_name": "reference time of sst file",
30-
"axis": "T",
31-
"comment": "A typical reference time for data",
32-
"standard_name": "time"
33-
},
34-
"sea_surface_temperature": {
35-
"type": "double",
36-
"long_name": "sea surface skin temperature",
37-
"units": "kelvin",
38-
"comment": "The skin temperature of the ocean at a depth of approximately 10 microns",
39-
"standard_name": "sea_surface_skin_temperature",
40-
"valid_min": -32767,
41-
"valid_max": 32767
42-
},
43-
"sst_dtime": {
44-
"type": "double",
45-
"long_name": "time difference from reference time",
46-
"units": "second",
47-
"comment": "time plus sst_dtime gives seconds after 00:00:00 UTC January 1, 1981",
48-
"valid_min": -2147483645,
49-
"valid_max": 2147483645
50-
},
51-
"dt_analysis": {
52-
"type": "double",
53-
"long_name": "deviation from last SST analysis",
54-
"units": "kelvin",
55-
"comment": "The difference between this SST and the previous day's SST",
56-
"source": "ABOM-L4LRfnd-GLOB-GAMSSA_28km;various",
57-
"valid_min": -127,
58-
"valid_max": 127
59-
},
60-
"wind_speed": {
61-
"type": "double",
62-
"long_name": "wind speed",
63-
"units": "m s-1",
64-
"comment": "Typically represent mean surface winds (10 meters above the sea surface)",
65-
"standard_name": "wind_speed",
66-
"source": "ACCESSG-ABOM-Analysis-WSP;various",
67-
"height": "10m",
68-
"valid_min": -127,
69-
"valid_max": 127
70-
},
71-
"sea_ice_fraction": {
72-
"type": "double",
73-
"long_name": "sea ice fraction",
74-
"units": "1",
75-
"comment": "Fractional sea ice cover (unitless) derived from near real-time UKMO OSTIA Daily 0.05 degree L4, an optimal interpolation of the operational near real-time EUMETSAT OSI-SAF SSMIS daily 10 km Level 3 sea ice concentration fields (Good et al., 2020, Remote Sensing, https://dx.doi.org/10.3390/rs12040720).",
76-
"standard_name": "sea_ice_area_fraction",
77-
"source": "OSTIA-UKMO-L4-GLOB-v2.0",
78-
"valid_min": -127,
79-
"valid_max": 127
80-
},
81-
"sea_ice_fraction_dtime_from_sst": {
82-
"type": "double",
83-
"long_name": "time difference of sea ice fraction measurement from sst measurement",
84-
"units": "hour",
85-
"comment": "The time difference in hours is estimated from the SST and sea ice data sets",
86-
"source": "OSTIA-UKMO-L4-GLOB-v2.0",
87-
"valid_min": -127,
88-
"valid_max": 127
89-
},
90-
"aerosol_dynamic_indicator": {
91-
"type": "double",
92-
"long_name": "aerosol dynamic indicator",
93-
"units": "count",
94-
"comment": "aerosol dynamic indicator",
95-
"source": "OSDPD-AOD-Analysis-daily;various",
96-
"valid_min": -127,
97-
"valid_max": 127
98-
},
99-
"satellite_zenith_angle": {
100-
"type": "double",
101-
"long_name": "satellite zenith angle",
102-
"units": "angular_degree",
103-
"comment": "The satellite zenith angle at the time of the SST observations",
104-
"valid_min": -127,
105-
"valid_max": 127
106-
},
107-
"l2p_flags": {
108-
"type": "float",
109-
"long_name": "L2P flags",
110-
"valid_min": 0,
111-
"valid_max": 32767,
112-
"comment": "These flags are important to properly use the data. Data not flagged as microwave are sourced from an infrared sensor. The lake and river flags are currently not set, but defined in GDS2.0r4. The aerosol flag indicates high aerosol concentration. The analysis flag indicates high difference from analysis temperatures (differences greater than Analysis Limit). The lowwind flag indicates regions of low wind speed (typically less than the low Wind Limit) per NWP model. The highwind flag indicates regions of high wind speed (typically greater than the high Wind Limit) per NWP model. See wind limits in the comment field for the actual values. The edge flag indicates pixel sizes that are larger than Pixel Spread times the size of the pixel in the center of the field of view in either lat or lon direction. The terminator flag indicates that the sun is near the horizon. The reflector flag indicates that the satellite would receive direct reflected sunlight if the earth was a perfect mirror. The swath flag is used in gridded files to indicate if the pixel could have been seen by the satellite. delta_dn indicates that the day.night sst algorithm was different from the standard algorithm. Other flags may be populated and are for internal use and the definitions may change, so should not be relied on. Flags greater than 64 only apply to non-land pixels",
113-
"flag_masks": [
114-
1,
115-
2,
116-
4,
117-
8,
118-
16,
119-
32,
120-
64,
121-
128,
122-
256,
123-
512,
124-
1024,
125-
2048,
126-
4096,
127-
8192,
128-
16384
129-
],
130-
"flag_meanings": "microwave land ice lake river reserved aerosol analysis lowwind highwind edge terminator reflector swath delta_dn"
5+
"cloud_optimised_format": "zarr",
6+
"run_settings": {
7+
"paths": [
8+
{
9+
"type": "files",
10+
"s3_uri": "s3://imos-data/IMOS/SRS/SST/ghrsst/L3SGM-1d/ngt/",
11+
"filter": [
12+
".*\\.nc$"
13+
],
14+
"year_range": [
15+
2015,
16+
2022
17+
]
18+
}
19+
],
20+
"cluster": {
21+
"mode": "coiled",
22+
"restart_every_path": true
13123
},
132-
"quality_level": {
133-
"type": "float",
134-
"long_name": "quality level of SST pixel",
135-
"valid_min": 0,
136-
"valid_max": 5,
137-
"comment": "These are the overall quality indicators and are used for all GHRSST SSTs. The quality level in this case is the minimum of the original quality_level assigned by L3U data provider and quality level calculated using Sensor Specific Error Statistics (SSES). The latter is calculated using bias and standard deviation estimates as described in Griffin et al. (2017) Appendix A at http://imos.org.au/facilities/srs/sstproducts/sstdata0/sstdata-references/. For validation applications, please consider quality_level greater than or equal 4 with bias correction. For operational applications, please consider quality_level greater than or equal 3 with bias correction. For qualitative applications, please consider quality_level greater than or equal 2 with or without bias correction.",
138-
"flag_values": [
139-
0,
140-
1,
141-
2,
142-
3,
143-
4,
144-
5
24+
"clear_existing_data": true,
25+
"raise_error": false,
26+
"coiled_cluster_options": {
27+
"n_workers": [
28+
35,
29+
120
14530
],
146-
"flag_meanings": "no_data bad_data worst_quality low_quality acceptable_quality best_quality"
147-
},
148-
"sses_bias": {
149-
"type": "double",
150-
"long_name": "SSES bias estimate",
151-
"units": "kelvin",
152-
"comment": "Bias estimate derived from contributing L3C sses_bias values per http://imos.org.au/facilities/srs/sstproducts/sstdata0/sstdata-references/",
153-
"valid_min": -127,
154-
"valid_max": 127
155-
},
156-
"sses_standard_deviation": {
157-
"type": "double",
158-
"long_name": "SSES standard deviation estimate",
159-
"units": "kelvin",
160-
"comment": "Standard deviation estimate derived from contributing L3C sses_standard_deviation values per http://imos.org.au/facilities/srs/sstproducts/sstdata0/sstdata-references/",
161-
"valid_min": -127,
162-
"valid_max": 127
163-
},
164-
"sses_count": {
165-
"type": "double",
166-
"long_name": "SSES count",
167-
"units": "count",
168-
"comment": "Weighted representative number of swath pixels. EXPERIMENTAL_FIELD",
169-
"valid_min": -127,
170-
"valid_max": 127
171-
},
172-
"sst_count": {
173-
"type": "double",
174-
"long_name": "Number of SST measurements",
175-
"units": "count",
176-
"comment": "Unweighted count of number of contributory SST measurements. EXPERIMENTAL_FIELD",
177-
"valid_min": -127,
178-
"valid_max": 127
179-
},
180-
"sst_mean": {
181-
"type": "double",
182-
"long_name": "Unweighted SST mean",
183-
"units": "kelvin",
184-
"comment": "Unweighted mean of contributory SST measurements. EXPERIMENTAL_FIELD",
185-
"valid_min": -32767,
186-
"valid_max": 32767
31+
"scheduler_vm_types": "m7i-flex.xlarge",
32+
"worker_vm_types": "m7i-flex.xlarge",
33+
"allow_ingress_from": "me",
34+
"compute_purchase_option": "spot_with_fallback",
35+
"worker_options": {
36+
"nthreads": 1,
37+
"memory_limit": "100GB"
38+
}
18739
},
188-
"sst_standard_deviation": {
189-
"type": "double",
190-
"long_name": "Unweighted SST standard deviation",
191-
"units": "kelvin",
192-
"comment": "Standard deviation estimate of contributory SST measurements. EXPERIMENTAL_FIELD",
193-
"valid_min": -127,
194-
"valid_max": 127
195-
}
40+
"batch_size": 60
19641
},
42+
"metadata_uuid": "7d8d046b-6002-4897-b781-04e7324d6af8",
19743
"aws_opendata_registry": {
19844
"Name": "Satellite - Sea surface temperature - Level 3 - GeoPolar Multi sensor - 1 day - Night time",
19945
"Description": "This is a GeoPolar Multi-sensor SSTskin L3S product for a single night-time period, derived using sea surface temperature retrievals from the AHI sensor on the Himawari-8, the VIIRS sensor on the Suomi-NPP satellite and JPSS series of satellites, and AVHRR sensor on the NOAA and Metop series of Polar-orbiting satellites. The sensors and satellite platforms contributing to each file are listed in the sensor and platform global attributes in the file header. The GeoPolar Multi-sensor L3S product is provided as a 0.02deg x 0.02deg cylindrical equidistant projected map over the region 70E to 170W, 20N to 70S. The quality level for each pixel was remapped using the original ACSPO VIIRS and AVHRR L3U quality levels and Sensor Specific Error Statistics (SSES), as described in Govekar et al. (2022) https://doi.org/10.3390/rs14153785 and Griffin et al. (2017) Appendix at http://imos.org.au/facilities/srs/sstproducts/sstdata0/sstdata-references/, before compositing single swaths from the sensors. Each grid cell contains the 1 night average of all the highest available quality SSTs that overlap with that cell, weighted by the area of overlap. Refer to the IMOS SST products web page at http://imos.org.au/sstproducts.html for further information.",
@@ -208,7 +54,7 @@
20854
"License": "http://creativecommons.org/licenses/by/4.0/",
20955
"Resources": [
21056
{
211-
"Description": "Cloud Optimised AODN dataset of IMOS - SRS - SST - L3S - GeoPolar Multi Sensor - 1 day - night time - Australia",
57+
"Description": "Cloud Optimised AODN dataset of IMOS - Satellite Remote Sensing - SST - L3S - GeoPolar Multi Sensor - 1 day - night time - Australia",
21258
"ARN": "arn:aws:s3:::aodn-cloud-optimised/satellite_ghrsst_l3s_1day_nighttime_geopolar_multi_sensor_australia.zarr",
21359
"Region": "ap-southeast-2",
21460
"Type": "S3 Bucket"
@@ -217,7 +63,7 @@
21763
"DataAtWork": {
21864
"Tutorials": [
21965
{
220-
"Title": "Accessing IMOS - SRS - SST - L3S - GeoPolar Multi Sensor - 1 day - night time - Australia",
66+
"Title": "Accessing IMOS - Satellite Remote Sensing - SST - L3S - GeoPolar Multi Sensor - 1 day - night time - Australia",
22167
"URL": "https://github.com/aodn/aodn_cloud_optimised/blob/main/notebooks/satellite_ghrsst_l3s_1day_nighttime_geopolar_multi_sensor_australia.ipynb",
22268
"NotebookURL": "https://githubtocolab.com/aodn/aodn_cloud_optimised/blob/main/notebooks/satellite_ghrsst_l3s_1day_nighttime_geopolar_multi_sensor_australia.ipynb",
22369
"AuthorName": "Laurent Besnard",
@@ -233,45 +79,5 @@
23379
]
23480
},
23581
"Citation": "The citation in a list of references is: \"IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access].\""
236-
},
237-
"run_settings": {
238-
"coiled_cluster_options": {
239-
"n_workers": [
240-
30,
241-
150
242-
],
243-
"scheduler_vm_types": "m7i.xlarge",
244-
"worker_vm_types": "m7i.2xlarge",
245-
"allow_ingress_from": "me",
246-
"compute_purchase_option": "spot_with_fallback",
247-
"worker_options": {
248-
"nthreads": 16,
249-
"memory_limit": "64GB"
250-
}
251-
},
252-
"batch_size": 125,
253-
"cluster": {
254-
"mode": "coiled",
255-
"restart_every_path": true
256-
},
257-
"paths": [
258-
{
259-
"s3_uri": "s3://imos-data/IMOS/SRS/SST/ghrsst/L3SGM-1d/ngt",
260-
"filter": [],
261-
"year_range": [
262-
2018,
263-
2022
264-
]
265-
}
266-
],
267-
"clear_existing_data": true,
268-
"raise_error": false
269-
},
270-
"schema_transformation": {
271-
"global_attributes": {
272-
"set": {
273-
"title": ""
274-
}
275-
}
27682
}
27783
}

0 commit comments

Comments
 (0)