Skip to content

Commit 694e709

Browse files
authored
Merge pull request #110 from aodn/FeatHandleInconsistentGrid
FeatHandleInconsistentGrid
2 parents 7722586 + f9f77ea commit 694e709

11 files changed

+604
-127
lines changed

aodn_cloud_optimised/lib/GenericZarrHandler.py

Lines changed: 255 additions & 92 deletions
Large diffs are not rendered by default.

aodn_cloud_optimised/lib/s3Tools.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1-
import boto3
1+
import logging
22
from urllib.parse import urlparse
3+
4+
import boto3
35
import s3fs
4-
import logging
6+
from botocore import UNSIGNED
7+
from botocore.config import Config
58

69

710
def s3_ls(bucket, prefix, suffix=".nc", s3_path=True) -> list:
@@ -33,7 +36,9 @@ def s3_ls(bucket, prefix, suffix=".nc", s3_path=True) -> list:
3336

3437
logger.info(f"Listing S3 objects in {bucket} under {prefix} ending with {suffix}")
3538

36-
s3 = boto3.client("s3")
39+
# NOTE: use unsigned (anonymous) requests so that public buckets can be listed without credentials. TODO: confirm this is not a regression for private buckets.
40+
# s3 = boto3.client("s3")
41+
s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED))
3742

3843
paginator = s3.get_paginator("list_objects_v2")
3944
pages = paginator.paginate(Bucket=bucket, Prefix=prefix)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
{
2+
"dataset_name": "radar_NorthWestShelf_velocity_hourly_averaged_delayed_qc",
3+
"logger_name": "radar_NorthWestShelf_velocity_hourly_averaged_delayed_qc",
4+
"parent_config": "radar_velocity_hourly_averaged_delayed_qc_no_I_J_version_main.json",
5+
"metadata_uuid": "23c27e4f-c982-44e9-9ab7-71094d297549",
6+
"dimensions": {
7+
"time": {
8+
"name": "TIME",
9+
"chunk": 100,
10+
"rechunk": false
11+
},
12+
"latitude": {
13+
"name": "LATITUDE",
14+
"chunk": 51
15+
},
16+
"longitude": {
17+
"name": "LONGITUDE",
18+
"chunk": 55
19+
}
20+
},
21+
"dataset_gattrs": {
22+
"title": ""
23+
},
24+
"aws_opendata_registry": {
25+
"Name": "Ocean Radar - Northwest Shelf site (Western Australia, Australia) - Sea water velocity - Delayed mode",
26+
"Description": "The Northwest Shelf (NWA) HF ocean radar system covers an area which includes the Ningaloo Peninsula and the Ningaloo Reef to the west. The Ningaloo Reef is one of the longest and most pristine reefs in the world. The reef is rich in marine biodiversity, with whale sharks, turtles and fish aggregations, and high primary and secondary production, which are controlled by the physical oceanographic processes. \n\nThe NWA HF ocean radar is a WERA phased array system with 12-element receive arrays located at the Jurabi Turtle Centre (21.8068 S, 114.1015 E) and Point Billie (22.5432 S, 113.690 E). These radars operate at a frequency of 5.2625 MHz, with a bandwidth of 25 kHz and a maximum range of 200 km. Within the HF radar coverage area surface currents are measured. Data are also collected from which wind directions and significant wave height can be calculated.",
27+
"Documentation": "https://catalogue-imos.aodn.org.au/geonetwork/srv/eng/catalog.search#/metadata/23c27e4f-c982-44e9-9ab7-71094d297549",
28+
"Contact": "info@aodn.org.au",
29+
"ManagedBy": "AODN",
30+
"UpdateFrequency": "As Needed",
31+
"Tags": [
32+
"oceans",
33+
"ocean currents",
34+
"ocean velocity"
35+
],
36+
"License": "http://creativecommons.org/licenses/by/4.0/",
37+
"Resources": [
38+
{
39+
"Description": "Cloud Optimised AODN dataset of IMOS - ACORN - Northwest Shelf HF ocean radar site (Western Australia, Australia) - Delayed mode sea water velocity",
40+
"ARN": "arn:aws:s3:::aodn-cloud-optimised/radar_NorthWestShelf_velocity_hourly_averaged_delayed_qc.zarr",
41+
"Region": "ap-southeast-2",
42+
"Type": "S3 Bucket"
43+
}
44+
],
45+
"DataAtWork": {
46+
"Tutorials": [
47+
{
48+
"Title": "Accessing IMOS - ACORN - Northwest Shelf HF ocean radar site (Western Australia, Australia) - Delayed mode sea water velocity",
49+
"URL": "https://nbviewer.org/github/aodn/aodn_cloud_optimised/blob/main/notebooks/radar_NorthWestShelf_velocity_hourly_averaged_delayed_qc.ipynb",
50+
"NotebookURL": "https://githubtocolab.com/aodn/aodn_cloud_optimised/blob/main/notebooks/radar_NorthWestShelf_velocity_hourly_averaged_delayed_qc.ipynb",
51+
"AuthorName": "Laurent Besnard",
52+
"AuthorURL": "https://github.com/aodn/aodn_cloud_optimised"
53+
},
54+
{
55+
"Title": "Accessing and searching for any AODN dataset",
56+
"URL": "https://nbviewer.org/github/aodn/aodn_cloud_optimised/blob/main/notebooks/GetAodnData.ipynb",
57+
"NotebookURL": "https://githubtocolab.com/aodn/aodn_cloud_optimised/blob/main/notebooks/GetAodnData.ipynb",
58+
"AuthorName": "Laurent Besnard",
59+
"AuthorURL": "https://github.com/aodn/aodn_cloud_optimised"
60+
}
61+
]
62+
},
63+
"Citation": "IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access]"
64+
}
65+
}
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
{
2+
"dataset_name": "radar_velocity_hourly_averaged_delayed_qc_no_I_J_version_main",
3+
"logger_name": "radar_velocity_hourly_averaged_delayed_qc_no_I_J_version_main",
4+
"cloud_optimised_format": "zarr",
5+
"coiled_cluster_options": {
6+
"n_workers": [
7+
12,
8+
140
9+
],
10+
"scheduler_vm_types": "m7i.large",
11+
"worker_vm_types": "m7i.xlarge",
12+
"allow_ingress_from": "me",
13+
"compute_purchase_option": "spot_with_fallback",
14+
"worker_options": {
15+
"nthreads": 16,
16+
"memory_limit": "64GB"
17+
}
18+
},
19+
"batch_size": 6000,
20+
"var_template_shape": "UCUR",
21+
"vars_to_drop_no_common_dimension": [
22+
"LATITUDE",
23+
"LONGITUDE",
24+
"GDOP"
25+
],
26+
"schema": {
27+
"TIME": {
28+
"type": "timestamp[ns]",
29+
"standard_name": "time",
30+
"long_name": "time",
31+
"axis": "T",
32+
"valid_min": 0.0,
33+
"valid_max": 999999.0,
34+
"comment": "Given time lays at the middle of the averaging time period.",
35+
"local_time_zone": 8.0
36+
},
37+
"LATITUDE": {
38+
"type": "double",
39+
"standard_name": "latitude",
40+
"long_name": "latitude",
41+
"units": "degrees_north",
42+
"axis": "Y",
43+
"valid_min": -90.0,
44+
"valid_max": 90.0,
45+
"reference_datum": "geographical coordinates, WGS84 datum"
46+
},
47+
"LONGITUDE": {
48+
"type": "double",
49+
"standard_name": "longitude",
50+
"long_name": "longitude",
51+
"units": "degrees_east",
52+
"axis": "X",
53+
"valid_min": -180.0,
54+
"valid_max": 180.0,
55+
"reference_datum": "geographical coordinates, WGS84 datum"
56+
},
57+
"GDOP": {
58+
"type": "float",
59+
"long_name": "radar beam intersection angle",
60+
"units": "Degrees",
61+
"valid_min": 0.0,
62+
"valid_max": 180.0,
63+
"comment": "This angle is used to assess the impact of Geometric Dilution of Precision. If angle >= 150 or <= 30, then QC flag will not be lower than 4 (see abstract)."
64+
},
65+
"UCUR": {
66+
"type": "float",
67+
"standard_name": "eastward_sea_water_velocity",
68+
"long_name": "Mean of sea water velocity U component values in 1 hour, after rejection of obvious bad data (see abstract).",
69+
"units": "m s-1",
70+
"valid_min": -10.0,
71+
"valid_max": 10.0,
72+
"cell_methods": "TIME: mean",
73+
"ancillary_variables": "UCUR_quality_control"
74+
},
75+
"VCUR": {
76+
"type": "float",
77+
"standard_name": "northward_sea_water_velocity",
78+
"long_name": "Mean of sea water velocity V component values in 1 hour, after rejection of obvious bad data (see abstract).",
79+
"units": "m s-1",
80+
"valid_min": -10.0,
81+
"valid_max": 10.0,
82+
"cell_methods": "TIME: mean",
83+
"ancillary_variables": "VCUR_quality_control"
84+
},
85+
"UCUR_sd": {
86+
"type": "float",
87+
"long_name": "Standard deviation of sea water velocity U component values in 1 hour, after rejection of obvious bad data (see abstract).",
88+
"units": "m s-1",
89+
"valid_min": -10.0,
90+
"valid_max": 10.0,
91+
"cell_methods": "TIME: standard_deviation",
92+
"ancillary_variables": "UCUR_quality_control"
93+
},
94+
"VCUR_sd": {
95+
"type": "float",
96+
"long_name": "Standard deviation of sea water velocity V component values in 1 hour, after rejection of obvious bad data (see abstract).",
97+
"units": "m s-1",
98+
"valid_min": -10.0,
99+
"valid_max": 10.0,
100+
"cell_methods": "TIME: standard_deviation",
101+
"ancillary_variables": "VCUR_quality_control"
102+
},
103+
"NOBS1": {
104+
"type": "float",
105+
"long_name": "Number of observations of sea water velocity in 1 hour from station 1, after rejection of obvious bad data (see abstract).",
106+
"units": "1"
107+
},
108+
"NOBS2": {
109+
"type": "float",
110+
"long_name": "Number of observations of sea water velocity in 1 hour from station 2, after rejection of obvious bad data (see abstract).",
111+
"units": "1"
112+
},
113+
"UCUR_quality_control": {
114+
"type": "float",
115+
"standard_name": "eastward_sea_water_velocity status_flag",
116+
"long_name": "quality flag for eastward_sea_water_velocity",
117+
"quality_control_conventions": "IMOS standard flags",
118+
"quality_control_set": 1.0,
119+
"valid_min": 0,
120+
"valid_max": 9,
121+
"flag_values": [
122+
0,
123+
1,
124+
2,
125+
3,
126+
4,
127+
5,
128+
6,
129+
7,
130+
8,
131+
9
132+
],
133+
"flag_meanings": "no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values",
134+
"comment": "This value is set on the basis of the offline quality controls applied in the time domain (see abstract)."
135+
},
136+
"VCUR_quality_control": {
137+
"type": "float",
138+
"standard_name": "northward_sea_water_velocity status_flag",
139+
"long_name": "quality flag for northward_sea_water_velocity",
140+
"quality_control_conventions": "IMOS standard flags",
141+
"quality_control_set": 1.0,
142+
"valid_min": 0,
143+
"valid_max": 9,
144+
"flag_values": [
145+
0,
146+
1,
147+
2,
148+
3,
149+
4,
150+
5,
151+
6,
152+
7,
153+
8,
154+
9
155+
],
156+
"flag_meanings": "no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values",
157+
"comment": "This value is set on the basis of the offline quality controls applied in the time domain (see abstract)."
158+
}
159+
},
160+
"dataset_gattrs": {
161+
"title": ""
162+
}
163+
}

test_aodn_cloud_optimised/test_bin_generic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ def test_main(self, mock_parse_args):
149149
log_handler.flush()
150150
captured_logs = log_stream.getvalue().strip().split("\n")
151151

152-
# Validate logs (add more specific assertions based on your logging format)
152+
# Validate logs
153153
self.assertTrue(
154154
any("Cluster dask dashboard" in log for log in captured_logs)
155155
)

test_aodn_cloud_optimised/test_generic_parquet_handler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,7 @@ def test_parquet_nc_generic_handler_bad_time_values(self):
421421
log_handler.flush()
422422
captured_logs = log_stream.getvalue().strip().split("\n")
423423

424-
# Validate logs (add more specific assertions based on your logging format)
424+
# Validate logs
425425
self.assertTrue(
426426
any(
427427
"All values of the time variable were bad" in log
@@ -441,7 +441,7 @@ def test_parquet_nc_generic_handler_bad_time_values(self):
441441

442442
log_handler.flush()
443443
captured_logs = log_stream.getvalue().strip().split("\n")
444-
# Validate logs (add more specific assertions based on your logging format)
444+
# Validate logs
445445
self.assertTrue(
446446
any(
447447
"time issues with the input file. File not processed" in log

0 commit comments

Comments
 (0)