Skip to content

Commit 501a5d9

Browse files
committed
re-implement testing resource
1 parent 76d8c86 commit 501a5d9

File tree

2 files changed

+275
-0
lines changed

2 files changed

+275
-0
lines changed
Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
{
2+
"dataset_name": "seabird_v1",
3+
"logger_name": "seabird_v1",
4+
"cloud_optimised_format": "parquet",
5+
"run_settings": {
6+
"paths": [
7+
{
8+
"s3_uri": "s3://imos-data/seabird",
9+
"filter": [
10+
"seabird_v1_2025-10-21T02:33:33.parquet"
11+
]
12+
}
13+
],
14+
"cluster": {
15+
"mode": "local",
16+
"restart_every_path": false
17+
},
18+
"clear_existing_data": true,
19+
"raise_error": false,
20+
"coiled_cluster_options": {
21+
"n_workers": [
22+
1,
23+
20
24+
],
25+
"scheduler_vm_types": "m7i-flex.large",
26+
"worker_vm_types": "m7i-flex.large",
27+
"allow_ingress_from": "me",
28+
"compute_purchase_option": "spot_with_fallback",
29+
"worker_options": {
30+
"nthreads": 4,
31+
"memory_limit": "8GB"
32+
}
33+
},
34+
"batch_size": 5,
35+
"force_previous_parquet_deletion": false
36+
},
37+
"metadata_uuid": null,
38+
"schema": {
39+
"index": {
40+
"type": "int64"
41+
},
42+
"id": {
43+
"type": "string"
44+
},
45+
"modified": {
46+
"type": "timestamp[ms]"
47+
},
48+
"bibliographicCitation": {
49+
"type": "string"
50+
},
51+
"institutionCode": {
52+
"type": "string"
53+
},
54+
"collectionCode": {
55+
"type": "string"
56+
},
57+
"basisOfRecord": {
58+
"type": "string"
59+
},
60+
"occurrenceID": {
61+
"type": "string"
62+
},
63+
"catalogNumber": {
64+
"type": "string"
65+
},
66+
"recordedBy": {
67+
"type": "string"
68+
},
69+
"individualCount": {
70+
"type": "int64"
71+
},
72+
"organismQuantity": {
73+
"type": "string"
74+
},
75+
"organismQuantityType": {
76+
"type": "string"
77+
},
78+
"sex": {
79+
"type": "string"
80+
},
81+
"lifeStage": {
82+
"type": "string"
83+
},
84+
"occurrenceStatus": {
85+
"type": "string"
86+
},
87+
"occurrenceRemarks": {
88+
"type": "string"
89+
},
90+
"organismID": {
91+
"type": "string"
92+
},
93+
"fieldNumber": {
94+
"type": "string"
95+
},
96+
"eventDate": {
97+
"type": "date32[day]"
98+
},
99+
"country": {
100+
"type": "string"
101+
},
102+
"stateProvince": {
103+
"type": "string"
104+
},
105+
"locality": {
106+
"type": "string"
107+
},
108+
"minimumDepthInMeters": {
109+
"type": "int64"
110+
},
111+
"maximumDepthInMeters": {
112+
"type": "int64"
113+
},
114+
"decimalLatitude": {
115+
"type": "double"
116+
},
117+
"decimalLongitude": {
118+
"type": "double"
119+
},
120+
"coordinateUncertaintyInMeters": {
121+
"type": "int64"
122+
},
123+
"coordinatePrecision": {
124+
"type": "double"
125+
},
126+
"footprintWKT": {
127+
"type": "string"
128+
},
129+
"identifiedBy": {
130+
"type": "string"
131+
},
132+
"scientificNameID": {
133+
"type": "string"
134+
},
135+
"scientificName": {
136+
"type": "string"
137+
},
138+
"valid_authority": {
139+
"type": "string"
140+
},
141+
"kingdom": {
142+
"type": "string"
143+
},
144+
"phylum": {
145+
"type": "string"
146+
},
147+
"class": {
148+
"type": "string"
149+
},
150+
"order": {
151+
"type": "string"
152+
},
153+
"family": {
154+
"type": "string"
155+
},
156+
"genus": {
157+
"type": "string"
158+
},
159+
"specificEpithet": {
160+
"type": "string"
161+
},
162+
"taxonRank": {
163+
"type": "string"
164+
},
165+
"scientificNameAuthorship": {
166+
"type": "string"
167+
},
168+
"vernacularName": {
169+
"type": "string"
170+
},
171+
"survey_type": {
172+
"type": "string"
173+
},
174+
"organisation": {
175+
"type": "string"
176+
},
177+
"dataset_id": {
178+
"type": "string"
179+
},
180+
"Wind Direction": {
181+
"type": "double"
182+
},
183+
"Air Temperature": {
184+
"type": "double"
185+
},
186+
"Wind Speed": {
187+
"type": "double"
188+
},
189+
"Depth": {
190+
"type": "double"
191+
},
192+
"Air Pressure": {
193+
"type": "double"
194+
},
195+
"Sea State": {
196+
"type": "string"
197+
},
198+
"Salinity": {
199+
"type": "double"
200+
},
201+
"Cloud Cover": {
202+
"type": "string"
203+
}
204+
},
205+
"aws_opendata_registry": {
206+
"Name": "",
207+
"Description": "",
208+
"Documentation": "",
209+
"Contact": "",
210+
"ManagedBy": "",
211+
"UpdateFrequency": "As Needed",
212+
"Tags": [],
213+
"License": "http://creativecommons.org/licenses/by/4.0/",
214+
"Resources": [
215+
{
216+
"Description": "",
217+
"ARN": "arn:aws:s3:::aodn-cloud-optimised/seabird_v1.parquet",
218+
"Region": "ap-southeast-2",
219+
"Type": "S3 Bucket"
220+
}
221+
],
222+
"DataAtWork": {
223+
"Tutorials": []
224+
},
225+
"Citation": ""
226+
},
227+
"schema_transformation": {
228+
"drop_variables": [],
229+
"add_variables": {
230+
"filename": {
231+
"source": "@filename",
232+
"schema": {
233+
"type": "string",
234+
"units": "1",
235+
"long_name": "Filename of the source file"
236+
}
237+
},
238+
"timestamp": {
239+
"source": "@partitioning:time_extent",
240+
"schema": {
241+
"type": "int64",
242+
"units": "1",
243+
"long_name": "Partition timestamp"
244+
}
245+
},
246+
"polygon": {
247+
"source": "@partitioning:spatial_extent",
248+
"schema": {
249+
"type": "string",
250+
"units": "1",
251+
"long_name": "Spatial partition polygon"
252+
}
253+
}
254+
},
255+
"partitioning": [
256+
{
257+
"source_variable": "timestamp",
258+
"type": "time_extent",
259+
"time_extent": {
260+
"time_varname": "eventDate",
261+
"partition_period": "Y"
262+
}
263+
},
264+
{
265+
"source_variable": "polygon",
266+
"type": "spatial_extent",
267+
"spatial_extent": {
268+
"lat_varname": "decimalLatitude",
269+
"lon_varname": "decimalLongitude",
270+
"spatial_resolution": 30
271+
}
272+
}
273+
]
274+
}
275+
}
Binary file not shown.

0 commit comments

Comments
 (0)