Skip to content

Commit 63ab3ae

Browse files
authored
add new tool: Copernicus CMEMS Downloader for AquaINFRA marine model (#198)
* add new tool: Copernicus CMEMS Downloader for AquaINFRA marine model
* some linting
* some linting
* add citations
* replace credentials tags by older authentication method
* add versions to packages
* change profile version to 22.05
* add test
* add test
* add test
* add test
* add test
* add test
* add test
* add expect_failure=true
* remove some citations
1 parent 69bc057 commit 63ab3ae

4 files changed

Lines changed: 614 additions & 0 deletions

File tree

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
categories:
2+
- Ecology
3+
owner: ecology
4+
remote_repository_url: https://github.com/AquaINFRA/galaxy_vre
5+
homepage_url: https://github.com/AquaINFRA/galaxy_vre
6+
long_description: |
7+
Copernicus CMEMS Downloader for AquaINFRA marine model
8+
type: unrestricted
9+
auto_tool_repositories:
10+
name_template: "{{ tool_id }}"
11+
description_template: "Copernicus CMEMS Downloader for AquaINFRA marine model: {{ tool_name }}."
Lines changed: 384 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,384 @@
1+
#!/usr/bin/env bash
2+
3+
# !/bin/bash -i
4+
set -euo pipefail
5+
6+
MIN_LAT="${1}"
7+
MAX_LAT="${2}"
8+
MIN_LON="${3}"
9+
MAX_LON="${4}"
10+
MIN_DEPTH="${5}"
11+
MAX_DEPTH="${6}"
12+
START_DATE="${7}"
13+
END_DATE="${8}"
14+
BC_SOUTH="${9}"
15+
BC_NORTH="${10}"
16+
BC_WEST="${11}"
17+
BC_EAST="${12}"
18+
19+
cm_xmin=`echo "${MIN_LON} - 0.1" | bc`
20+
# store_var cm_xmin
21+
cm_ymin=`echo "${MIN_LAT} - 0.1" | bc`
22+
# store_var cm_ymin
23+
cm_xmax=`echo "${MAX_LON} + 0.1" | bc`
24+
# store_var cm_ymin
25+
cm_ymax=`echo "${MAX_LAT} + 0.1" | bc`
26+
# store_var cm_ymax
27+
28+
# Detect this script's directory (so we can find the Python helper file)
29+
TOOL_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
30+
echo ">> startdate: ${START_DATE}"
31+
echo ">> Tool directory set to: ${TOOL_DIR}"
32+
33+
# Define dataset IDs
34+
REANALYSIS_TEM_DATASET_ID="med-cmcc-tem-rean-d"
35+
FCANALYSIS_TEM_DATASET_ID="cmems_mod_med_phy-tem_anfc_4.2km_P1D-m"
36+
37+
# Define functions
38+
39+
check_product_availability(){
40+
local product_id="$1"
41+
local start_date="$2"
42+
local end_date="$3"
43+
44+
echo ">> Checking availability for $product_id from $start_date to $end_date"
45+
46+
# Get metadata in JSON
47+
local metadata_json
48+
copernicusmarine login --username "$CMEMS_USERNAME" --password "$CMEMS_PASSWORD" --force-overwrite
49+
metadata_json=$(copernicusmarine describe --dataset-id "$product_id" --return-fields datasets 2>/dev/null)
50+
51+
if [ -z "$metadata_json" ]; then
52+
echo ">> Warning: Could not fetch metadata for $product_id"
53+
return 1
54+
fi
55+
56+
echo ">> DEBUG: Metadata JSON for $product_id:"
57+
echo ">> $metadata_json"
58+
59+
# Extract min/max time (milliseconds since epoch)
60+
local start_epoch_ms end_epoch_ms
61+
start_epoch_ms=$(echo "$metadata_json" | jq -r '.products[0].datasets[0].versions[0].parts[].services[] | select(.service_name == "arco-geo-series").variables[] | select(.short_name=="thetao").coordinates[] | select(.coordinate_id=="time") | .minimum_value')
62+
end_epoch_ms=$(echo "$metadata_json" | jq -r '.products[0].datasets[0].versions[0].parts[].services[] | select(.service_name == "arco-geo-series").variables[] | select(.short_name=="thetao").coordinates[] | select(.coordinate_id=="time") | .maximum_value')
63+
64+
if [[ -z "$start_epoch_ms" || -z "$end_epoch_ms" ]]; then
65+
echo ">> Warning: Could not extract the time range in metadata for $product_id"
66+
return 1
67+
fi
68+
69+
# Detect if we are on macOS
70+
local is_mac=false
71+
if [[ "$(uname)" == "Darwin" ]]; then
72+
is_mac=true
73+
fi
74+
75+
# Convert milliseconds to seconds (integer division)
76+
local product_start_epoch product_end_epoch
77+
product_start_epoch=$(( ${start_epoch_ms%.*} / 1000 ))
78+
product_end_epoch=$(( ${end_epoch_ms%.*} / 1000 ))
79+
80+
# Convert input dates to epoch (seconds since Unix epoch macOS/Linux compatible)
81+
start_date=$(echo "$start_date" | xargs)
82+
end_date=$(echo "$end_date" | xargs)
83+
84+
local start_date_epoch end_date_epoch
85+
if $is_mac; then
86+
start_date_epoch=$(date -j -f "%Y-%m-%d" "$start_date" +"%s")
87+
end_date_epoch=$(date -j -f "%Y-%m-%d" "$end_date" +"%s")
88+
else
89+
start_date_epoch=$(date -d "$start_date" +"%s")
90+
end_date_epoch=$(date -d "$end_date" +"%s")
91+
fi
92+
93+
# Include the full last day
94+
end_date_epoch=$((end_date_epoch + 86399))
95+
96+
if [[ -z "$start_date_epoch" || -z "$end_date_epoch" ]]; then
97+
echo ">> ERROR: Failed to parse input dates: '$start_date', '$end_date'"
98+
return 1
99+
fi
100+
101+
# Convert product epoch to human-readable dates for logging
102+
local product_start_date product_end_date
103+
104+
if $is_mac; then
105+
product_start_date=$(date -u -r "$product_start_epoch" +"%Y-%m-%d")
106+
product_end_date=$(date -u -r "$product_end_epoch" +"%Y-%m-%d")
107+
else
108+
product_start_date=$(date -u -d @"$product_start_epoch" +"%Y-%m-%d")
109+
product_end_date=$(date -u -d @"$product_end_epoch" +"%Y-%m-%d")
110+
fi
111+
112+
echo ">> start_date_epoch=$start_date_epoch, end_date_epoch=$end_date_epoch"
113+
echo ">> product_start_epoch=$product_start_epoch, product_end_epoch=$product_end_epoch"
114+
115+
echo ">> Checking if date range $start_date$end_date is within product range $product_start_date$product_end_date for $product_id"
116+
117+
# Check if requested range is fully within product range
118+
if [[ $start_date_epoch -ge $product_start_epoch && $end_date_epoch -le $product_end_epoch ]]; then
119+
return 0 # Entire range is available
120+
else
121+
echo ">> Date range $start_date to $end_date is not fully covered by product $product_id"
122+
return 1 # Not available
123+
fi
124+
125+
}
126+
127+
select_datasets() {
128+
if check_product_availability "$REANALYSIS_TEM_DATASET_ID" "$START_DATE" "$END_DATE"; then
129+
cm_tem_daily_product="$REANALYSIS_TEM_DATASET_ID"
130+
cm_sal_daily_product="med-cmcc-sal-rean-d"
131+
cm_cur_daily_product="med-cmcc-cur-rean-d"
132+
prod_version="202012"
133+
echo ">> Using Copernicus Mediterranean Sea Physics Reanalysis product for ${START_DATE} to ${END_DATE}"
134+
else
135+
cm_tem_daily_product="$FCANALYSIS_TEM_DATASET_ID"
136+
cm_sal_daily_product="cmems_mod_med_phy-sal_anfc_4.2km_P1D-m"
137+
cm_cur_daily_product="cmems_mod_med_phy-cur_anfc_4.2km_P1D-m"
138+
prod_version="202411"
139+
echo ">> Using Copernicus Mediterranean Sea Physics Analysis and Forecast product for ${START_DATE} to ${END_DATE}"
140+
fi
141+
}
142+
143+
download_ic(){
144+
145+
# log_print ">>"
146+
# log_print ">> Downloading ocean fields for initial conditions"
147+
# log_print ">> Bounding box petition: (xmin,xmax)=($cm_xmin,$cm_xmax), (ymin,ymax)=($cm_ymin,$cm_ymax)"
148+
149+
for VAR in thetao so cur; do
150+
echo ">> Downloading variable $VAR..."
151+
152+
case "$VAR" in
153+
thetao | so)
154+
case "$VAR" in
155+
thetao) FILENAME="IC_cm_t.nc"; DATASET="$cm_tem_daily_product" ;;
156+
so) FILENAME="IC_cm_s.nc"; DATASET="$cm_sal_daily_product" ;;
157+
esac
158+
copernicusmarine subset \
159+
--dataset-id "$DATASET" \
160+
--dataset-version "${prod_version}" \
161+
--minimum-longitude "$cm_xmin" \
162+
--maximum-longitude "$cm_xmax" \
163+
--minimum-latitude "$cm_ymin" \
164+
--maximum-latitude "$cm_ymax" \
165+
--start-datetime "${START_DATE}T00:00:00" \
166+
--end-datetime "${END_DATE}T23:59:59" \
167+
--minimum-depth "$MIN_DEPTH" \
168+
--maximum-depth "$MAX_DEPTH" \
169+
--variable "$VAR" \
170+
--output-filename "$FILENAME" \
171+
--overwrite ;;
172+
173+
cur) FILENAME="IC_cm_cur.nc"; DATASET="$cm_cur_daily_product"
174+
175+
copernicusmarine subset \
176+
--dataset-id "$DATASET" \
177+
--dataset-version "${prod_version}" \
178+
--minimum-longitude "$cm_xmin" \
179+
--maximum-longitude "$cm_xmax" \
180+
--minimum-latitude "$cm_ymin" \
181+
--maximum-latitude "$cm_ymax" \
182+
--start-datetime "${START_DATE}T00:00:00" \
183+
--end-datetime "${END_DATE}T23:59:59" \
184+
--minimum-depth "$MIN_DEPTH" \
185+
--maximum-depth "$MAX_DEPTH" \
186+
-v uo -v vo \
187+
--output-filename "$FILENAME" \
188+
--overwrite ;;
189+
esac
190+
191+
done
192+
193+
}
194+
195+
download_obc(){
196+
echo ">>"
197+
echo ">> Dowloading data for ${OBC} open boundary..."
198+
echo ">> Bounding box petition: (xmin,xmax)=($cm_bc_xmin,$cm_bc_xmax), (ymin,ymax)=($cm_bc_ymin,$cm_bc_ymax)"
199+
200+
for VAR in thetao so cur; do
201+
echo ">> Downloading variable $VAR for ${OBC} open boundary..."
202+
203+
case "$VAR" in
204+
thetao | so)
205+
case "$VAR" in
206+
thetao) FILENAME="OB${OBC}_cm_t.nc"; DATASET="$cm_tem_daily_product" ;;
207+
so) FILENAME="OB${OBC}_cm_s.nc"; DATASET="$cm_sal_daily_product" ;;
208+
esac
209+
copernicusmarine subset \
210+
--dataset-id "$DATASET" \
211+
--dataset-version "${prod_version}" \
212+
--minimum-longitude "$cm_bc_xmin" \
213+
--maximum-longitude "$cm_bc_xmax" \
214+
--minimum-latitude "$cm_bc_ymin" \
215+
--maximum-latitude "$cm_bc_ymax" \
216+
--start-datetime "${START_DATE}T00:00:00" \
217+
--end-datetime "${END_DATE}T23:59:59" \
218+
--minimum-depth "$MIN_DEPTH" \
219+
--maximum-depth "$MAX_DEPTH" \
220+
--variable "$VAR" \
221+
--coordinates-selection-method "nearest" \
222+
--output-filename "$FILENAME" \
223+
--overwrite ;;
224+
225+
cur) FILENAME="OB${OBC}_cm_cur.nc"; DATASET="$cm_cur_daily_product"
226+
227+
copernicusmarine subset \
228+
--dataset-id "$DATASET" \
229+
--dataset-version "${prod_version}" \
230+
--minimum-longitude "$cm_bc_xmin" \
231+
--maximum-longitude "$cm_bc_xmax" \
232+
--minimum-latitude "$cm_bc_ymin" \
233+
--maximum-latitude "$cm_bc_ymax" \
234+
--start-datetime "${START_DATE}T00:00:00" \
235+
--end-datetime "${END_DATE}T23:59:59" \
236+
--minimum-depth "$MIN_DEPTH" \
237+
--maximum-depth "$MAX_DEPTH" \
238+
-v uo -v vo \
239+
--coordinates-selection-method "nearest" \
240+
--output-filename "$FILENAME" \
241+
--overwrite ;;
242+
243+
esac
244+
245+
done
246+
247+
echo "download obc finished"
248+
}
249+
250+
#-------------------------------------------------------------------------------------
251+
# Main execution
252+
#--------------------------------------------------------------------------------------
253+
# Determine which dataset is available for the date -----------------------------------
254+
255+
select_datasets
256+
257+
max_attempts=50
258+
attempt=1
259+
260+
# Download the data for IC -------------------------------------------------------------
261+
262+
while (( attempt <= max_attempts )); do
263+
download_ic
264+
wait
265+
if [[ -f "IC_cm_cur.nc" && -f "IC_cm_s.nc" && -f "IC_cm_t.nc" ]]; then
266+
echo ">> All IC files are present after $attempt attempts"
267+
break
268+
else
269+
if (( attempt == max_attempts )); then
270+
echo ">> Files not found after $max_attempts attempts"
271+
exit 1
272+
else
273+
echo ">> Attempt $((attempt))/$max_attempts: Some files missing"
274+
((attempt++))
275+
fi
276+
fi
277+
done
278+
279+
# Find model boundaries in Copernicus MedSea Products ---------------------------------
280+
281+
echo ">> DEBUG: calling -> python3 ${TOOL_DIR}/medsea_bc_limits.py --xmin='${MIN_LON}' --xmax='${MAX_LON}' --ymin='${MIN_LAT}' --ymax='${MAX_LAT}' --infile='IC_cm_t.nc'" >> debug.log
282+
283+
python3 "${TOOL_DIR}/medsea_bc_limits.py" --xmin="${MIN_LON}" --xmax="${MAX_LON}" --ymin="${MIN_LAT}" --ymax="${MAX_LAT}" --infile='IC_cm_t.nc'
284+
285+
echo ">> copernicus_lims.inc file created"
286+
287+
288+
# Download the data for each boundary--------------------------------------------------
289+
290+
source copernicus_lims.inc
291+
292+
declare -a OBC_LABEL=("S" "N" "W" "E")
293+
declare -a OBC_VALUE=(${BC_SOUTH} ${BC_NORTH} ${BC_WEST} ${BC_EAST})
294+
295+
NOBC=${#OBC_LABEL[@]}
296+
297+
for (( i=0; i<${NOBC}; i++ ));
298+
do
299+
OBC=${OBC_LABEL[$i]}
300+
OBC_SELEC=${OBC_VALUE[$i]}
301+
302+
echo "${OBC}"
303+
echo "${OBC_SELEC}"
304+
305+
if [ "${OBC_SELEC}" == "true" ]; then
306+
if [ "${OBC}" == "S" ]; then
307+
cm_bc_xmin=${COPERNICUS_WEST_X1}
308+
cm_bc_xmax=${COPERNICUS_EAST_X2}
309+
cm_bc_ymin=${COPERNICUS_SOUTH_Y1}
310+
cm_bc_ymax=${COPERNICUS_SOUTH_Y2}
311+
312+
elif [ "${OBC}" == "N" ]; then
313+
cm_bc_xmin=${COPERNICUS_WEST_X1}
314+
cm_bc_xmax=${COPERNICUS_EAST_X2}
315+
cm_bc_ymin=${COPERNICUS_NORTH_Y1}
316+
cm_bc_ymax=${COPERNICUS_NORTH_Y2}
317+
318+
elif [ "${OBC}" == "W" ]; then
319+
cm_bc_xmin=${COPERNICUS_WEST_X1}
320+
cm_bc_xmax=${COPERNICUS_WEST_X2}
321+
cm_bc_ymin=${COPERNICUS_SOUTH_Y1}
322+
cm_bc_ymax=${COPERNICUS_NORTH_Y2}
323+
324+
elif [ "${OBC}" == "E" ]; then
325+
cm_bc_xmin=${COPERNICUS_EAST_X1}
326+
cm_bc_xmax=${COPERNICUS_EAST_X2}
327+
cm_bc_ymin=${COPERNICUS_SOUTH_Y1}
328+
cm_bc_ymax=${COPERNICUS_NORTH_Y2}
329+
330+
fi
331+
332+
max_attempts=50
333+
attempt=1
334+
echo ">> Starting obc download for ${OBC} boundary"
335+
336+
while (( attempt <= max_attempts )); do
337+
download_obc
338+
echo ">> download obc files finished"
339+
wait
340+
if [[ -f "OB${OBC}_cm_cur.nc" && -f "OB${OBC}_cm_s.nc" && -f "OB${OBC}_cm_t.nc" ]]; then
341+
echo ">> All files for $OBC boundary are present after $attempt attempts"
342+
break
343+
else
344+
if (( attempt == max_attempts )); then
345+
echo ">> Files not found after $max_attempts attempts"
346+
exit 1
347+
else
348+
echo ">> Attempt $((attempt))/$max_attempts: Some files missing"
349+
((attempt++))
350+
fi
351+
fi
352+
done
353+
fi
354+
done
355+
356+
# ---- Create ZIP of NetCDF outputs ----
357+
ZIP_NAME="cmems_ic_bc_data.zip"
358+
echo ">> Creating zip archive: ${ZIP_NAME}"
359+
360+
# Check for .nc files before zipping
361+
NC_FILES=(*.nc)
362+
if [ ${#NC_FILES[@]} -gt 0 ]; then
363+
echo ">> Found ${#NC_FILES[@]} NetCDF file(s) to zip:"
364+
ls -lh "${NC_FILES[@]}"
365+
366+
# Zip all .nc files (quietly, replacing any existing zip)
367+
zip -j -r "$ZIP_NAME" "${NC_FILES[@]}" >/dev/null 2>&1
368+
369+
if [ -f "$ZIP_NAME" ]; then
370+
echo ">> Zip file created successfully:"
371+
ls -lh "$ZIP_NAME"
372+
else
373+
echo ">> Zip creation failed for unknown reason."
374+
exit 1
375+
fi
376+
else
377+
echo ">> No .nc files found to zip! Skipping zip creation."
378+
# Create an empty placeholder so Galaxy still has an output
379+
touch "$ZIP_NAME"
380+
fi
381+
382+
echo ">> Final working directory check:"
383+
pwd
384+
ls -lh

0 commit comments

Comments
 (0)