Skip to content

Commit 4ffd09e

Browse files
authored
Merge pull request #103 from GeoOcean/101-generalize-copernicus-downloader
101 generalize copernicus downloader
2 parents ebd61d5 + 948168f commit 4ffd09e

File tree

4 files changed

+298
-0
lines changed

4 files changed

+298
-0
lines changed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"datasets": {
3+
"forecast_data": {
4+
"description": "ECMWF OpenData Forecast Data",
5+
"url": "https://confluence.ecmwf.int/display/DAC/ECMWF+open+data%3A+real-time+forecasts+from+IFS+and+AIFS",
6+
"models": [
7+
"ifs",
8+
"aifs"
9+
],
10+
"resolutions": [
11+
"0p25"
12+
]
13+
}
14+
}
15+
}

bluemath_tk/downloaders/ecmwf/__init__.py

Whitespace-only changes.
Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
import json
2+
import os
3+
from typing import List, Union
4+
5+
import xarray as xr
6+
from ecmwf.opendata import Client
7+
8+
from .._base_downloaders import BaseDownloader
9+
10+
11+
class ECMWFDownloader(BaseDownloader):
    """
    This is the main class to download data from the ECMWF.

    Attributes
    ----------
    product : str
        The product to download data from. Currently only OpenData is supported.
    product_config : dict
        The configuration for the product to download data from.
    client : ecmwf.opendata.Client or None
        The client to interact with the ECMWF API. It is only created when
        the downloader is built with ``check=False``; otherwise it is None.
    model : str
        The model requested at construction time.
    resolution : str
        The resolution requested at construction time.

    Examples
    --------
    .. jupyter-execute::

        from bluemath_tk.downloaders.ecmwf.ecmwf_downloader import ECMWFDownloader

        ecmwf_downloader = ECMWFDownloader(
            product="OpenData",
            base_path_to_download="/path/to/ECMWF/",  # Will be created if not available
            check=False,  # check=True only reports what is on disk, it never downloads
        )
        dataset = ecmwf_downloader.download_data(
            load_data=True,
            param=["msl"],
            step=[0, 240],
            type="fc",
        )
        print(dataset)
    """

    # Product configurations are read once, when the class is defined.
    # A context manager is used so the file handle is closed right away.
    with open(
        os.path.join(os.path.dirname(__file__), "OpenData", "OpenData_config.json"),
        encoding="utf-8",
    ) as _config_file:
        products_configs = {"OpenData": json.load(_config_file)}
    del _config_file  # Do not leave the closed handle as a class attribute

    def __init__(
        self,
        product: str,
        base_path_to_download: str,
        model: str = "ifs",
        resolution: str = "0p25",
        debug: bool = True,
        check: bool = True,
    ) -> None:
        """
        This is the constructor for the ECMWFDownloader class.

        Parameters
        ----------
        product : str
            The product to download data from. Currently only OpenData is supported.
        base_path_to_download : str
            The base path to download the data to.
        model : str, optional
            The model to download data from. Default is "ifs".
        resolution : str, optional
            The resolution to download data from. Default is "0p25".
        debug : bool, optional
            Whether to run in debug mode. Default is True.
        check : bool, optional
            Whether to just check the data. When True, the model and
            resolution are not validated, no API client is created and
            nothing is downloaded. Default is True.

        Raises
        ------
        ValueError
            If the product configuration is not found, or (only when
            ``check`` is False) if the model or the resolution is not
            listed in the product configuration.
        """

        super().__init__(
            base_path_to_download=base_path_to_download, debug=debug, check=check
        )
        self._product = product
        self._product_config = self.products_configs.get(product)
        if self._product_config is None:
            raise ValueError(f"{product} configuration not found")
        self.set_logger_name(
            f"ECMWFDownloader-{product}", level="DEBUG" if debug else "INFO"
        )

        # Always define the attribute so the ``client`` property never raises
        # AttributeError in check mode.
        self._client = None
        self.model = model
        self.resolution = resolution

        if self.check:
            self.logger.info("---- CHECKING DATA ----")
            return

        forecast_config = self.product_config["datasets"]["forecast_data"]
        if model not in forecast_config["models"]:
            raise ValueError(f"Model {model} not supported for {self.product}")
        if resolution not in forecast_config["resolutions"]:
            raise ValueError(
                f"Resolution {resolution} not supported for {self.product}"
            )
        self._client = Client(
            source="ecmwf",
            model=model,
            resol=resolution,
            preserve_request_order=False,
            infer_stream_keyword=True,
        )
        self.logger.info("---- DOWNLOADING DATA ----")

    @property
    def product(self) -> str:
        """The name of the product this downloader was created for."""
        return self._product

    @property
    def product_config(self) -> dict:
        """The configuration dictionary loaded for the product."""
        return self._product_config

    @property
    def client(self) -> Union[Client, None]:
        """The ECMWF API client, or None when running in check mode."""
        return self._client

    def list_datasets(self) -> List[str]:
        """
        Lists the datasets available for the product.

        Returns
        -------
        List[str]
            The list of datasets available for the product.
        """

        return list(self.product_config["datasets"])

    def download_data(
        self, load_data: bool = False, *args, **kwargs
    ) -> Union[str, xr.Dataset]:
        """
        Downloads the data for the product.

        Parameters
        ----------
        load_data : bool, optional
            Whether to load the data into an xarray.Dataset. Default is False.
        *args
            The arguments to pass to the download function.
        **kwargs
            The keyword arguments to pass to the download function.

        Returns
        -------
        Union[str, xr.Dataset]
            The path to the downloaded file if load_data is False, otherwise
            the xarray.Dataset opened from that file with the cfgrib engine.

        Raises
        ------
        ValueError
            If the product is not supported.
        """

        if self.product != "OpenData":
            raise ValueError(f"Download for product {self.product} not supported")

        downloaded_file_path = self.download_data_open_data(*args, **kwargs)
        if load_data:
            return xr.open_dataset(downloaded_file_path, engine="cfgrib")
        return downloaded_file_path

    def download_data_open_data(
        self,
        force: bool = False,
        **kwargs,
    ) -> str:
        """
        Downloads the data for the OpenData product.

        The target file is stored under
        ``<base_path>/<product>/<model>/<resolution>/`` and named after the
        requested ``param``, ``step`` and ``type`` values.

        Parameters
        ----------
        force : bool, optional
            Whether to force the download even if the target file already
            exists. Ignored in check mode. Default is False.
        **kwargs
            The keyword arguments to pass to the download function. The
            ``param``, ``step`` and ``type`` entries (when present) are also
            used to build the output file name.

        Returns
        -------
        str
            The path to the downloaded file. In check mode the path is
            returned whether or not the file exists.
        """

        # A single parameter given as a plain string must be treated as one
        # name, not as a sequence of characters.
        variables = kwargs.get("param", [])
        if isinstance(variables, str):
            variables = [variables]
        steps = kwargs.get("step", [])
        if not isinstance(steps, (list, tuple)):
            steps = [steps]
        data_type = kwargs.get("type", "fc")

        variables_tag = "_".join(str(variable) for variable in variables)
        steps_tag = "_".join(str(step) for step in steps)
        output_grib_file = os.path.join(
            self.base_path_to_download,
            self.product,
            self.model,
            self.resolution,
            f"{variables_tag}_{steps_tag}_{data_type}.grib2",
        )
        if not self.check:
            os.makedirs(os.path.dirname(output_grib_file), exist_ok=True)

        already_on_disk = os.path.exists(output_grib_file)
        if not self.check and (force or not already_on_disk):
            self.logger.debug(f"Downloading: {output_grib_file}")
            self.client.retrieve(
                target=output_grib_file,
                **kwargs,
            )
        elif already_on_disk:
            self.logger.debug(f"{output_grib_file} already downloaded")
        else:
            # Only reachable in check mode: report the file without fetching it
            self.logger.debug(f"{output_grib_file} not downloaded")

        return output_grib_file
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import tempfile
2+
import unittest
3+
4+
import xarray as xr
5+
6+
from bluemath_tk.downloaders.ecmwf.ecmwf_downloader import ECMWFDownloader
7+
8+
9+
class TestECMWFDownloader(unittest.TestCase):
    """Tests for ECMWFDownloader using the OpenData product."""

    def setUp(self):
        # Download into a throw-away directory that is removed after each
        # test, so nothing is left in the working directory.
        temp_dir = tempfile.TemporaryDirectory()
        self.addCleanup(temp_dir.cleanup)
        self.temp_dir = temp_dir.name
        self.downloader = ECMWFDownloader(
            product="OpenData",
            base_path_to_download=self.temp_dir,
            check=False,  # Actually download: test_download_data loads the file
        )

    def test_list_datasets(self):
        datasets = self.downloader.list_datasets()
        self.assertIsInstance(datasets, list)
        self.assertGreater(len(datasets), 0)
        print(f"Available datasets: {datasets}")

    def test_download_data(self):
        dataset = self.downloader.download_data(
            load_data=True,
            param=["msl"],
            step=[0, 240],
            type="fc",
            force=False,
        )
        self.assertIsInstance(dataset, xr.Dataset)
        # Cleanups run last-in first-out, so the dataset is closed before
        # the temporary directory holding its file is removed.
        self.addCleanup(dataset.close)
        print(dataset)


# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)