Skip to content

Commit 4cceb24

Browse files
authored
Add CHELSA-W5E5 atmospheric data source. (#87)
* Add CW5E5DailyDownloader class. * Replace slashes with os.path.join. * Add generic aggregator class. * Add cw5e5 climate aggregator (wip). * Implement cw5e5 in _open_climatology (wip). * Add missing December in _open_climatology. * Add temperature, precipitation atmosphere kwargs. * Add missing cw5e5 projection info. * Create clim directory if missing. * Disable test region and go global. * Make minor cleanup. * Document changes in whatsnew.
1 parent 10bf71d commit 4cceb24

File tree

4 files changed

+194
-9
lines changed

4 files changed

+194
-9
lines changed

doc/whatsnew.rst

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,24 @@ What's new
2626
v0.3.2 (unreleased)
2727
-------------------
2828

29+
New features
30+
~~~~~~~~~~~~
31+
32+
- Add ``temperature`` and ``precipitation`` arguments, and CHELSA-W5E5_
33+
(aliased ``'cw5e5'``) as a new temperature and precipitation data source in
34+
:func:`hyoga.open.atmosphere` (:issue:`86`, :pull:`87`).
35+
36+
.. _CHELSA-W5E5: https://chelsa-climate.org/chelsa-w5e5-v1-0-daily-climate-data-at-1km-resolution/
37+
2938
Bug fixes
3039
~~~~~~~~~
3140

32-
- Add newly missing requirement of `numpy<2` (:issue:`90`, :pull:`91`).
41+
- Add newly missing requirement of ``numpy<2`` (:issue:`90`, :pull:`91`).
42+
43+
Internal changes
44+
~~~~~~~~~~~~~~~~
45+
46+
- Add aggregators in :mod:`hyoga.open.aggregator` (:issue:`86`, :pull:`87`).
3347

3448
.. _v0.3.1:
3549

hyoga/open/aggregator.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
# Copyright (c) 2024, Julien Seguinot (juseg.dev)
2+
# GNU General Public License v3.0+ (https://www.gnu.org/licenses/gpl-3.0.txt)
3+
4+
"""
5+
This module provides so-called aggregator classes for computing multiyear means
6+
and standard deviations needed by hyoga. Aggregator objects are callables that
7+
may trigger downloads, open multi-file datasets, aggregate statistics, store
8+
them in hyoga's cache directory, and return a path to that file.
9+
"""
10+
11+
import os.path
12+
import xarray as xr
13+
import hyoga.open.downloader
14+
15+
16+
class Aggregator:
    """A callable that aggregates input files and returns an output path.

    This is a base class for callable aggregators. Customization can be done
    by subclassing and overriding the following methods:

    * :meth:`inputs`: return local paths of input files.
    * :meth:`output`: return local path of aggregated file.
    * :meth:`check`: check whether output file is present or valid.
    * :meth:`aggregate`: actually aggregate the data from input files.

    Call parameters
    ---------------
    inputs : iterable of str
        The paths of files to aggregate.
    output : str
        The local path of the aggregated file.

    Returns
    -------
    output : str
        The local path of the aggregated file.
    """

    def __call__(self, *args, **kwargs):
        """See class documentation for actual signature.

        Parameters
        ----------
        *args :
            Positional arguments passed to :meth:`inputs` and :meth:`output`.
            These two methods need to have compatible signatures.
        **kwargs :
            Keyword arguments are passed to :meth:`aggregate` to alter the
            aggregation recipe. This is used to provide a custom function.

        Returns
        -------
        output : str
            The path returned by :meth:`output`. The file is only aggregated
            if :meth:`check` reports that it is missing.
        """
        inputs = self.inputs(*args)
        output = self.output(*args)
        if not self.check(output):
            self.aggregate(inputs, output, **kwargs)
        return output

    def inputs(self, *args):
        """Return local paths of input files (first positional argument)."""
        return args[0]

    def output(self, *args):
        """Return local path of aggregated file (second positional argument).
        """
        return args[1]

    def check(self, path):
        """Check whether output file is present."""
        return os.path.isfile(path)

    def aggregate(self, inputs, output, recipe='avg'):
        """Aggregate `inputs` into `output` file.

        Parameters
        ----------
        inputs : iterable of str
            Paths of the files opened together as a multi-file dataset.
        output : str
            Path of the netCDF file to write.
        recipe : str, optional
            Name of the dataset method called to reduce the ``time``
            dimension, where ``'avg'`` stands for ``'mean'``.

        Returns
        -------
        output : str
            The path of the aggregated file.
        """

        # create directory if missing; a bare file name has an empty
        # directory part, which os.makedirs would refuse
        directory = os.path.dirname(output)
        if directory:
            os.makedirs(directory, exist_ok=True)

        # write to a temporary sibling first, so that an interrupted run
        # never leaves a truncated file that check() would take as valid
        partial = f'{output}.part'

        # open inputs as multi-file dataset
        with xr.open_mfdataset(
                inputs, chunks={'lat': 300, 'lon': 300},
                # FIXME this is a mixed-precision workaround specific to CW5E5
                preprocess=lambda part: part.assign(
                    lat=part.lat.astype('f4'),
                    lon=part.lon.astype('f4'))) as ds:
            reduce = getattr(ds, recipe.replace('avg', 'mean'))
            aggregated = reduce('time', keep_attrs=True)

            # store output as netcdf while the inputs are still open
            print(f"aggregating {output} ...")
            try:
                aggregated.to_netcdf(partial)
            except BaseException:
                # clean up the incomplete file, then let the error through
                if os.path.exists(partial):
                    os.remove(partial)
                raise

        # move the complete file into place and return path
        os.replace(partial, output)
        return output
89+
90+
91+
class CW5E5ClimateAggregator(Aggregator):
    """An aggregator to compute CHELSA-W5E5 climatologies from daily means.

    Call parameters
    ---------------
    variable : 'tasmax', 'tas', 'tasmin', 'rsds', 'pr'
        The short name for the CHELSA-W5E5 variable aggregated among:
        - daily mean precipitation ('pr', kg m-2 s-1),
        - daily mean surface downwelling shortwave radiation ('rsds', W m-2),
        - daily mean near-surface air temperature ('tas', K),
        - daily maximum near surface air temperature ('tasmax', K),
        - daily minimum near surface air temperature ('tasmin', K).
    start : int
        The aggregation start year between 1979 and 2016.
    end : int
        The aggregation end year between 1979 and 2016.
    month : int
        The month for which data is aggregated, between 1 and 12.
    """

    def inputs(self, *args):
        """Return paths of input files, downloading as necessary.

        The paths are returned as a generator, one per year from `start` to
        `end` inclusive, so the downloader is only called once the paths are
        actually iterated over.
        """
        variable, start, end, month = args
        downloader = hyoga.open.downloader.CW5E5DailyDownloader()
        return (
            downloader(variable, year, month)
            for year in range(start, end+1))

    def output(self, *args):
        """Return path of aggregated file in hyoga's cache directory."""
        variable, start, end, month = args

        # the XDG specification asks for the default location both when the
        # variable is unset and when it is set to an empty string
        xdg_cache = os.environ.get("XDG_CACHE_HOME") or os.path.join(
            os.path.expanduser('~'), '.cache')

        # file name encodes variable, two-digit start and end years, month
        return os.path.join(
            xdg_cache, 'hyoga', 'cw5e5', 'clim', f'cw5e5.{variable}.mon.'
            f'{start % 100:02d}{end % 100:02d}.avg.{month:02d}.nc')

hyoga/open/downloader.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022, Julien Seguinot (juseg.github.io)
1+
# Copyright (c) 2024, Julien Seguinot (juseg.dev)
22
# GNU General Public License v3.0+ (https://www.gnu.org/licenses/gpl-3.0.txt)
33

44
"""
@@ -102,6 +102,37 @@ def path(self, *args):
102102
return os.path.join(xdg_cache, 'hyoga', path)
103103

104104

105+
class CW5E5DailyDownloader(CacheDownloader):
    """A class to download CHELSA-W5E5 daily means by variable, year and month.

    Call parameters
    ---------------
    variable : 'tasmax', 'tas', 'tasmin', 'rsds', 'pr'
        The short name for the CHELSA-W5E5 variable downloaded among:
        - daily mean precipitation ('pr', kg m-2 s-1),
        - daily mean surface downwelling shortwave radiation ('rsds', W m-2),
        - daily mean near-surface air temperature ('tas', K),
        - daily maximum near surface air temperature ('tasmax', K),
        - daily minimum near surface air temperature ('tasmin', K).
    year : int
        The year for which data is downloaded, between 1979 and 2016.
    month : int
        The month for which data is downloaded, between 1 and 12.
    """

    def url(self, *args):
        """Return the remote url for given variable, year and month."""
        variable, year, month = args
        folder = (
            'https://files.isimip.org/ISIMIP3a/InputData/climate/atmosphere/'
            'obsclim/global/daily/historical/CHELSA-W5E5')
        filename = (
            f'chelsa-w5e5_obsclim_{variable}_30arcsec_{year:d}{month:02d}.nc')
        return f'{folder}/{filename}'

    def path(self, *args):
        """Return the local path for given variable, year and month."""
        variable, year, month = args
        filename = f'cw5e5.{variable}.day.{year:d}.{month:02d}.nc'
        relative = os.path.join('cw5e5', 'daily', filename)
        # the parent class turns the relative path into a full local path
        return super().path(None, relative)
134+
135+
105136
class OSFDownloader(CacheDownloader):
106137
"""A class to download files by record key from osf.io.
107138

hyoga/open/reprojected.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import xarray as xr
1414
import rioxarray # noqa pylint: disable=unused-import
1515

16+
import hyoga.open.aggregator
1617
import hyoga.open.downloader
1718

1819

@@ -69,6 +70,15 @@ def _open_climatology(source='chelsa', variable='tas'):
6970
paths, combine='nested', concat_dim='time', decode_cf=True)
7071
da = ds.band_data.squeeze()
7172

73+
# CHELSA-W5E5 1981-2010 global climatologies
74+
elif source == 'cw5e5':
75+
aggregator = hyoga.open.aggregator.CW5E5ClimateAggregator()
76+
start, end = 1981, 2010 # FIXME allow custom aggregation period
77+
paths = (aggregator(variable, start, end, mon) for mon in range(1, 13))
78+
ds = xr.open_mfdataset(
79+
paths, combine='nested', concat_dim='time', decode_cf=True)
80+
da = ds[variable].rio.write_crs('+proj=longlat +datum=WGS84')
81+
7282
# invalid sources
7383
else:
7484
raise ValueError(f'{source} is not a valid climatology source.')
@@ -103,7 +113,8 @@ def _reproject_data_array(da, crs, bounds, resolution):
103113
return da
104114

105115

106-
def atmosphere(crs, bounds, resolution=1e3):
116+
def atmosphere(
117+
crs, bounds, temperature='chelsa', precipitation=None, resolution=1e3):
107118
"""
108119
Open atmospheric data from online datasets for PISM.
109120
@@ -117,6 +128,10 @@ def atmosphere(crs, bounds, resolution=1e3):
117128
bounds : (west, south, east, north)
118129
Extent for the resulting dataset in projected coordinates given by
119130
``crs``, will be passed to Dataset.rio.clip_box.
131+
precipitation : 'chelsa' or 'cw5e5', optional
132+
Precipitation rate data source, default to same as temperature.
133+
temperature : 'chelsa' or 'cw5e5', optional
134+
Near-surface air temperature data source, default to 'chelsa'.
120135
resolution : float, optional
121136
Resolution for the output dataset in projected coordinates given by
122137
``crs``, will be passed to Dataset.rio.reproject.
@@ -154,18 +169,17 @@ def atmosphere(crs, bounds, resolution=1e3):
154169
# future parameters:
155170
# - domain : str, optional
156171
# Modelling domain defining geographic projection and bounds.
157-
# - temperature : 'chelsa', optional
158-
# Near-surface air temperature data source, default to 'chelsa'.
159-
# - precipitation : 'chelsa', optional
160-
# Precipitation rate data source, default to same as temperature.
161172
# - elevation : 'chelsa', optional
162173
# Surface elevation for time-lapse corrections, default to same as
163174
# temperature.
164175

176+
# use temperature source by default
177+
precipitation = precipitation or temperature
178+
165179
# open reprojected online data
166-
temp = _open_climatology(variable='tas')
180+
temp = _open_climatology(source=temperature, variable='tas')
167181
temp = _reproject_data_array(temp, crs, bounds, resolution)
168-
prec = _open_climatology(variable='pr')
182+
prec = _open_climatology(source=precipitation, variable='pr')
169183
prec = _reproject_data_array(prec, crs, bounds, resolution)
170184
elev = _open_elevation(source='chelsa')
171185
elev = _reproject_data_array(elev, crs, bounds, resolution)

0 commit comments

Comments
 (0)