Skip to content

Commit

Permalink
Add CHELSA-W5E5 atmospheric data source. (#87)
Browse files Browse the repository at this point in the history
* Add CW5E5DailyDownloader class.

* Replace slashes with os.path.join.

* Add generic aggregator class.

* Add cw5e5 climate aggregator (wip).

* Implement cw5e5 in _open_climatology (wip).

* Add missing December in _open_climatology.

* Add temperature, precipitation atmosphere kwargs.

* Add missing cw5e5 projection info.

* Create clim directory if missing.

* Disable test region and go global.

* Make minor cleanup.

* Document changes in whatsnew.
  • Loading branch information
juseg authored Jun 20, 2024
1 parent 10bf71d commit 4cceb24
Show file tree
Hide file tree
Showing 4 changed files with 194 additions and 9 deletions.
16 changes: 15 additions & 1 deletion doc/whatsnew.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,24 @@ What's new
v0.3.2 (unreleased)
-------------------

New features
~~~~~~~~~~~~

- Add ``temperature`` and ``precipitation`` arguments, and CHELSA-W5E5_
(aliased ``'cw5e5'``) as a new temperature and precipitation data source in
:func:`hyoga.open.atmosphere` (:issue:`86`, :pull:`87`).

.. _CHELSA-W5E5: https://chelsa-climate.org/chelsa-w5e5-v1-0-daily-climate-data-at-1km-resolution/

Bug fixes
~~~~~~~~~

- Add newly missing requirement of `numpy<2` (:issue:`90`, :pull:`91`).
- Add newly missing requirement of ``numpy<2`` (:issue:`90`, :pull:`91`).

Internal changes
~~~~~~~~~~~~~~~~

- Add aggregators in :mod:`hyoga.open.aggregator` (:issue:`86`, :pull:`87`).

.. _v0.3.1:

Expand Down
126 changes: 126 additions & 0 deletions hyoga/open/aggregator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# Copyright (c) 2024, Julien Seguinot (juseg.dev)
# GNU General Public License v3.0+ (https://www.gnu.org/licenses/gpl-3.0.txt)

"""
This module provides so-called aggregator classes for computing multiyear means
and standard deviations needed by hyoga. Aggregator objects are callables that
may trigger downloads, open multi-file datasets, aggregate statistics, store
them in hyoga's cache directory, and return a path to that file.
"""

import os.path
import xarray as xr
import hyoga.open.downloader


class Aggregator():
    """A callable that aggregates input files and returns an output path.

    This is a base class for callable aggregators. Customization can be done
    by subclassing and overwriting the following methods:

    * :meth:`inputs`: return local paths of input files.
    * :meth:`output`: return local path of aggregated file.
    * :meth:`check`: check whether output file is present or valid.
    * :meth:`aggregate`: actually aggregate the data from input files.

    Call parameters
    ---------------
    inputs : list of str
        A list of paths of files to aggregate.
    output : str
        The local path of the aggregated file.

    Returns
    -------
    output : str
        The local path of the aggregated file.
    """

    def __call__(self, *args, **kwargs):
        """See class documentation for actual signature.

        Parameters
        ----------
        *args :
            Positional arguments passed to :meth:`inputs` and :meth:`output`.
            These two methods need to have compatible signatures.
        **kwargs :
            Keyword arguments are passed to :meth:`aggregate` to alter the
            aggregation recipe. This is used to provide a custom function.

        Returns
        -------
        output : str
            The local path of the aggregated file.
        """
        inputs = self.inputs(*args)
        output = self.output(*args)

        # only aggregate when the output is not already available
        if not self.check(output):
            self.aggregate(inputs, output, **kwargs)
        return output

    def inputs(self, *args):
        """Return local paths of input files (first call argument)."""
        return args[0]

    def output(self, *args):
        """Return local path of aggregated file (second call argument)."""
        return args[1]

    def check(self, path):
        """Check whether output file is present."""
        return os.path.isfile(path)

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the parent directory of `path` if it names one."""
        directory = os.path.dirname(path)

        # a bare filename has an empty dirname, which os.makedirs rejects
        if directory:
            os.makedirs(directory, exist_ok=True)

    def aggregate(self, inputs, output, recipe='avg'):
        """Aggregate `inputs` into `output` file.

        Parameters
        ----------
        inputs : iterable of str
            Paths of the files to aggregate, opened together as a multi-file
            dataset.
        output : str
            Path of the netCDF file to write. Its parent directory is created
            if missing.
        recipe : str, optional
            Name of the dataset method applied along the ``time`` dimension,
            where ``'avg'`` is translated to ``'mean'``. Default to ``'avg'``.

        Returns
        -------
        output : str
            The local path of the aggregated file.
        """

        # create directory if missing
        self._ensure_parent_dir(output)

        # open inputs as multi-file dataset
        with xr.open_mfdataset(
                inputs, chunks={'lat': 300, 'lon': 300},
                # FIXME this is a mixed-precision workaround specific to CW5E5
                preprocess=lambda ds: ds.assign(
                    lat=ds.lat.astype('f4'), lon=ds.lon.astype('f4'))) as ds:
            reduction = getattr(ds, recipe.replace('avg', 'mean'))
            aggregated = reduction('time', keep_attrs=True)

            # store output as netcdf while the input files are still open
            print(f"aggregating {output} ...")
            aggregated.to_netcdf(output)

        return output


class CW5E5ClimateAggregator(Aggregator):
    """An aggregator to compute CHELSA-W5E5 climatologies from daily means.

    Call parameters
    ---------------
    variable : 'tasmax', 'tas', 'tasmin', 'rsds', 'pr'
        The short name for the CHELSA-W5E5 variable aggregated among:

        - daily mean precipitation ('pr', kg m-2 s-1),
        - daily mean surface downwelling shortwave radiation ('rsds', W m-2),
        - daily mean near-surface air temperature ('tas', K),
        - daily maximum near surface air temperature ('tasmax', K),
        - daily minimum near surface air temperature ('tasmin', K).
    start : int
        The aggregation start year between 1979 and 2016.
    end : int
        The aggregation end year between 1979 and 2016.
    month : int
        The month for which data is aggregated, between 1 and 12.
    """

    def inputs(self, *args):
        """Return paths of input files, downloading as necessary.

        The paths are returned as a generator with one item per year from
        `start` to `end` inclusive, so the downloader is only called for a
        given year when that item is consumed.
        """
        variable, first_year, last_year, month = args
        fetch = hyoga.open.downloader.CW5E5DailyDownloader()
        return (
            fetch(variable, year, month)
            for year in range(first_year, last_year + 1))

    def output(self, *args):
        """Return path of the aggregated file in hyoga's cache directory."""
        variable, first_year, last_year, month = args

        # cache root is $XDG_CACHE_HOME when set, otherwise ~/.cache
        fallback = os.path.join(os.path.expanduser('~'), '.cache')
        cache_root = os.environ.get("XDG_CACHE_HOME", fallback)

        # file name encodes variable, two-digit start and end years, and month
        filename = (
            f'cw5e5.{variable}.mon.'
            f'{first_year % 100:02d}{last_year % 100:02d}.avg.{month:02d}.nc')
        return os.path.join(cache_root, 'hyoga', 'cw5e5', 'clim', filename)
33 changes: 32 additions & 1 deletion hyoga/open/downloader.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022, Julien Seguinot (juseg.github.io)
# Copyright (c) 2024, Julien Seguinot (juseg.dev)
# GNU General Public License v3.0+ (https://www.gnu.org/licenses/gpl-3.0.txt)

"""
Expand Down Expand Up @@ -102,6 +102,37 @@ def path(self, *args):
return os.path.join(xdg_cache, 'hyoga', path)


class CW5E5DailyDownloader(CacheDownloader):
    """A class to download CHELSA-W5E5 daily means by variable, year and month.

    Call parameters
    ---------------
    variable : 'tasmax', 'tas', 'tasmin', 'rsds', 'pr'
        The short name for the CHELSA-W5E5 variable downloaded among:

        - daily mean precipitation ('pr', kg m-2 s-1),
        - daily mean surface downwelling shortwave radiation ('rsds', W m-2),
        - daily mean near-surface air temperature ('tas', K),
        - daily maximum near surface air temperature ('tasmax', K),
        - daily minimum near surface air temperature ('tasmin', K).
    year : int
        The year for which data is downloaded, between 1979 and 2016.
    month : int
        The month for which data is downloaded, between 1 and 12.
    """

    def url(self, *args):
        """Return the remote url of the file for a variable, year and month."""
        variable, year, month = args
        address = (
            f'https://files.isimip.org/ISIMIP3a/InputData/climate/atmosphere/'
            f'obsclim/global/daily/historical/CHELSA-W5E5/chelsa-w5e5_obsclim_'
            f'{variable}_30arcsec_{year:d}{month:02d}.nc')
        return address

    def path(self, *args):
        """Return the local path of the file for a variable, year and month."""
        variable, year, month = args
        filename = f'cw5e5.{variable}.day.{year:d}.{month:02d}.nc'
        relative = os.path.join('cw5e5', 'daily', filename)

        # NOTE(review): the parent method presumably ignores its first
        # argument and resolves the second against hyoga's cache directory;
        # confirm against CacheDownloader.path.
        return super().path(None, relative)


class OSFDownloader(CacheDownloader):
"""A class to download files by record key from osf.io.
Expand Down
28 changes: 21 additions & 7 deletions hyoga/open/reprojected.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import xarray as xr
import rioxarray # noqa pylint: disable=unused-import

import hyoga.open.aggregator
import hyoga.open.downloader


Expand Down Expand Up @@ -69,6 +70,15 @@ def _open_climatology(source='chelsa', variable='tas'):
paths, combine='nested', concat_dim='time', decode_cf=True)
da = ds.band_data.squeeze()

# CHELSA-W5E5 1981-2010 global climatologies (aggregated from daily data)
elif source == 'cw5e5':
aggregator = hyoga.open.aggregator.CW5E5ClimateAggregator()
start, end = 1981, 2010 # FIXME allow custom aggregation period
paths = (aggregator(variable, start, end, mon) for mon in range(1, 13))
ds = xr.open_mfdataset(
paths, combine='nested', concat_dim='time', decode_cf=True)
da = ds[variable].rio.write_crs('+proj=longlat +datum=WGS84')

# invalid sources
else:
raise ValueError(f'{source} is not a valid climatology source.')
Expand Down Expand Up @@ -103,7 +113,8 @@ def _reproject_data_array(da, crs, bounds, resolution):
return da


def atmosphere(crs, bounds, resolution=1e3):
def atmosphere(
crs, bounds, temperature='chelsa', precipitation=None, resolution=1e3):
"""
Open atmospheric data from online datasets for PISM.
Expand All @@ -117,6 +128,10 @@ def atmosphere(crs, bounds, resolution=1e3):
bounds : (west, south, east, north)
Extent for the resulting dataset in projected coordinates given by
``crs``, will be passed to Dataset.rio.clip_box.
precipitation : 'chelsa' or 'cw5e5', optional
Precipitation rate data source, default to same as temperature.
temperature : 'chelsa' or 'cw5e5', optional
Near-surface air temperature data source, default to 'chelsa'.
resolution : float, optional
Resolution for the output dataset in projected coordinates given by
``crs``, will be passed to Dataset.rio.reproject.
Expand Down Expand Up @@ -154,18 +169,17 @@ def atmosphere(crs, bounds, resolution=1e3):
# future parameters:
# - domain : str, optional
# Modelling domain defining geographic projection and bounds.
# - temperature : 'chelsa', optional
# Near-surface air temperature data source, default to 'chelsa'.
# - precipitation : 'chelsa', optional
# Precipitation rate data source, default to same as temperature.
# - elevation : 'chelsa', optional
# Surface elevation for time-lapse corrections, default to same as
# temperature.

# use temperature source by default
precipitation = precipitation or temperature

# open reprojected online data
temp = _open_climatology(variable='tas')
temp = _open_climatology(source=temperature, variable='tas')
temp = _reproject_data_array(temp, crs, bounds, resolution)
prec = _open_climatology(variable='pr')
prec = _open_climatology(source=precipitation, variable='pr')
prec = _reproject_data_array(prec, crs, bounds, resolution)
elev = _open_elevation(source='chelsa')
elev = _reproject_data_array(elev, crs, bounds, resolution)
Expand Down

0 comments on commit 4cceb24

Please sign in to comment.