Skip to content
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ __pycache__/

# netcdf files - whitelist specific files
*.nc
!tests/resources/surfdata_4x5.nc
!tests/resources/*.nc

# C extensions
*.so
Expand Down
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,14 @@ Generating the land use x pft static mapping dataset requires as input the CLM 5

The LUH2 static data file noted in the LUH2 historical dataset section above is also required as an input.

### Regridding weight file for spectral element regridding

In order to regrid to spectral element resolution an ESMF generated weight-file is needed. This can be generated using ESMF (https://earthsystemmodeling.org/docs/release/ESMF_8_4_2/ESMF_refdoc/node3.html) :
``` sh
ESMF_RegridWeightGen --source name_of_file_with_source_resolution --destination name_of_file_with_spectral_element_resolution --weight name_of_weight_file --method conserve
```
Because the LUH2 data are very highly resolved, ESMF may not be able to generate weights to regrid directly from the LUH2 datasets to the spectral element resolution. In that case the source resolution used here should be a regular grid fairly close to the target spectral element resolution (this code version was tested regridding a 0.9x1.25 degree grid onto the ne30 spectral element grid).

## Installation

### Dependencies
Expand Down Expand Up @@ -107,6 +115,9 @@ options:
beginning of date range of interest
-e END, --end END ending of date range to slice
-o OUTPUT, --output OUTPUT
-i INTERMEDIATE_REGRIDDING_FILE, --intermediate_regridding_file INTERMEDIATE_REGRIDDING_FILE
filename of intermediate regridding weightfile for two-step regridding
to spectral element resolution
output filename
```

Expand All @@ -128,6 +139,9 @@ options:
-h, --help show this help message and exit
-o OUTPUT, --output OUTPUT
output filename
-i INTERMEDIATE_REGRIDDING_FILE, --intermediate_regridding_file INTERMEDIATE_REGRIDDING_FILE
filename of intermediate regridding weightfile for two-step regridding
to spectral element resolution

```

Expand Down
10 changes: 10 additions & 0 deletions src/landusedata/_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ def main(argv=None):
luh2_parser.add_argument("-o","--output",
default = 'LUH2_timeseries.nc',
help = "output filename")
luh2_parser.add_argument("-i", "--intermediate_regridding_file",
default = None,
help = "filename of file on intermediate regridding format")

# Landuse x pft subparser arguments
lupft_parser.add_argument('regrid_target_file',
Expand All @@ -65,6 +68,13 @@ def main(argv=None):
lupft_parser.add_argument("-o","--output",
default = 'fates_landuse_pft_map.nc',
help = "output filename")
lupft_parser.add_argument("-w", "--regridder_weights",
default = 'regridder.nc',
help = "filename of regridder weights to save")
lupft_parser.add_argument("-i", "--intermediate_regridding_file",
default = None,
help = "filename of file on intermediate regridding format")


# Parse the arguments
args = parser.parse_args(argv)
Expand Down
17 changes: 14 additions & 3 deletions src/landusedata/landusepft.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import argparse, os, sys
import xarray as xr
import xesmf as xe
from datetime import datetime, UTC

from landusedata.landusepftmod import ImportLandusePFTFile, AddLatLonCoordinates, RenormalizePFTs
from landusedata.utils import ImportLUH2StaticFile, ImportRegridTarget
from landusedata.utils import SetMaskRegridTarget, DefineStaticMask
from landusedata.regrid import GenerateRegridder

def main(args):

Expand Down Expand Up @@ -70,7 +72,9 @@ def main(args):

# Regrid current dataset
print('Regridding {}'.format(varname))
regridder = xe.Regridder(ds_percent, ds_target, "conservative_normed")
#regridder = xe.Regridder(ds_percent, ds_target, "conservative_normed")
print(args)
regridder = GenerateRegridder(ds_percent, ds_target, args.regridder_weights, regrid_reuse=False, regrid_method="conservative_normed", intermediate_regridding_file=args.intermediate_regridding_file)
ds_regrid = regridder(ds_percent)

# Drop mask to avoid conflicts when merging
Expand All @@ -79,6 +83,8 @@ def main(args):
ds_regrid = ds_regrid.drop_vars(['mask'])

# Append the new dataset to the output dataset
print(ds_regrid)
print(ds_regrid.indexes)
ds_output = ds_output.merge(ds_regrid)

# Duplicate the 'primary' data array into a 'secondary' data array. Eventually
Expand All @@ -88,8 +94,13 @@ def main(args):
# ds_regrid = ds_regrid.rename_dims(dims_dict={'lat':'lsmlat','lon':'lsmlon'})

# Output dataset to netcdf file
print('Writing fates landuse x pft dataset to file')
output_file = os.path.join(os.getcwd(),args.output)
if args.output == "fates_landuse_pft_map.nc":
fname = f"fates_landuse_pft_map_to_{args.regrid_target_file.split('/')[-1].split('.')[0]}_{datetime.now(UTC).strftime('%y%m%d')}.nc"
print(fname)
output_file = os.path.join(os.getcwd(), fname)
else:
output_file = os.path.join(os.getcwd(),args.output)
print(f'Writing fates landuse x pft dataset to file {output_file}')
ds_output.to_netcdf(output_file)

if __name__ == "__main__":
Expand Down
16 changes: 11 additions & 5 deletions src/landusedata/luh2.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import argparse, os, sys

import xarray as xr
from datetime import datetime, UTC

from landusedata.luh2mod import ImportLUH2TimeSeries, CorrectStateSum
from landusedata.regrid import RegridConservative, RegridLoop
Expand Down Expand Up @@ -50,7 +51,8 @@ def main(args):
# Regrid the luh2 data to the target grid
# TO DO: provide a check for the save argument based on the input arguments
regrid_luh2,regridder_luh2 = RegridConservative(dataset, ds_regrid_target,
args.regridder_weights, regrid_reuse)
args.regridder_weights, regrid_reuse,
intermediate_regridding_file=args.intermediate_regridding_file)

# Regrid the inverted ice/water fraction data to the target grid
regrid_land_fraction = regridder_luh2(ds_luh2_static)
Expand All @@ -74,14 +76,15 @@ def main(args):
regrid_luh2["LATIXY"] = ds_regrid_target["LATIXY"] # TO DO: double check if this is strictly necessary

# Rename the dimensions for the output. This needs to happen after the "LONGXY/LATIXY" assignment
if (not 'lsmlat' in list(regrid_luh2.dims)):
if (not 'lsmlat' in list(regrid_luh2.dims)) and 'lsmlat' in list(ds_regrid_target.dims):
regrid_luh2 = regrid_luh2.rename_dims({'lat':'lsmlat','lon':'lsmlon'})

# Reapply the coordinate attributes. This is a workaround for an xarray bug (#8047)
# Currently only need time
regrid_luh2.time.attrs = dataset.time.attrs
regrid_luh2.lat.attrs = dataset.lat.attrs
regrid_luh2.lon.attrs = dataset.lon.attrs
if 'lsmlat' in list(ds_regrid_target.dims):
regrid_luh2.lat.attrs = dataset.lat.attrs
regrid_luh2.lon.attrs = dataset.lon.attrs

# Merge previous regrided luh2 file with merge input target
# TO DO: check that the grid resolution matches
Expand All @@ -97,7 +100,10 @@ def main(args):

# Write the files
# TO DO: add check to handle if the user enters the full path
output_file = os.path.join(os.getcwd(),args.output)
if args.output == "LUH2_timeseries.nc":
output_file = os.path.join(os.getcwd(), f"LUH2_timeseries_to_{args.regrid_target_file.split('/')[-1].split('.')[0]}_{datetime.now(UTC).strftime('%y%m%d')}.nc")
else:
output_file = os.path.join(os.getcwd(),args.output)
print("generating output: {}".format(output_file))
ds_output.to_netcdf(output_file)

Expand Down
46 changes: 39 additions & 7 deletions src/landusedata/luh2mod.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,14 @@ def PrepDataset(input_dataset,start=None,stop=None):

# Get the units to determine the file time
# It is expected that the units of time is 'years since ...'

orig_unit = input_dataset.time.units
if orig_unit.startswith("days"):
input_dataset['time'] = input_dataset.time.values//365
input_dataset.time.attrs['units'] = orig_unit.replace("days", "years")
time_since_array = input_dataset.time.units.split()
if (time_since_array[0] != 'years'):
sys.exit("FileTimeUnitsError: input file units of time is not 'years since ...'")
if not (time_since_array[0] == 'years'):
sys.exit("FileTimeUnitsError: input file units of time is not 'years since ...' or 'days since ...'")

# Note that datetime package is not used as the date range might
# be beyond the bounds of the packages applicable bounds
Expand Down Expand Up @@ -57,7 +62,10 @@ def PrepDataset(input_dataset,start=None,stop=None):
# Truncate the data to the user defined range
# This might need some more error handling for when
# the start/stop is out of range
input_dataset = input_dataset.sel(time=slice(years_since_start,years_since_stop))
if (time_since_array[0] == 'years'):
input_dataset = input_dataset.sel(time=slice(years_since_start,years_since_stop))
else:
input_dataset = input_dataset.sel(time=slice(years_since_start*365.,years_since_stop*365.))

# Save the timesince as a variable for future use
input_dataset["timesince"] = time_since
Expand All @@ -75,16 +83,40 @@ def _BoundsVariableFixLUH2(input_dataset):

# Create lat and lon bounds as a single dimension array out of the LUH2 two dimensional_bounds array.
# Future todo: is it possible to have xESMF recognize and use the original 2D array?
input_dataset["lat_b"] = np.insert(input_dataset.lat_bounds[:,1].data,0,input_dataset.lat_bounds[0,0].data)
input_dataset["lon_b"] = np.insert(input_dataset.lon_bounds[:,1].data,0,input_dataset.lon_bounds[0,0].data)
print(list(input_dataset.data_vars))
if "lat_bounds" in input_dataset.keys():
input_dataset["lat_b"] = np.insert(input_dataset.lat_bounds[:,1].data,0,input_dataset.lat_bounds[0,0].data)
elif "lat_bnds" in input_dataset.keys():
input_dataset["lat_b"] = np.insert(input_dataset.lat_bnds[:,1].data,0,input_dataset.lat_bnds[0,0].data)
# If bounds not found, make up evenly distributed bounds:
else:
input_dataset["lat_b"] = make_evenly_distributed_bounds_array(input_dataset.lat.values)
#sys.exit("LatBoundError: input file does not have latitudinal bounds of a recognised name (lat_bounds or lat_bnds)")
if "lon_bounds" in input_dataset.keys():
input_dataset["lon_b"] = np.insert(input_dataset.lon_bounds[:,1].data,0,input_dataset.lon_bounds[0,0].data)
# Drop the old boundary names to avoid confusion
input_dataset = input_dataset.drop(labels=['lat_bounds','lon_bounds'])
elif "lon_bnds" in input_dataset.keys():
input_dataset["lon_b"] = np.insert(input_dataset.lon_bnds[:,1].data,0,input_dataset.lon_bnds[0,0].data)
# Drop the old boundary names to avoid confusion
input_dataset = input_dataset.drop(labels=['lat_bnds','lon_bnds'])
# If bounds not found, make up evenly distributed bounds:
else:
input_dataset["lon_b"] = make_evenly_distributed_bounds_array(input_dataset.lon.values)
#sys.exit("LonBoundError: input file does not have longitudinal bounds of a recognised name (lon_bounds or lon_bnds)")

# Drop the old boundary names to avoid confusion
input_dataset = input_dataset.drop(labels=['lat_bounds','lon_bounds'])

print("LUH2 dataset lat/lon boundary variables formatted and added as new variable for xESMF")

return(input_dataset)

def make_evenly_distributed_bounds_array(orig_array):
    """Construct cell-edge (bounds) values for evenly spaced cell centers.

    Used as a fallback when an input dataset lacks lat/lon bounds
    variables of a recognized name. Assumes the centers in *orig_array*
    are evenly spaced; the spacing of the first two entries determines
    the half-cell width applied everywhere.

    Arguments:
        orig_array - 1D numpy array of monotonically ordered cell centers

    Returns:
        1D numpy array of length len(orig_array) + 1 holding the cell edges
    """
    # Half the grid spacing, inferred from the first pair of centers.
    half_step = (orig_array[1] - orig_array[0]) / 2.0
    # Lower edge of every cell, plus the upper edge of the final cell.
    return np.append(orig_array - half_step, orig_array[-1] + half_step)

# Temporary: Add minor correction factor to assure states sum to one
def CorrectStateSum(input_dataset):

Expand Down
101 changes: 95 additions & 6 deletions src/landusedata/regrid.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,47 @@
import numpy as np
import xesmf as xe
import xarray as xr

def RegridConservative(ds_to_regrid, ds_regrid_target, regridder_weights, regrid_reuse):
import sys

from .utils import ImportRegridTarget

class TwoStepRegridder:
    """Regrid onto an unstructured (spectral element) grid in two steps.

    ESMF may be unable to generate weights directly from a very highly
    resolved source grid to a spectral element grid. This helper therefore
    first regrids the source data onto an intermediate regular grid
    (step one, weights computed by xESMF) and then applies a pre-generated
    ESMF weight file mapping that intermediate grid onto the spectral
    element grid (step two).
    """

    def __init__(self, ds_to_regrid, regridder_steptwo_weights, intermediate_regridding_file, regrid_method="conservative"):
        """Build both regridders up front so they can be reused per variable.

        Arguments:
            ds_to_regrid                 - source dataset (defines the source grid)
            regridder_steptwo_weights    - ESMF-generated weight file mapping the
                                           intermediate grid to the spectral element grid
            intermediate_regridding_file - file defining the intermediate regular grid
            regrid_method                - xESMF regridding method name
        """
        # Step two: regridder constructed from the ESMF-generated weight file.
        self.se_regridder = make_se_regridder(regridder_steptwo_weights, regrid_method=regrid_method)
        # Step one: source grid -> intermediate regular grid.
        intermediate_ds = ImportRegridTarget(intermediate_regridding_file)
        self.step_one_regridder = xe.Regridder(ds_to_regrid, intermediate_ds, regrid_method)

    def two_step_regridding(self, ds_to_regrid):
        """Apply both regridding steps.

        Returns the dataset on the unstructured destination grid with the
        single spatial dimension renamed to 'lndgrid'.
        """
        ds_intermediate = self.step_one_regridder(ds_to_regrid)
        # The size-1 'lat' dimension is an artifact of representing the
        # unstructured grid as a dummy 2D grid; drop it after regridding.
        ds_se = self.se_regridder(ds_intermediate).squeeze("lat", drop=True)
        return ds_se.rename({"lon": "lndgrid"}).drop_vars("lndgrid")


def RegridConservative(ds_to_regrid, ds_regrid_target, regridder_weights, regrid_reuse, regrid_method="conservative", intermediate_regridding_file=None):
    """Regrid every variable of a dataset onto the target grid.

    Builds the appropriate regridder (including the two-step spectral
    element path when the target has no 'lat' dimension), applies it to
    all variables, and returns both the regridded dataset and the
    regridder so callers can reuse the transformation.
    """
    # Build the grid transformation once; it is reused for every variable.
    transform = GenerateRegridder(
        ds_to_regrid,
        ds_regrid_target,
        regridder_weights,
        regrid_reuse,
        regrid_method=regrid_method,
        intermediate_regridding_file=intermediate_regridding_file,
    )

    # Apply the transformation variable by variable.
    regridded = RegridLoop(ds_to_regrid, transform)

    return regridded, transform

def GenerateRegridder(ds_to_regrid, ds_regrid_target, regridder_weights_file, regrid_reuse):

regrid_method = "conservative"
def GenerateRegridder(ds_to_regrid, ds_regrid_target, regridder_weights_file, regrid_reuse, regrid_method = "conservative", intermediate_regridding_file=None):

print("\nDefining regridder, method: ", regrid_method)
#print(ds_to_regrid.dims)
if 'lat' not in ds_regrid_target.dims:
two_step_regridder = TwoStepRegridder(ds_to_regrid, regridder_weights_file, intermediate_regridding_file, regrid_method=regrid_method)
regridder = two_step_regridder.two_step_regridding #make_se_regridder(regridder_weights_file, regrid_method=regrid_method)

if (regrid_reuse):
elif (regrid_reuse):
regridder = xe.Regridder(ds_to_regrid, ds_regrid_target,
regrid_method, weights=regridder_weights_file)
else:
Expand All @@ -27,6 +53,69 @@ def GenerateRegridder(ds_to_regrid, ds_regrid_target, regridder_weights_file, re

return(regridder)


def make_se_regridder(weight_file, regrid_method):
    """Build an xESMF Regridder onto an unstructured (spectral element)
    destination grid from a pre-generated ESMF weight file.

    xESMF expects logically 2D source/destination grids, so size-1 dummy
    dimensions are inserted on the unstructured side. Grid centers and
    bounds are reconstructed from the weight file itself, so no separate
    grid description files are required. The weight file must contain the
    ESMF grid metadata (src_grid_dims, dst_grid_dims, xc_a, yc_a, xv_a,
    yv_a) — a plain xESMF-saved weight file will not work.

    Arguments:
        weight_file   - path to an ESMF_RegridWeightGen generated netcdf file
        regrid_method - xESMF regridding method name, e.g. 'conservative'

    Returns:
        xesmf.Regridder mapping the source grid onto the destination grid
    """
    weights = xr.open_dataset(weight_file)

    # Source grid shape, reversed so the ordering is [nlat, nlon].
    in_shape = weights.src_grid_dims.load().data.tolist()[::-1]

    # Since xESMF expects 2D vars, insert a dummy dimension of size 1.
    # Bug fix: in_shape is a python list here (from .tolist()), so index it
    # rather than calling the numpy-only .item(), which raised AttributeError.
    if len(in_shape) == 1:
        in_shape = [1, in_shape[0]]

    # Destination grid shape, padded the same way (a spectral element
    # destination is 1D, so this branch is the expected path).
    out_shape = weights.dst_grid_dims.load().data
    if len(out_shape) == 1:
        out_shape = [1, out_shape.item()]

    # Reconstruct source cell bounds from the weight-file corner arrays.
    # Longitude edges: corners of the first row cover all longitudes.
    lat_b_in = np.zeros(in_shape[0] + 1)
    lon_b_in = weights.xv_a.data[:in_shape[1] + 1, 0]
    # Latitude edges: lower corner of the first cell in every row, plus the
    # upper corner of the very last cell.
    lat_b_in[:-1] = weights.yv_a.data[np.arange(in_shape[0]) * in_shape[1], 0]
    lat_b_in[-1] = weights.yv_a.data[-1, -1]

    # Dummy destination dataset: only the sizes matter, since the weights
    # are reused from file rather than recomputed.
    dummy_out = xr.Dataset(
        {
            "lat": ("lat", np.empty((out_shape[0],))),
            "lon": ("lon", np.empty((out_shape[1],))),
            "lat_b": ("lat_b", np.empty((out_shape[0] + 1,))),
            "lon_b": ("lon_b", np.empty((out_shape[1] + 1,))),
        }
    )
    # Dummy source dataset carrying the actual cell centers and bounds.
    dummy_in = xr.Dataset(
        {
            "lat": ("lat", weights.yc_a.data.reshape(in_shape)[:, 0]),
            "lon": ("lon", weights.xc_a.data.reshape(in_shape)[0, :]),
            "lat_b": ("lat_b", lat_b_in),
            "lon_b": ("lon_b", lon_b_in),
        }
    )

    regridder = xe.Regridder(
        dummy_in,
        dummy_out,
        weights=weight_file,
        method=regrid_method,
        reuse_weights=True,
        periodic=True,
    )
    return regridder

def RegridLoop(ds_to_regrid, regridder):

# To Do: implement this with dask
Expand Down
Loading