Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
13cd040
deprecate cdo/python-cdo: replace CDO usage with xarray, add deprecat…
Copilot Apr 1, 2026
8f5552e
revert unrelated JSON file changes
Copilot Apr 1, 2026
5439a08
address code review: use xarray .dt accessor and context managers
Copilot Apr 1, 2026
0443782
remove CDO, add xarray averager, rename frepytools to NumpyTimeAverag…
Copilot Apr 3, 2026
4c91d82
fix: handle non-numeric vars and cftime time_bnds in xarrayTimeAverag…
Copilot Apr 3, 2026
e7f465b
test: add comprehensive unit tests for xarrayTimeAverager and add wei…
Copilot Apr 3, 2026
164c394
test: address code review — rename bnds_dtype → time_bnds_encoding, f…
Copilot Apr 3, 2026
c8fdd85
fix: NumpyTimeAverager now handles arbitrary dimensions (scalar, 3D, …
Copilot Apr 9, 2026
61babc7
some additional logging is helpful
ilaflott Apr 10, 2026
80ecff9
put default back to cdo to check for smooth transition behavior. add …
ilaflott Apr 10, 2026
5e603f3
refactor: move NumpyTimeAverager to numpyTimeAverager.py, add monthly…
Copilot Apr 10, 2026
7689a02
revert unrelated CMOR test file changes from test runs
Copilot Apr 10, 2026
381ba00
fix: typo in numpyTimeAverager.py comment
Copilot Apr 10, 2026
50482c9
fix: strip stale unlimited_dims encoding from xarrayTimeAverager outp…
Copilot Apr 10, 2026
8563837
feat: add rigorous numerical accuracy tests and fre-python-tools mont…
Copilot Apr 10, 2026
473849f
refactor: derive unlimited_dims from encoding rather than hardcoding …
Copilot Apr 10, 2026
09c34e4
feat: add test_numpyTimeAverager.py covering uncovered exception/erro…
Copilot Apr 10, 2026
b5dcf5a
feat: add cross-pkg weighted averaging bitwise reproducibility tests …
Copilot Apr 10, 2026
f88e5e6
fix: correct rtol value in cross-pkg test docstring from 1e-12 to 1e-6
Copilot Apr 10, 2026
b18aadc
fix: correctly reduce time-metadata variables (time_bnds, time, avera…
Copilot Apr 10, 2026
2333bbb
update frenctools timeaverager tests
ilaflott Apr 10, 2026
99729f4
docs: update time averager documentation — README.md, docs/tools/app.…
Copilot Apr 10, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ dependencies:
- noaa-gfdl::analysis_scripts==0.0.1
- noaa-gfdl::catalogbuilder==2025.01.01
# - noaa-gfdl::fre-nctools==2022.02.01
- conda-forge::cdo>=2
- conda-forge::cftime
- conda-forge::click>=8.2
- conda-forge::cmor>=3.14
Expand All @@ -22,7 +21,6 @@ dependencies:
- conda-forge::pytest
- conda-forge::pytest-cov
- conda-forge::pylint
- conda-forge::python-cdo
- conda-forge::pyyaml
- conda-forge::xarray>=2024.*
- conda-forge::netcdf4>=1.7.*
4 changes: 2 additions & 2 deletions fre/app/freapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def mask_atmos_plevel(infile, psfile, outfile, warn_no_ps):
required = True,
help = "Output file name")
@click.option("-p", "--pkg",
type = click.Choice(["cdo","fre-nctools","fre-python-tools"]),
type = click.Choice(["cdo","fre-nctools","fre-python-tools","xarray","numpy"]),
default = "cdo",
help = "Time average approach")
@click.option("-v", "--var",
Expand Down Expand Up @@ -192,7 +192,7 @@ def gen_time_averages(inf, outf, pkg, var, unwgt, avg_type):
required = True,
help = "Frequency of desired climatology: 'mon' or 'yr'")
@click.option("-p", "--pkg",
type = click.Choice(["cdo","fre-nctools","fre-python-tools"]),
type = click.Choice(["cdo","fre-nctools","fre-python-tools","xarray","numpy"]),
default = "cdo",
help = "Time average approach")
def gen_time_averages_wrapper(cycle_point, dir_, sources, output_interval, input_interval, grid, frequency, pkg):
Expand Down
3 changes: 2 additions & 1 deletion fre/app/generate_time_averages/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
'''required for generate_time_averages module import functionality'''
__all__ = ['generate_time_averages', 'timeAverager', 'wrapper', 'combine',
'frenctoolsTimeAverager', 'cdoTimeAverager', 'frepytoolsTimeAverager']
'frenctoolsTimeAverager', 'cdoTimeAverager', 'frepytoolsTimeAverager',
'xarrayTimeAverager']
96 changes: 21 additions & 75 deletions fre/app/generate_time_averages/cdoTimeAverager.py
Original file line number Diff line number Diff line change
@@ -1,89 +1,35 @@
''' class using (mostly) cdo functions for time-averages '''
''' stub that redirects pkg='cdo' requests to the xarray time averager '''

import logging
import warnings

from netCDF4 import Dataset
import numpy as np

import cdo
from cdo import Cdo

from .timeAverager import timeAverager
from .xarrayTimeAverager import xarrayTimeAverager

fre_logger = logging.getLogger(__name__)

class cdoTimeAverager(timeAverager):

class cdoTimeAverager(xarrayTimeAverager): # pylint: disable=invalid-name
'''
class inheriting from abstract base class timeAverager
generates time-averages using cdo (mostly, see weighted approach)
Legacy entry-point kept for backward compatibility.
CDO/python-cdo has been removed. All work is now done by xarrayTimeAverager.
'''

def generate_timavg(self, infile = None, outfile = None):
def generate_timavg(self, infile=None, outfile=None):
"""
use cdo package routines via python bindings
Emit a loud warning then delegate to the xarray implementation.

:param self: This is an instance of the class cdoTimeAverager
:param infile: path to history file, or list of paths, default is None
:type infile: str, list
:param outfile: path to where output file should be stored, default is None
:param infile: path to input NetCDF file
:type infile: str
:param outfile: path to output file
:type outfile: str
:return: 1 if the instance variable self.avg_typ is unsupported, 0 if function has a clean exit
:return: 0 on success
:rtype: int
"""

if self.avg_type not in ['all', 'seas', 'month']:
fre_logger.error('requested unknown avg_type %s.', self.avg_type)
raise ValueError(f'requested unknown avg_type {self.avg_type}')

if self.var is not None:
fre_logger.warning('WARNING: variable specification not twr supported for cdo time averaging. ignoring!')

fre_logger.info('python-cdo version is %s', cdo.__version__)

_cdo = Cdo()

wgts_sum = 0
if not self.unwgt: #weighted case, cdo ops alone don't support a weighted time-average.

nc_fin = Dataset(infile, 'r')

time_bnds = nc_fin['time_bnds'][:].copy()
# Ensure float64 precision for consistent results across numpy versions
# NumPy 2.0 changed type promotion rules (NEP 50), so explicit casting
# is needed to avoid precision differences
time_bnds = np.asarray(time_bnds, dtype=np.float64)
# Transpose once to avoid redundant operations
time_bnds_transposed = np.moveaxis(time_bnds, 0, -1)
wgts = time_bnds_transposed[1] - time_bnds_transposed[0]
# Use numpy.sum for consistent dtype handling across numpy versions
wgts_sum = np.sum(wgts, dtype=np.float64)

fre_logger.debug('wgts_sum = %s', wgts_sum)

if self.avg_type == 'all':
fre_logger.info('time average over all time requested.')
if self.unwgt:
_cdo.timmean(input = infile, output = outfile, returnCdf = True)
else:
_cdo.divc( str(wgts_sum), input = "-timsum -muldpm "+infile, output = outfile)
fre_logger.info('done averaging over all time.')

elif self.avg_type == 'seas':
fre_logger.info('seasonal time-averages requested.')
_cdo.yseasmean(input = infile, output = outfile, returnCdf = True)
fre_logger.info('done averaging over seasons.')

elif self.avg_type == 'month':
fre_logger.info('monthly time-averages requested.')
outfile_str = str(outfile)
_cdo.ymonmean(input = infile, output = outfile_str, returnCdf = True)
fre_logger.info('done averaging over months.')

fre_logger.warning(" splitting by month")
outfile_root = outfile_str.removesuffix(".nc") + '.'
_cdo.splitmon(input = outfile_str, output = outfile_root)
fre_logger.debug('Done with splitting by month, outfile_root = %s', outfile_root)

fre_logger.info('done averaging')
fre_logger.info('output file created: %s', outfile)
return 0
msg = (
"WARNING *** CDO/python-cdo has been REMOVED from fre-cli. "
"pkg='cdo' now uses the xarray time-averager under the hood. "
"Please switch to pkg='xarray' or pkg='fre-python-tools'. ***"
)
warnings.warn(msg, FutureWarning, stacklevel=2)
fre_logger.warning(msg)
return super().generate_timavg(infile=infile, outfile=outfile)
68 changes: 35 additions & 33 deletions fre/app/generate_time_averages/frenctoolsTimeAverager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from subprocess import Popen, PIPE
from pathlib import Path

from cdo import Cdo
import xarray as xr
from .timeAverager import timeAverager

fre_logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -80,38 +80,40 @@ def generate_timavg(self, infile=None, outfile=None):
month_output_file_paths[month_index] = os.path.join( output_dir,
f"{Path(outfile).stem}.{month_index:02d}.nc")

cdo = Cdo()
#Loop through each month and select the corresponding data
for month_index in month_indices:

#month_name = month_names[month_index - 1]
nc_monthly_file = nc_month_file_paths[month_index]

#Select data for the given month
cdo.select(f"month={month_index}", input=infile, output=nc_monthly_file)

#Run timavg command for newly created file
month_output_file = month_output_file_paths[month_index]
#timavgcsh_command=['timavg.csh', '-mb','-o', month_output_file, nc_monthly_file]
timavgcsh_command=[shutil.which('timavg.csh'), '-dmb','-o', month_output_file, nc_monthly_file]
fre_logger.info( 'timavgcsh_command is %s', ' '.join(timavgcsh_command) )
exitstatus=1
with Popen(timavgcsh_command,
stdout=PIPE, stderr=PIPE, shell=False) as subp:
stdout, stderr = subp.communicate()
stdoutput=stdout.decode()
fre_logger.info('output= %s', stdoutput)
stderror=stderr.decode()
fre_logger.info('error = %s', stderror )

if subp.returncode != 0:
fre_logger.error('stderror = %s', stderror)
raise ValueError(f'error: timavg.csh had a problem, subp.returncode = {subp.returncode}')

fre_logger.info('%s climatology successfully ran',nc_monthly_file)
exitstatus=0

#Delete files after being used to generate output files
with xr.open_dataset(infile) as ds_in:
#Loop through each month and select the corresponding data
for month_index in month_indices:

#month_name = month_names[month_index - 1]
nc_monthly_file = nc_month_file_paths[month_index]

#Select data for the given month
month_ds = ds_in.sel(time=ds_in['time'].dt.month == month_index)
month_ds.to_netcdf(nc_monthly_file)
month_ds.close()

#Run timavg command for newly created file
month_output_file = month_output_file_paths[month_index]
#timavgcsh_command=['timavg.csh', '-mb','-o', month_output_file, nc_monthly_file]
timavgcsh_command=[shutil.which('timavg.csh'), '-dmb','-o', month_output_file, nc_monthly_file]
fre_logger.info( 'timavgcsh_command is %s', ' '.join(timavgcsh_command) )
exitstatus=1
with Popen(timavgcsh_command,
stdout=PIPE, stderr=PIPE, shell=False) as subp:
stdout, stderr = subp.communicate()
stdoutput=stdout.decode()
fre_logger.info('output= %s', stdoutput)
stderror=stderr.decode()
fre_logger.info('error = %s', stderror )

if subp.returncode != 0:
fre_logger.error('stderror = %s', stderror)
raise ValueError(f'error: timavg.csh had a problem, subp.returncode = {subp.returncode}')

fre_logger.info('%s climatology successfully ran',nc_monthly_file)
exitstatus=0

#Delete files after being used to generate output files
shutil.rmtree('monthly_nc_files')

if self.avg_type == 'month': #End here if month variable used
Expand Down
76 changes: 27 additions & 49 deletions fre/app/generate_time_averages/frepytoolsTimeAverager.py
Comment thread
ilaflott marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

fre_logger = logging.getLogger(__name__)

class frepytoolsTimeAverager(timeAverager):
class NumpyTimeAverager(timeAverager): # pylint: disable=invalid-name
'''
class inheriting from abstract base class timeAverager
generates time-averages using a python-native approach
Expand Down Expand Up @@ -95,58 +95,33 @@ def generate_timavg(self, infile = None, outfile = None):
fre_logger.debug('wgts_sum = %s', wgts_sum)


# initialize arrays, is there better practice for reserving the memory necessary
# for holding the day? is something that does more write-on-demand possible like
# reading data on-demand? (TODO)
num_lat_bnds = fin_dims['lat'].size
fre_logger.debug('num_lat_bnds = %s', num_lat_bnds)
num_lon_bnds = fin_dims['lon'].size
fre_logger.debug('num_lon_bnds = %s', num_lon_bnds)
# Use masked array only if there's actually masked data
if has_masked_data:
avgvals = numpy.ma.zeros((1, num_lat_bnds, num_lon_bnds), dtype = float)
else:
avgvals = numpy.zeros((1, num_lat_bnds, num_lon_bnds), dtype = float)

# this loop behavior 100% should be re-factored into generator functions.
# they should be slightly faster, and much more readable. (TODO)
# the average/stddev cpu settings should also be genfunctions, their behavior
# (e.g. stddev_pop v stddev_samp) should be set given user inputs. (TODO)
# the computations can lean on numpy.stat more- i imagine it's faster (TODO)
# parallelism via multiprocessing shouldn't be too bad- explore an alt [dask] too (TODO)
# compute average, for each lat/lon coordinate over time record in file
# compute time-averaged values using vectorized numpy operations
# this handles variables with any number of dimensions (scalar, 3-D, 4-D, etc.)
fre_logger.debug('var_data shape = %s', var_data.shape)

if not self.unwgt: #weighted case
fre_logger.info('computing weighted statistics')
for lat in range(num_lat_bnds):
lon_val_array = numpy.moveaxis( nc_fin[targ_var][:], 0, -1)[lat].copy()

for lon in range(num_lon_bnds):
tim_val_array = lon_val_array[lon].copy()
# Use numpy.ma.sum only if there are actually masked values
if has_masked_data:
avgvals[0][lat][lon] = numpy.ma.sum(tim_val_array * wgts) / wgts_sum
else:
# Use numpy.sum for consistent dtype handling across numpy versions
avgvals[0][lat][lon] = numpy.sum(tim_val_array * wgts, dtype=numpy.float64) / wgts_sum

del tim_val_array
del lon_val_array
# broadcast weights to match data dimensions: (T,) -> (T, 1, 1, ...)
wgts_shape = (num_time_bnds,) + (1,) * (var_data.ndim - 1)
wgts_expanded = wgts.reshape(wgts_shape)
if has_masked_data:
avgvals = numpy.ma.sum(
var_data * wgts_expanded, axis=0, keepdims=True
) / wgts_sum
else:
avgvals = numpy.sum(
numpy.asarray(var_data, dtype=numpy.float64) * wgts_expanded,
axis=0, keepdims=True, dtype=numpy.float64
) / wgts_sum
else: #unweighted case
fre_logger.info('computing unweighted statistics')
for lat in range(num_lat_bnds):
lon_val_array = numpy.moveaxis( nc_fin[targ_var][:], 0, -1)[lat].copy()

for lon in range(num_lon_bnds):
tim_val_array = lon_val_array[lon].copy()
# Use numpy.ma.sum only if there are actually masked values
if has_masked_data:
avgvals[0][lat][lon] = numpy.ma.sum(tim_val_array) / num_time_bnds
else:
# Use numpy.sum for consistent dtype handling across numpy versions
avgvals[0][lat][lon] = numpy.sum(tim_val_array, dtype=numpy.float64) / num_time_bnds

del tim_val_array
del lon_val_array
if has_masked_data:
avgvals = numpy.ma.sum(var_data, axis=0, keepdims=True) / num_time_bnds
else:
avgvals = numpy.sum(
numpy.asarray(var_data, dtype=numpy.float64),
axis=0, keepdims=True, dtype=numpy.float64
) / num_time_bnds



Expand Down Expand Up @@ -256,3 +231,6 @@ def generate_timavg(self, infile = None, outfile = None):
fre_logger.debug('input file closed')

return 0

# backward-compatible alias
frepytoolsTimeAverager = NumpyTimeAverager # pylint: disable=invalid-name
Loading
Loading