Skip to content
4 changes: 3 additions & 1 deletion cmip7_prep/data/cesm_to_cmip7.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,9 @@ variables:
sos:
table: Omon
units: "1e-3"
dims: [time, j, i]
dims:
- [time, lat, lon]
regrid_method: conservative
sources:
- cesm_var: sos

Expand Down
3 changes: 3 additions & 0 deletions cmip7_prep/mapping_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class VarConfig:
regrid_method: Optional[str] = None
long_name: Optional[str] = None
standard_name: Optional[str] = None
dims: Optional[List[str]] = None

def as_cfg(self) -> Dict[str, Any]:
"""Return a plain dict view for convenience in other modules.
Expand All @@ -68,6 +69,7 @@ def as_cfg(self) -> Dict[str, Any]:
"regrid_method": self.regrid_method,
"long_name": self.long_name,
"standard_name": self.standard_name,
"dims": self.dims,
}
return {k: v for k, v in d.items() if v is not None}

Expand Down Expand Up @@ -270,6 +272,7 @@ def _to_varconfig(name: str, cfg: TMapping[str, Any]) -> VarConfig:
regrid_method=cfg.get("regrid_method"),
long_name=cfg.get("long_name"),
standard_name=cfg.get("standard_name"),
dims=cfg.get("dims"),
)
return vc

Expand Down
37 changes: 26 additions & 11 deletions cmip7_prep/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pathlib import Path
from typing import Optional, Sequence, Union, Dict, List
import re
from venv import logger
Copy link

Copilot AI Nov 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logger is being imported from the venv module, which is incorrect. The venv module is for creating virtual environments and does not provide a logger. This should likely be from logging import getLogger or use the existing logging setup pattern from other files in the project.

Suggested change
from venv import logger
import logging

Copilot uses AI. Check for mistakes.
import warnings
import glob
import sys
Expand Down Expand Up @@ -201,6 +202,7 @@ def realize_regrid_prepare(
*,
tables_path: Optional[Union[str, Path]] = None,
time_chunk: Optional[int] = 12,
mom6_grid: Optional[Dict[str, xr.DataArray]] = None,
regrid_kwargs: Optional[dict] = None,
open_kwargs: Optional[dict] = None,
) -> xr.Dataset:
Expand All @@ -211,7 +213,7 @@ def realize_regrid_prepare(
"""
regrid_kwargs = dict(regrid_kwargs or {})
open_kwargs = dict(open_kwargs or {})

aux = []
# 1) Get native dataset
if isinstance(ds_or_glob, xr.Dataset):
ds_native = ds_or_glob
Expand All @@ -220,7 +222,23 @@ def realize_regrid_prepare(
ds_native = open_native_for_cmip_vars(
[cmip_var], ds_or_glob, mapping, **open_kwargs
)
if not "landfrac" in ds_native and not "ncol" in ds_native:
Copy link

Copilot AI Nov 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use the idiomatic in operator format: if 'landfrac' not in ds_native and 'ncol' not in ds_native: for better readability and consistency with Python conventions.

Suggested change
if not "landfrac" in ds_native and not "ncol" in ds_native:
if "landfrac" not in ds_native and "ncol" not in ds_native:

Copilot uses AI. Check for mistakes.
logger.info("Variable has no 'landfrac' or 'ncol' dim; assuming ocn variable.")
# Add MOM6 grid info if provided
if mom6_grid:
aux = ["geolat", "geolon", "geolat_c", "geolon_c"]
ds_native["geolat"] = xr.DataArray(mom6_grid[0], dims=("yh", "xh"))
ds_native["geolon"] = xr.DataArray(mom6_grid[1], dims=("yh", "xh"))
ds_native["geolat_c"] = xr.DataArray(mom6_grid[2], dims=("yhp", "xhp"))
ds_native["geolon_c"] = xr.DataArray(mom6_grid[3], dims=("yhp", "xhp"))

else:
logger.error("No MOM6 grid info provided; geolat/geolon not added.")
return (
cmip_var,
f"ERROR: MOM6 grid information is required for variable {cmip_var} "
"but was not provided.",
Copy link

Copilot AI Nov 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function should return xr.Dataset according to its signature, but this error path returns a tuple. This type mismatch will cause issues for callers expecting a Dataset. Consider raising an exception instead of returning an error tuple.

Suggested change
return (
cmip_var,
f"ERROR: MOM6 grid information is required for variable {cmip_var} "
"but was not provided.",
raise ValueError(
f"MOM6 grid information is required for variable {cmip_var} but was not provided."

Copilot uses AI. Check for mistakes.
)
# 2) Realize the target variable
ds_v = mapping.realize(ds_native, cmip_var)
da = ds_v if isinstance(ds_v, xr.DataArray) else ds_v[cmip_var]
Expand All @@ -247,7 +265,11 @@ def realize_regrid_prepare(
# can produce PS(time,lat,lon)
if "PS" in ds_native and "PS" not in ds_vars:
ds_vars = ds_vars.assign(PS=ds_native["PS"])

aux = [
nm
for nm in ("hyai", "hybi", "hyam", "hybm", "P0", "ilev", "lev")
if nm in ds_native
]
# 5) Apply vertical transform if needed (plev19, etc.).
# Single-var helper already takes cfg + tables_path
ds_vert = _apply_vertical_if_needed(
Expand All @@ -263,15 +285,8 @@ def realize_regrid_prepare(
ds_vert, names_to_regrid, time_from=ds_native, **regrid_kwargs
)

# 7) If hybrid: merge in 1-D hybrid coefficients directly from native (no regridding needed)
if is_hybrid:
aux = [
nm
for nm in ("hyai", "hybi", "hyam", "hybm", "P0", "ilev", "lev")
if nm in ds_native
]
if aux:
ds_regr = ds_regr.merge(ds_native[aux], compat="override")
if aux:
ds_regr = ds_regr.merge(ds_native[aux], compat="override")

return ds_regr

Expand Down
84 changes: 64 additions & 20 deletions cmip7_prep/regrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,19 @@
_HAS_DASK = False

# Default weight maps; override via function args.
DEFAULT_CONS_MAP = Path(
"/glade/campaign/cesm/cesmdata/inputdata/cpl/gridmaps/ne30pg3/map_ne30pg3_to_1x1d_aave.nc"
INPUTDATA_DIR = Path("/glade/campaign/cesm/cesmdata/inputdata/")
DEFAULT_CONS_MAP_NE30 = Path(
INPUTDATA_DIR / "cpl/gridmaps/ne30pg3/map_ne30pg3_to_1x1d_aave.nc"
)
DEFAULT_BILIN_MAP = Path(
"/glade/campaign/cesm/cesmdata/inputdata/cpl/gridmaps/ne30pg3/map_ne30pg3_to_1x1d_bilin.nc"
DEFAULT_BILIN_MAP_NE30 = Path(
INPUTDATA_DIR / "cpl/gridmaps/ne30pg3/map_ne30pg3_to_1x1d_bilin.nc"
) # optional bilinear map

DEFAULT_CONS_MAP_T232 = Path(
INPUTDATA_DIR / "cpl/gridmaps/tx2_3v2/map_t232_TO_1x1d_aave.251023.nc"
)
DEFAULT_BILIN_MAP_T232 = Path(
INPUTDATA_DIR / "cpl/gridmaps/tx2_3v2/map_t232_TO_1x1d_blin.251023.nc"
) # optional bilinear map

# Variables treated as "intensive" → prefer bilinear when available.
Expand Down Expand Up @@ -413,10 +421,15 @@ def _pick_maps(
conservative_map: Optional[Path] = None,
bilinear_map: Optional[Path] = None,
force_method: Optional[str] = None,
realm: Optional[str] = None,
) -> MapSpec:
"""Choose which precomputed map file to use for a variable."""
cons = Path(conservative_map) if conservative_map else DEFAULT_CONS_MAP
bilin = Path(bilinear_map) if bilinear_map else DEFAULT_BILIN_MAP
if realm == "ocn":
cons = Path(conservative_map) if conservative_map else DEFAULT_CONS_MAP_T232
bilin = Path(bilinear_map) if bilinear_map else DEFAULT_BILIN_MAP_T232
else:
cons = Path(conservative_map) if conservative_map else DEFAULT_CONS_MAP_NE30
bilin = Path(bilinear_map) if bilinear_map else DEFAULT_BILIN_MAP_NE30

if force_method:
if force_method not in {"conservative", "bilinear"}:
Expand Down Expand Up @@ -484,13 +497,19 @@ def regrid_to_1deg_ds(
# Attach time (and bounds) from the original dataset if requested
if time_from is not None:
ds_out = _attach_time_and_bounds(ds_out, time_from)

if "ncol" in ds_in.dims:
realm = "atm"
elif "lndgrid" in ds_in.dims:
realm = "lnd"
else:
realm = "ocn"
# Pick the mapfile you used for conservative/bilinear selection
spec = _pick_maps(
varnames[0] if isinstance(varnames, list) else varnames,
conservative_map=conservative_map,
bilinear_map=bilinear_map,
force_method="conservative",
realm=realm,
) # fx always conservative
print(f"using fx map: {spec.path}")
Copy link

Copilot AI Nov 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This print statement should be replaced with a logger call for consistency with the rest of the codebase changes in this PR, which systematically replaced print statements with proper logging.

Suggested change
print(f"using fx map: {spec.path}")
logger.info(f"using fx map: {spec.path}")

Copilot uses AI. Check for mistakes.
ds_fx = _regrid_fx_once(spec.path, ds_in) # ← uses cache
Expand Down Expand Up @@ -569,12 +588,22 @@ def regrid_to_1deg(
"""
if varname not in ds_in:
raise KeyError(f"{varname!r} not in dataset.")

realm = None
var_da = ds_in[varname] # always a DataArray

da2, non_spatial, hdim = _ensure_ncol_last(var_da)
if hdim == "lndgrid":
da2 = _normalize_land_field(da2, ds_in)
if "ncol" not in var_da.dims and "lndgrid" not in var_da.dims:
logger.info("Variable has no 'ncol' or 'lndgrid' dim; assuming ocn variable.")
hdim = "tripolar"
da2 = var_da # Use the DataArray, not the whole Dataset
non_spatial = [d for d in da2.dims if d not in ("yh", "xh")]
realm = "ocn"
method = method or "bilinear" # force bilinear for ocn
else:
da2, non_spatial, hdim = _ensure_ncol_last(var_da)
if hdim == "lndgrid":
da2 = _normalize_land_field(da2, ds_in)
realm = "lnd"
elif hdim == "ncol":
realm = "atm"
Comment on lines 444 to 463
Copy link

Copilot AI Nov 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This condition assumes that any variable without 'ncol' or 'lndgrid' is an ocean variable. This logic is fragile and could fail if new dimension types are added. Consider checking explicitly for expected ocean dimensions ('xh', 'yh') or adding a realm parameter instead of inferring from absence of dimensions.

Suggested change
if "ncol" not in var_da.dims and "lndgrid" not in var_da.dims:
logger.info("Variable has no 'ncol' or 'lndgrid' dim; assuming ocn variable.")
hdim = "tripolar"
da2 = var_da # Use the DataArray, not the whole Dataset
non_spatial = [d for d in da2.dims if d not in ("yh", "xh")]
realm = "ocn"
method = method or "bilinear" # force bilinear for ocn
else:
da2, non_spatial, hdim = _ensure_ncol_last(var_da)
if hdim == "lndgrid":
da2 = _normalize_land_field(da2, ds_in)
realm = "lnd"
elif hdim == "ncol":
realm = "atm"
if "xh" in var_da.dims and "yh" in var_da.dims:
logger.info("Variable has 'xh' and 'yh' dims; treating as ocean variable.")
hdim = "tripolar"
da2 = var_da # Use the DataArray, not the whole Dataset
non_spatial = [d for d in da2.dims if d not in ("yh", "xh")]
realm = "ocn"
method = method or "bilinear" # force bilinear for ocn
elif "ncol" in var_da.dims or "lndgrid" in var_da.dims:
da2, non_spatial, hdim = _ensure_ncol_last(var_da)
if hdim == "lndgrid":
da2 = _normalize_land_field(da2, ds_in)
realm = "lnd"
elif hdim == "ncol":
realm = "atm"
else:
raise ValueError(
f"Cannot determine grid/realm for variable '{varname}': dims={var_da.dims!r}. "
"Expected 'ncol', 'lndgrid', or both 'xh' and 'yh'."
)

Copilot uses AI. Check for mistakes.

# cast to save memory
if dtype is not None and str(da2.dtype) != dtype:
Expand All @@ -589,25 +618,39 @@ def regrid_to_1deg(
conservative_map=conservative_map,
bilinear_map=bilinear_map,
force_method=method,
realm=realm,
)
print(
f"Regridding {varname} using {spec.method_label} map: {spec.path} for realm {realm}"
)
Copy link

Copilot AI Nov 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This print statement should be replaced with a logger call for consistency with the rest of the codebase changes in this PR.

Copilot uses AI. Check for mistakes.
regridder = _RegridderCache.get(spec.path, spec.method_label)

# tell xESMF to produce chunked output
kwargs = {}
if "time" in da2.dims and output_time_chunk:
kwargs["output_chunks"] = {"time": output_time_chunk}

da2_2d = (
da2.rename({hdim: "lon"})
.expand_dims({"lat": 1}) # add a dummy 'lat' of length 1
.transpose(*non_spatial, "lat", "lon") # ensure last two dims are ('lat','lon')
if realm in ("atm", "lnd"):
da2_2d = (
da2.rename({hdim: "lon"})
.expand_dims({"lat": 1}) # add a dummy 'lat' of length 1
.transpose(
*non_spatial, "lat", "lon"
) # ensure last two dims are ('lat','lon')
)
else:
da2_2d = da2.rename({"xh": "lon", "yh": "lat"}).transpose(
*non_spatial, "lon", "lat"
)
da2_2d = da2_2d.assign_coords(lon=((da2_2d.lon % 360)))
print(
f"da2_2d range: {da2_2d['lat'].min().item()} to {da2_2d['lat'].max().item()} lat, "
f"{da2_2d['lon'].min().item()} to {da2_2d['lon'].max().item()} lon"
)
Copy link

Copilot AI Nov 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This print statement should be replaced with a logger call (likely logger.debug() given it's diagnostic information) for consistency with the rest of the codebase changes in this PR.

Copilot uses AI. Check for mistakes.

out_norm = regridder(da2_2d, **kwargs)
if hdim == "lndgrid":
out_norm = regridder(da2_2d, **kwargs)
out = _denormalize_land_field(out_norm, ds_in, spec.path)
else:
# default path (atm or no landfrac/sftlf available)
# default path (atm, ocn or no landfrac/sftlf available)
out = regridder(da2_2d, **kwargs)
Copy link

Copilot AI Nov 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The regridder is called inside the if hdim == 'lndgrid' block but not before it. For ocean realm processing (lines 640-644), out_norm is never assigned before the denormalization check at line 649, which would cause an UnboundLocalError. The regridder call should be moved before the if hdim == 'lndgrid' check or the logic restructured to handle all realms consistently.

Copilot uses AI. Check for mistakes.

# --- NEW: robust lat/lon assignment based on destination grid lengths ---
Expand Down Expand Up @@ -898,6 +941,7 @@ def get(cls, mapfile: Path, method_label: str) -> xe.Regridder:
if not mapfile.exists():
raise FileNotFoundError(f"Regrid weights not found: {mapfile}")
ds_in, ds_out = _make_dummy_grids(mapfile)
logger.info("Creating xESMF Regridder from weights: %s", mapfile)
cls._cache[mapfile] = xe.Regridder(
ds_in,
ds_out,
Expand Down
3 changes: 2 additions & 1 deletion scripts/fullLmon.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

module load conda
conda activate CMORDEV
poetry run python ./scripts/monthly_cmor.py --realm lnd --test --workers 32 --skip-timeseries
NCPUS=$(cat $PBS_NODEFILE | wc -l)
poetry run python ./scripts/monthly_cmor.py --realm lnd --test --workers $NCPUS --skip-timeseries

#/glade/u/home/cmip7/cases/b.e30_beta06.B1850C_LTso.ne30_t232_wgx3.192.wrkflw.1 /glade/work/hannay/cesm_tags/cesm3_0_beta06/cime
4 changes: 2 additions & 2 deletions scripts/fullamon.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

module load conda
conda activate CMORDEV

poetry run python ./scripts/monthly_cmor.py --realm atm --test --workers 32 --skip-timeseries
NCPUS=$(cat $PBS_NODEFILE | wc -l)
poetry run python ./scripts/monthly_cmor.py --realm atm --test --workers $NCPUS --skip-timeseries

#/glade/u/home/cmip7/cases/b.e30_beta06.B1850C_LTso.ne30_t232_wgx3.192.wrkflw.1 /glade/work/hannay/cesm_tags/cesm3_0_beta06/cime
Loading