Skip to content

Commit d48fa81

Browse files
committed
fully parallel Amon script
1 parent fc86d26 commit d48fa81

File tree

5 files changed

+211
-10
lines changed

5 files changed

+211
-10
lines changed

cmip7_prep/data/cesm_to_cmip7.yaml

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -279,16 +279,20 @@ variables:
279279
units: "W m-2"
280280
dims: [time, lat, lon]
281281
positive: up
282+
formula: FSDS - FSNS
282283
sources:
283-
- cesm_var: FSUS
284+
- cesm_var: FSDS
285+
- cesm_var: FSNS
284286

285287
rsuscs:
286288
table: Amon
287289
units: "W m-2"
288290
dims: [time, lat, lon]
289291
positive: up
292+
formula: FSDSC - FSNSC
290293
sources:
291-
- cesm_var: FSUSC
294+
- cesm_var: FSDSC
295+
- cesm_var: FSNSC
292296

293297
rsut:
294298
table: Amon
@@ -302,8 +306,10 @@ variables:
302306
table: Amon
303307
units: "W m-2"
304308
dims: [time, lat, lon]
309+
formula: SOLIN - FSNTOAC
305310
sources:
306-
- cesm_var: FSUTOAC
311+
- cesm_var: SOLIN
312+
- cesm_var: FSNTOAC
307313

308314
sfcWind:
309315
table: Amon
@@ -407,7 +413,7 @@ variables:
407413
units: "m s-1"
408414
dims: [time, lat, lon]
409415
sources:
410-
- cesm_var: V10
416+
- cesm_var: VBOT
411417

412418
wap:
413419
table: Amon

pyproject.toml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,13 @@ include = [
1212

1313
[tool.poetry.dependencies]
1414
python = ">=3.10,<3.14"
15-
xarray = "^2024.5.0"
16-
netCDF4 = "^1.6.5"
17-
cftime = "^1.6.3"
15+
xarray = "^2024.9.0"
16+
netCDF4 = "^1.7.2"
17+
cftime = "^1.6.4"
1818
numpy = "^2.0.0"
19-
pandas = "^2.2.2"
20-
xesmf = "^0.8.5"
21-
typer = { version = "^0.12.3", extras = ["all"] }
19+
pandas = "^2.3.2"
20+
xesmf = "^0.8.10"
21+
typer = { version = "^0.19.1", extras = ["all"] }
2222
PyYAML = "^6.0.2"
2323
# CMOR typically comes from conda-forge; if pip isn't viable, install it outside Poetry or via a wheel.
2424
# cmor = "^3.8.0"

scripts/atm_monthly.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
import os
2+
from pathlib import Path
3+
import logging
4+
5+
import xarray as xr
6+
from cmip7_prep.mapping_compat import Mapping
7+
from cmip7_prep.pipeline import realize_regrid_prepare, open_native_for_cmip_vars
8+
from cmip7_prep.cmor_writer import CmorSession
9+
from cmip7_prep.dreq_search import find_variables_by_prefix
10+
from gents.hfcollection import HFCollection
11+
from gents.timeseries import TSCollection
12+
from dask.distributed import LocalCluster
13+
from dask.distributed import Client
14+
from dask import delayed
15+
from datetime import datetime, UTC
16+
17+
import dask
18+
from dask import delayed
19+
20+
TABLES = "/glade/work/cmip7/e3sm_to_cmip/cmip6-cmor-tables/Tables"
21+
OUTDIR = Path("/glade/derecho/scratch/cmip7/CMIP7")
22+
23+
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
24+
25+
26+
@delayed
def process_one_var(varname: str) -> tuple[str, str]:
    """Regrid one CMIP variable and write it through CMOR as a dask task.

    The task reads the module-level ``mapping`` and ``ds_native`` objects,
    which this script assigns in its ``__main__`` section before any task is
    created, and writes its output and CMOR log below ``OUTDIR``.

    Args:
        varname: Name of the CMIP variable to process.

    Returns:
        ``(varname, "ok")`` on success, otherwise
        ``(varname, "ERROR: <exception repr>")``. Any ``Exception`` is caught
        so that one failing variable does not abort the remaining tasks; the
        full traceback is logged before the error tuple is returned.
    """
    try:
        # Realize → verticalize (if needed) → regrid for a single variable
        ds_cmor = realize_regrid_prepare(
            mapping,
            ds_native,
            varname,
            tables_path=TABLES,
            time_chunk=12,
            regrid_kwargs={
                "output_time_chunk": 12,
                "dtype": "float32",
                "bilinear_map": Path(
                    "/glade/campaign/cesm/cesmdata/inputdata/cpl/gridmaps/ne30pg3/map_ne30pg3_to_1x1d_bilin.nc"
                ),
                "conservative_map": Path(
                    "/glade/campaign/cesm/cesmdata/inputdata/cpl/gridmaps/ne30pg3/map_ne30pg3_to_1x1d_aave.nc"
                ),
            },
        )

        # Every task shares one log directory; exist_ok makes the concurrent
        # mkdir calls from parallel tasks harmless.
        log_dir = OUTDIR / "logs"
        log_dir.mkdir(parents=True, exist_ok=True)

        with CmorSession(
            tables_path=TABLES,
            # One log file per task: the UTC timestamp plus the variable name
            # keeps the files apart and helps debugging.
            log_dir=log_dir,
            log_name=f"cmor_{datetime.now(UTC).strftime('%Y%m%dT%H%M%SZ')}_{varname}.log",
            dataset_attrs={"institution_id": "NCAR"},  # plus other attrs if needed
        ) as cm:
            # Build an ad-hoc variable-definition object from the mapping
            # config; entries missing from the config fall back to the
            # defaults given here.
            cfg = mapping.get_cfg(varname)
            vdef = type(
                "VDef",
                (),
                {
                    "name": varname,
                    "table": cfg.get("table", "Amon"),
                    "units": cfg.get("units", ""),
                    "dims": cfg.get("dims", []),
                    "positive": cfg.get("positive", None),
                    "cell_methods": cfg.get("cell_methods", None),
                    "long_name": cfg.get("long_name", None),
                    "standard_name": cfg.get("standard_name", None),
                    "levels": cfg.get("levels", None),
                },
            )()

            # The writer expects a dataset with varname present.
            cm.write_variable(ds_cmor, varname, vdef, outdir=OUTDIR)

        return (varname, "ok")
    except Exception as e:  # keep task alive; report failure
        # The returned status only carries repr(e); log the traceback here so
        # the failure can still be diagnosed from the job output.
        logging.getLogger(__name__).exception("Processing of %s failed", varname)
        return (varname, f"ERROR: {e!r}")
84+
85+
86+
if __name__ == "__main__":
    # NOTE: these statements must stay at module level (do not move them into
    # a main() function): process_one_var looks up `mapping` and `ds_native`
    # as module globals.

    # Only atm monthly 32 bit
    include_pattern = "*cam.h0a.*"
    # Only atm monthly 64 bit
    # include_pattern = "*cam.h0a*"

    input_head_dir = "/glade/derecho/scratch/cmip7/archive/b.e30_beta06.B1850C_LTso.ne30_t232_wgx3.192.wrkflw.1/atm/hist_amon32"
    output_head_dir = "/glade/derecho/scratch/cmip7/archive/timeseries/b.e30_beta06.B1850C_LTso.ne30_t232_wgx3.192.wrkflw.1/atm/hist"

    # NOTE(review): LocalCluster applies memory_limit to each worker, so this
    # allows 128 x 235GB in total. Confirm whether a per-worker share of the
    # node memory was intended.
    #
    # The context managers make sure the workers and the client connection
    # are shut down when the run finishes or fails part-way through.
    with LocalCluster(
        n_workers=128, threads_per_worker=1, memory_limit="235GB"
    ) as cluster:
        with cluster.get_client() as client:
            # Presumably converts the matching history files into time-series
            # files under output_head_dir (gents API; confirm).
            hf_collection = HFCollection(input_head_dir, dask_client=client)
            hf_collection = hf_collection.include_patterns([include_pattern])

            hf_collection.pull_metadata()
            ts_collection = TSCollection(
                hf_collection, output_head_dir, ts_orders=None, dask_client=client
            )
            ts_collection = ts_collection.apply_overwrite("*")
            ts_collection.execute()

            # 0) Load mapping (uses packaged data/cesm_to_cmip7.yaml by default)
            mapping = Mapping.from_packaged_default()

            cmip_vars = find_variables_by_prefix(
                None, "Amon.", include_groups={"baseline_monthly"}
            )
            print(f"CMORIZING {len(cmip_vars)} variables")
            basedir = Path(output_head_dir)

            # 1) Load requested variables; the call also returns the variable
            #    list that is used from here on.
            ds_native, cmip_vars = open_native_for_cmip_vars(
                cmip_vars,
                os.path.join(basedir, include_pattern),
                mapping,
                use_cftime=True,
                parallel=True,
            )

            # 2) One delayed task per variable, all computed together.
            futs = [process_one_var(v) for v in cmip_vars]
            results = dask.compute(*futs)  # blocks until all finish

            for v, status in results:
                print(v, "→", status)

scripts/fullamon.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#!/usr/bin/env bash
# PBS batch job that runs scripts/atm_monthly.py for the Amon processing.
#
# Resources requested below: a single chunk with 128 CPUs and 235GB of
# memory, the "develop" queue, a 30-minute walltime, and stderr joined into
# stdout (-j oe). The CPU count matches the 128 dask workers that
# atm_monthly.py starts.
2+
#PBS -l select=1:ncpus=128:mem=235GB
3+
#PBS -N fullamon_cmor_processing
4+
#PBS -A CESM0024
5+
#PBS -q develop
6+
#PBS -l walltime=00:30:00
7+
#PBS -j oe
8+
9+
# Activate the shared CMOR conda environment before running the script.
module load conda
10+
conda activate /glade/work/cmip7/conda-envs/CMOR
11+
12+
python /glade/work/cmip7/cmip7-prep/scripts/atm_monthly.py

scripts/testbaseline_monthly.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import os
2+
from pathlib import Path
3+
import xarray as xr
4+
from cmip7_prep.mapping_compat import Mapping
5+
from cmip7_prep.pipeline import realize_regrid_prepare_many, open_native_for_cmip_vars
6+
from cmip7_prep.cmor_writer import CmorSession
7+
from cmip7_prep.dreq_search import find_variables_by_prefix
8+
9+
# Locations used by this single-variable test run.
tables_path = "/glade/work/cmip7/e3sm_to_cmip/cmip6-cmor-tables/Tables"
basedir = Path(
    "/glade/derecho/scratch/cmip7/archive/timeseries/b.e30_beta06.B1850C_LTso.ne30_t232_wgx3.192.wrkflw.1/atm/hist/"
)
outdir = Path("/glade/derecho/scratch/cmip7/CMIP7")

# Options handed to the regridding step.
regrid_options = {
    "output_time_chunk": 12,
    "dtype": "float32",
    "bilinear_map": Path(
        "/glade/campaign/cesm/cesmdata/inputdata/cpl/gridmaps/ne30pg3/map_ne30pg3_to_1x1d_bilin.nc"
    ),
    "conservative_map": Path(
        "/glade/campaign/cesm/cesmdata/inputdata/cpl/gridmaps/ne30pg3/map_ne30pg3_to_1x1d_aave.nc"
    ),
}

# 0) Load mapping (uses packaged data/cesm_to_cmip7.yaml by default)
mapping = Mapping.from_packaged_default()

# The data-request lookup still runs, but its result is replaced right away
# by a hard-coded list so that only "cl" is processed.
cmip_vars = find_variables_by_prefix(None, "Amon.", include_groups={"baseline_monthly"})
cmip_vars = ["cl"]
print(f"CMORIZING {len(cmip_vars)} variables")

# 1) Load requested variables
native_glob = os.path.join(basedir, "*cam.h0*")
ds_native, cmip_vars = open_native_for_cmip_vars(
    cmip_vars,
    native_glob,
    mapping,
    use_cftime=True,
    parallel=True,
)

# 2) One call: realize → chunk → regrid → carry time+bounds
ds_cmor = realize_regrid_prepare_many(
    mapping,
    ds_native,
    cmip_vars,
    time_chunk=12,
    tables_path=tables_path,
    regrid_kwargs=regrid_options,
)

# 3) CMOR write
cmor_log_dir = outdir / "logs"
with CmorSession(tables_path=tables_path, log_dir=cmor_log_dir) as cm:
    cm.write_variables(ds_cmor, cmip_vars, mapping, outdir=outdir)

0 commit comments

Comments
 (0)