11
2- """Mapping loader/evaluator compatible with CMIP6-style lists and CMIP7-style dicts.
2+ # cmip7_prep/mapping_compat.py
3+ r"""Mapping loader/evaluator compatible with CMIP6-style lists and CMIP7-style dicts.
34
45This module provides a `Mapping` class that:
56- Loads a YAML mapping file where **keys are CMIP variable names** (preferred), or
89- Builds CMIP variables from a native CESM `xarray.Dataset` via :meth:`realize`,
910 supporting raw variables, formulas, and simple unit conversions.
1011
11- YAML schema (both supported)
12- ---------------------------
13- # CMIP7-style (preferred):
14- tas:
15- table: Amon # or CMIP6_Amon.json / CMIP7_Amon.json
16- units: K
17- raw_variables: [TREFHT] # from the CESM files
18- regrid_method: bilinear
19-
20- # CMIP6-style (list of dicts):
21- - name: tas
22- table: CMIP6_Amon.json
23- units: K
24- raw_variables: [TREFHT]
25- unit_conversion: null
26- formula: null
27-
28- Unit conversion support
29- -----------------------
30- - String expression using ``x`` (the realized DataArray), with ``np`` and ``xr`` available.
31- Example: ``unit_conversion: "x * 86400.0"`` to convert kg m-2 s-1 -> mm/day for precipitation.
32- - Dict with ``scale`` and optional ``offset`` (applied as x * scale + offset).
33-
34- Formula support
35- ---------------
36- - A Python expression combining raw variables by name (e.g., ``"bc_a1+bc_a4+bc_c1+bc_c4"``).
37- Only variables present in the mapping entry are available, plus ``np``/``xr``. No builtins.
38-
39- Safety note: formulas and conversions are evaluated in a heavily restricted environment.
40- This is intended for trusted mapping files under your control.
12+ Security note
13+ -------------
14+ Formulas and conversion expressions are evaluated from a trusted, local mapping file
15+ with a restricted environment. We centralize the use of ``eval`` in a helper that
16+ is clearly marked and limited, and we disable the linter warning only at that line.
4117"""
4218from __future__ import annotations
4319
4723
4824import numpy as np
4925import xarray as xr
50- import yaml
26+ import yaml # runtime dependency; ignored by pylint via .pylintrc
5127
5228
5329def _normalize_table_name (value : Optional [str ]) -> Optional [str ]:
@@ -58,16 +34,17 @@ def _normalize_table_name(value: Optional[str]) -> Optional[str]:
5834 if not value :
5935 return None
6036 s = str (value )
61- if s .lower ().endswith (' .json' ):
37+ if s .lower ().endswith (" .json" ):
6238 s = s [:- 5 ]
6339 # strip CMIPx_ prefix if present
64- if '_' in s :
65- parts = s .split ('_' , 1 )
66- if len (parts ) == 2 and parts [0 ].upper ().startswith (' CMIP' ):
40+ if "_" in s :
41+ parts = s .split ("_" , 1 )
42+ if len (parts ) == 2 and parts [0 ].upper ().startswith (" CMIP" ):
6743 s = parts [1 ]
6844 return s
6945
7046
47+ # pylint: disable=too-many-instance-attributes
7148@dataclass (frozen = True )
7249class VarConfig :
7350 """Normalized mapping entry for a single CMIP variable."""
@@ -86,21 +63,40 @@ class VarConfig:
def as_cfg(self) -> Dict[str, Any]:
    """Return a plain dict view for convenience in other modules.

    The result is keyed by attribute name (``name``, ``table``, ``units``,
    ``raw_variables``, ``source``, ``formula``, ``unit_conversion``,
    ``positive``, ``cell_methods``, ``levels``, ``regrid_method``).
    Entries whose value is ``None`` are left out.
    """
    # Keys are the bare attribute names; a leading space inside the literal
    # would produce keys that no consumer looks up.
    d = {
        "name": self.name,
        "table": self.table,
        "units": self.units,
        "raw_variables": self.raw_variables,
        "source": self.source,
        "formula": self.formula,
        "unit_conversion": self.unit_conversion,
        "positive": self.positive,
        "cell_methods": self.cell_methods,
        "levels": self.levels,
        "regrid_method": self.regrid_method,
    }
    # Only ``None`` is dropped; other falsy values (e.g. "" or []) are kept.
    return {k: v for k, v in d.items() if v is not None}
10279
10380
def _safe_eval(expr: str, local_names: Dict[str, Any]) -> Any:
    """Evaluate a small arithmetic/xarray expression in a restricted namespace.

    Only the names provided in `local_names` are available, plus ``np``
    (numpy) and ``xr`` (xarray). A name in `local_names` that collides with
    ``np`` or ``xr`` takes precedence.

    Used for:
    - combining raw variables in `formula`
    - simple unit conversions (string form)

    Parameters
    ----------
    expr : str
        Python expression to evaluate.
    local_names : dict
        Extra names made visible to the expression. Not modified.

    Returns
    -------
    Any
        Whatever the expression evaluates to; callers validate the type.

    Mapping files are assumed trusted; we still minimize surface area by
    stripping builtins and only exposing needed names.
    """
    # One namespace, passed as eval's *globals*: names used inside nested
    # scopes of the expression (lambda, generator expression, comprehension)
    # are resolved in globals, not in a separate locals mapping, so splitting
    # the two would make the supplied variables invisible there.
    namespace: Dict[str, Any] = {"np": np, "xr": xr}
    namespace.update(local_names)
    # Assigned last so an entry named "__builtins__" cannot re-enable builtins.
    namespace["__builtins__"] = {}
    # NOTE(security): removing builtins is not a sandbox; attribute access on
    # the exposed objects remains possible. Only use with trusted mapping files.
    # pylint: disable=eval-used
    return eval(expr, namespace)
98+
99+
104100class Mapping :
105101 """Load and evaluate a CMIP mapping YAML file.
106102
@@ -123,7 +119,7 @@ def __init__(self, path: str | Path) -> None:
123119 # -----------------
124120 @staticmethod
125121 def _load_yaml (path : Path ) -> Dict [str , VarConfig ]:
126- with path .open ('r' , encoding = ' utf-8' ) as f :
122+ with path .open ("r" , encoding = " utf-8" ) as f :
127123 data = yaml .safe_load (f )
128124
129125 result : Dict [str , VarConfig ] = {}
@@ -137,9 +133,9 @@ def _load_yaml(path: Path) -> Dict[str, VarConfig]:
137133 elif isinstance (data , list ):
138134 # CMIP6-style: list with 'name' field
139135 for item in data :
140- if not isinstance (item , dict ) or ' name' not in item :
136+ if not isinstance (item , dict ) or " name" not in item :
141137 continue
142- name = str (item [' name' ])
138+ name = str (item [" name" ])
143139 result [name ] = _to_varconfig (name , item )
144140 else :
145141 raise TypeError ("Unsupported YAML structure: expected dict or list at top level." )
@@ -171,16 +167,6 @@ def realize(self, ds: xr.Dataset, cmip_name: str) -> xr.DataArray:
171167 - `raw_variables`: list of CESM variable names (used by formula or identity)
172168 - `formula`: Python expression combining raw variables
173169 - `unit_conversion`: str expression (using `x`) or dict {scale, offset}
174-
175- Returns
176- -------
177- xr.DataArray
178- The realized variable, with attrs possibly updated to target units.
179-
180- Raises
181- ------
182- KeyError if required raw variables are not present in `ds`.
183- ValueError if the mapping is incomplete or inconsistent.
184170 """
185171 if cmip_name not in self ._vars :
186172 raise KeyError (f"No mapping for { cmip_name !r} in { self .path } " )
@@ -194,30 +180,30 @@ def realize(self, ds: xr.Dataset, cmip_name: str) -> xr.DataArray:
194180
195181 # set target units if provided
196182 if vc .units :
197- da .attrs [' units' ] = vc .units
183+ da .attrs [" units" ] = vc .units
198184
199185 return da
200186
201187
def _to_varconfig(name: str, cfg: TMapping[str, Any]) -> VarConfig:
    """Normalize a raw YAML entry into a :class:`VarConfig`.

    Parameters
    ----------
    name : str
        CMIP variable name for the entry.
    cfg : mapping
        The raw entry as loaded from YAML. ``table`` falls back to
        ``CMOR_table`` and ``raw_variables`` falls back to ``raw_vars``.

    Returns
    -------
    VarConfig
        Entry with the table name normalized, a single string
        ``raw_variables`` wrapped in a list, and empty ``raw_variables`` /
        ``levels`` collapsed to ``None``.
    """
    # Lookup keys are the bare YAML key names; a leading space inside the
    # literal would never match a key in the mapping file.
    table = _normalize_table_name(cfg.get("table") or cfg.get("CMOR_table"))

    raw_vars = cfg.get("raw_variables") or cfg.get("raw_vars") or None
    if isinstance(raw_vars, str):
        # A scalar is shorthand for a one-element list.
        raw_vars = [raw_vars]

    levels = cfg.get("levels") or None

    return VarConfig(
        name=name,
        table=table,
        units=cfg.get("units"),
        raw_variables=raw_vars,
        source=cfg.get("source"),
        formula=cfg.get("formula"),
        unit_conversion=cfg.get("unit_conversion"),
        positive=cfg.get("positive"),
        cell_methods=cfg.get("cell_methods"),
        levels=levels,
        regrid_method=cfg.get("regrid_method"),
    )
223209
@@ -238,7 +224,7 @@ def _realize_core(ds: xr.Dataset, vc: VarConfig) -> xr.DataArray:
238224 return ds [vc .source ]
239225
240226 # 2) identity mapping from a single raw variable
241- if vc .raw_variables and vc .formula in (None , '' , ' null' ) and len (vc .raw_variables ) == 1 :
227+ if vc .raw_variables and vc .formula in (None , "" , " null" ) and len (vc .raw_variables ) == 1 :
242228 var = vc .raw_variables [0 ]
243229 if var not in ds :
244230 raise KeyError (f"raw variable { var !r} not found in dataset" )
@@ -249,11 +235,9 @@ def _realize_core(ds: xr.Dataset, vc: VarConfig) -> xr.DataArray:
249235 if not vc .raw_variables :
250236 raise ValueError (f"formula given for { vc .name } but no raw_variables listed" )
251237 env = _require_vars (ds , vc .raw_variables , f"realize({ vc .name } )" )
252- safe_globals = {'__builtins__' : {}}
253- safe_locals = {'np' : np , 'xr' : xr , ** env }
254238 try :
255- result = eval (vc .formula , safe_globals , safe_locals ) # noqa: S307 (trusted file )
256- except Exception as exc : # narrow scope: we treat mapping as trusted config
239+ result = _safe_eval (vc .formula , env )
240+ except Exception as exc :
257241 raise ValueError (f"Error evaluating formula for { vc .name } : { exc } " ) from exc
258242 if not isinstance (result , xr .DataArray ):
259243 raise ValueError (f"Formula for { vc .name } did not produce a DataArray" )
@@ -274,26 +258,19 @@ def _apply_unit_conversion(da: xr.DataArray, rule: Any) -> xr.DataArray:
274258 rule : str or dict
275259 - If str, evaluates an expression using ``x`` (the data), with ``np`` and ``xr`` available.
276260 - If dict, supports keys: ``scale`` and optional ``offset``.
277-
278- Returns
279- -------
280- DataArray
281- Converted data array.
282261 """
283262 if isinstance (rule , str ):
284- safe_globals = {'__builtins__' : {}}
285- safe_locals = {'x' : da , 'np' : np , 'xr' : xr }
286263 try :
287- out = eval (rule , safe_globals , safe_locals ) # noqa: S307 (trusted mapping )
264+ out = _safe_eval (rule , { "x" : da } )
288265 except Exception as exc :
289266 raise ValueError (f"Error evaluating unit_conversion expression: { exc } " ) from exc
290267 if not isinstance (out , xr .DataArray ):
291268 raise ValueError ("unit_conversion expression did not return a DataArray" )
292269 return out
293270
294271 if isinstance (rule , dict ):
295- scale = rule .get (' scale' , 1.0 )
296- offset = rule .get (' offset' , 0.0 )
272+ scale = rule .get (" scale" , 1.0 )
273+ offset = rule .get (" offset" , 0.0 )
297274 return da * float (scale ) + float (offset )
298275
299276 raise TypeError ("unit_conversion must be a string expression or a dict with 'scale'/'offset'" )
0 commit comments