Skip to content

Commit e8d98f6

Browse files
committed
fix pylint issues
1 parent 6bc2ad4 commit e8d98f6

File tree

1 file changed

+63
-86
lines changed

1 file changed

+63
-86
lines changed

cmip7_prep/mapping_compat.py

Lines changed: 63 additions & 86 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11

2-
"""Mapping loader/evaluator compatible with CMIP6-style lists and CMIP7-style dicts.
2+
# cmip7_prep/mapping_compat.py
3+
r"""Mapping loader/evaluator compatible with CMIP6-style lists and CMIP7-style dicts.
34
45
This module provides a `Mapping` class that:
56
- Loads a YAML mapping file where **keys are CMIP variable names** (preferred), or
@@ -8,36 +9,11 @@
89
- Builds CMIP variables from a native CESM `xarray.Dataset` via :meth:`realize`,
910
supporting raw variables, formulas, and simple unit conversions.
1011
11-
YAML schema (both supported)
12-
---------------------------
13-
# CMIP7-style (preferred):
14-
tas:
15-
table: Amon # or CMIP6_Amon.json / CMIP7_Amon.json
16-
units: K
17-
raw_variables: [TREFHT] # from the CESM files
18-
regrid_method: bilinear
19-
20-
# CMIP6-style (list of dicts):
21-
- name: tas
22-
table: CMIP6_Amon.json
23-
units: K
24-
raw_variables: [TREFHT]
25-
unit_conversion: null
26-
formula: null
27-
28-
Unit conversion support
29-
-----------------------
30-
- String expression using ``x`` (the realized DataArray), with ``np`` and ``xr`` available.
31-
Example: ``unit_conversion: "x * 86400.0"`` to convert kg m-2 s-1 -> mm/day for precipitation.
32-
- Dict with ``scale`` and optional ``offset`` (applied as x * scale + offset).
33-
34-
Formula support
35-
---------------
36-
- A Python expression combining raw variables by name (e.g., ``"bc_a1+bc_a4+bc_c1+bc_c4"``).
37-
Only variables present in the mapping entry are available, plus ``np``/``xr``. No builtins.
38-
39-
Safety note: formulas and conversions are evaluated in a heavily restricted environment.
40-
This is intended for trusted mapping files under your control.
12+
Security note
13+
-------------
14+
Formulas and conversion expressions come from a trusted, local mapping file and are
15+
evaluated in a restricted environment. We centralize the use of ``eval`` in a helper that
16+
is clearly marked and limited, and we disable the linter warning only inside that helper.
4117
"""
4218
from __future__ import annotations
4319

@@ -47,7 +23,7 @@
4723

4824
import numpy as np
4925
import xarray as xr
50-
import yaml
26+
import yaml # runtime dependency; ignored by pylint via .pylintrc
5127

5228

5329
def _normalize_table_name(value: Optional[str]) -> Optional[str]:
@@ -58,16 +34,17 @@ def _normalize_table_name(value: Optional[str]) -> Optional[str]:
5834
if not value:
5935
return None
6036
s = str(value)
61-
if s.lower().endswith('.json'):
37+
if s.lower().endswith(".json"):
6238
s = s[:-5]
6339
# strip CMIPx_ prefix if present
64-
if '_' in s:
65-
parts = s.split('_', 1)
66-
if len(parts) == 2 and parts[0].upper().startswith('CMIP'):
40+
if "_" in s:
41+
parts = s.split("_", 1)
42+
if len(parts) == 2 and parts[0].upper().startswith("CMIP"):
6743
s = parts[1]
6844
return s
6945

7046

47+
# pylint: disable=too-many-instance-attributes
7148
@dataclass(frozen=True)
7249
class VarConfig:
7350
"""Normalized mapping entry for a single CMIP variable."""
@@ -86,21 +63,40 @@ class VarConfig:
8663
def as_cfg(self) -> Dict[str, Any]:
8764
"""Return a plain dict view for convenience in other modules."""
8865
d = {
89-
'name': self.name,
90-
'table': self.table,
91-
'units': self.units,
92-
'raw_variables': self.raw_variables,
93-
'source': self.source,
94-
'formula': self.formula,
95-
'unit_conversion': self.unit_conversion,
96-
'positive': self.positive,
97-
'cell_methods': self.cell_methods,
98-
'levels': self.levels,
99-
'regrid_method': self.regrid_method,
66+
"name": self.name,
67+
"table": self.table,
68+
"units": self.units,
69+
"raw_variables": self.raw_variables,
70+
"source": self.source,
71+
"formula": self.formula,
72+
"unit_conversion": self.unit_conversion,
73+
"positive": self.positive,
74+
"cell_methods": self.cell_methods,
75+
"levels": self.levels,
76+
"regrid_method": self.regrid_method,
10077
}
10178
return {k: v for k, v in d.items() if v is not None}
10279

10380

81+
def _safe_eval(expr: str, local_names: Dict[str, Any]) -> Any:
82+
"""Evaluate a small arithmetic/xarray expression in a restricted environment.
83+
84+
Only the names provided in `local_names` are available, plus ``np`` (numpy) and ``xr`` (xarray).
85+
86+
Used for:
87+
- combining raw variables in `formula`
88+
- simple unit conversions (string form)
89+
90+
Mapping files are assumed trusted; we still minimize surface area by stripping
91+
builtins and only exposing needed names.
92+
"""
93+
safe_globals = {"__builtins__": {}}
94+
locals_safe = {"np": np, "xr": xr}
95+
locals_safe.update(local_names)
96+
# pylint: disable=eval-used
97+
return eval(expr, safe_globals, locals_safe)
98+
99+
104100
class Mapping:
105101
"""Load and evaluate a CMIP mapping YAML file.
106102
@@ -123,7 +119,7 @@ def __init__(self, path: str | Path) -> None:
123119
# -----------------
124120
@staticmethod
125121
def _load_yaml(path: Path) -> Dict[str, VarConfig]:
126-
with path.open('r', encoding='utf-8') as f:
122+
with path.open("r", encoding="utf-8") as f:
127123
data = yaml.safe_load(f)
128124

129125
result: Dict[str, VarConfig] = {}
@@ -137,9 +133,9 @@ def _load_yaml(path: Path) -> Dict[str, VarConfig]:
137133
elif isinstance(data, list):
138134
# CMIP6-style: list with 'name' field
139135
for item in data:
140-
if not isinstance(item, dict) or 'name' not in item:
136+
if not isinstance(item, dict) or "name" not in item:
141137
continue
142-
name = str(item['name'])
138+
name = str(item["name"])
143139
result[name] = _to_varconfig(name, item)
144140
else:
145141
raise TypeError("Unsupported YAML structure: expected dict or list at top level.")
@@ -171,16 +167,6 @@ def realize(self, ds: xr.Dataset, cmip_name: str) -> xr.DataArray:
171167
- `raw_variables`: list of CESM variable names (used by formula or identity)
172168
- `formula`: Python expression combining raw variables
173169
- `unit_conversion`: str expression (using `x`) or dict {scale, offset}
174-
175-
Returns
176-
-------
177-
xr.DataArray
178-
The realized variable, with attrs possibly updated to target units.
179-
180-
Raises
181-
------
182-
KeyError if required raw variables are not present in `ds`.
183-
ValueError if the mapping is incomplete or inconsistent.
184170
"""
185171
if cmip_name not in self._vars:
186172
raise KeyError(f"No mapping for {cmip_name!r} in {self.path}")
@@ -194,30 +180,30 @@ def realize(self, ds: xr.Dataset, cmip_name: str) -> xr.DataArray:
194180

195181
# set target units if provided
196182
if vc.units:
197-
da.attrs['units'] = vc.units
183+
da.attrs["units"] = vc.units
198184

199185
return da
200186

201187

202188
def _to_varconfig(name: str, cfg: TMapping[str, Any]) -> VarConfig:
203189
"""Normalize a raw YAML entry into a :class:`VarConfig`."""
204-
table = _normalize_table_name(cfg.get('table') or cfg.get('CMOR_table'))
205-
raw_vars = cfg.get('raw_variables') or cfg.get('raw_vars') or None
190+
table = _normalize_table_name(cfg.get("table") or cfg.get("CMOR_table"))
191+
raw_vars = cfg.get("raw_variables") or cfg.get("raw_vars") or None
206192
if isinstance(raw_vars, str):
207193
raw_vars = [raw_vars]
208-
levels = cfg.get('levels') or None
194+
levels = cfg.get("levels") or None
209195
vc = VarConfig(
210196
name=name,
211197
table=table,
212-
units=cfg.get('units'),
198+
units=cfg.get("units"),
213199
raw_variables=raw_vars,
214-
source=cfg.get('source'),
215-
formula=cfg.get('formula'),
216-
unit_conversion=cfg.get('unit_conversion'),
217-
positive=cfg.get('positive'),
218-
cell_methods=cfg.get('cell_methods'),
200+
source=cfg.get("source"),
201+
formula=cfg.get("formula"),
202+
unit_conversion=cfg.get("unit_conversion"),
203+
positive=cfg.get("positive"),
204+
cell_methods=cfg.get("cell_methods"),
219205
levels=levels,
220-
regrid_method=cfg.get('regrid_method'),
206+
regrid_method=cfg.get("regrid_method"),
221207
)
222208
return vc
223209

@@ -238,7 +224,7 @@ def _realize_core(ds: xr.Dataset, vc: VarConfig) -> xr.DataArray:
238224
return ds[vc.source]
239225

240226
# 2) identity mapping from a single raw variable
241-
if vc.raw_variables and vc.formula in (None, '', 'null') and len(vc.raw_variables) == 1:
227+
if vc.raw_variables and vc.formula in (None, "", "null") and len(vc.raw_variables) == 1:
242228
var = vc.raw_variables[0]
243229
if var not in ds:
244230
raise KeyError(f"raw variable {var!r} not found in dataset")
@@ -249,11 +235,9 @@ def _realize_core(ds: xr.Dataset, vc: VarConfig) -> xr.DataArray:
249235
if not vc.raw_variables:
250236
raise ValueError(f"formula given for {vc.name} but no raw_variables listed")
251237
env = _require_vars(ds, vc.raw_variables, f"realize({vc.name})")
252-
safe_globals = {'__builtins__': {}}
253-
safe_locals = {'np': np, 'xr': xr, **env}
254238
try:
255-
result = eval(vc.formula, safe_globals, safe_locals) # noqa: S307 (trusted file)
256-
except Exception as exc: # narrow scope: we treat mapping as trusted config
239+
result = _safe_eval(vc.formula, env)
240+
except Exception as exc:
257241
raise ValueError(f"Error evaluating formula for {vc.name}: {exc}") from exc
258242
if not isinstance(result, xr.DataArray):
259243
raise ValueError(f"Formula for {vc.name} did not produce a DataArray")
@@ -274,26 +258,19 @@ def _apply_unit_conversion(da: xr.DataArray, rule: Any) -> xr.DataArray:
274258
rule : str or dict
275259
- If str, evaluates an expression using ``x`` (the data), with ``np`` and ``xr`` available.
276260
- If dict, supports keys: ``scale`` and optional ``offset``.
277-
278-
Returns
279-
-------
280-
DataArray
281-
Converted data array.
282261
"""
283262
if isinstance(rule, str):
284-
safe_globals = {'__builtins__': {}}
285-
safe_locals = {'x': da, 'np': np, 'xr': xr}
286263
try:
287-
out = eval(rule, safe_globals, safe_locals) # noqa: S307 (trusted mapping)
264+
out = _safe_eval(rule, {"x": da})
288265
except Exception as exc:
289266
raise ValueError(f"Error evaluating unit_conversion expression: {exc}") from exc
290267
if not isinstance(out, xr.DataArray):
291268
raise ValueError("unit_conversion expression did not return a DataArray")
292269
return out
293270

294271
if isinstance(rule, dict):
295-
scale = rule.get('scale', 1.0)
296-
offset = rule.get('offset', 0.0)
272+
scale = rule.get("scale", 1.0)
273+
offset = rule.get("offset", 0.0)
297274
return da * float(scale) + float(offset)
298275

299276
raise TypeError("unit_conversion must be a string expression or a dict with 'scale'/'offset'")

0 commit comments

Comments
 (0)