Skip to content

Commit ecf20d9

Browse files
Add pad_year function for xarray time slicing with years <1000 (#972)
1 parent 93c067f commit ecf20d9

File tree

9 files changed

+232
-14
lines changed

9 files changed

+232
-14
lines changed

.github/workflows/build_workflow.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,9 @@ jobs:
8080
uses: conda-incubator/setup-miniconda@v3
8181
with:
8282
activate-environment: "e3sm_diags_ci"
83+
environment-file: conda-env/ci.yml
8384
miniforge-variant: Miniforge3
8485
miniforge-version: latest
85-
environment-file: conda-env/ci.yml
8686
channel-priority: strict
8787
auto-update-conda: true
8888
python-version: ${{ matrix.python-version }}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
"""
2+
This script is designed to debug a specific issue in the E3SM Diagnostics tool
3+
(e3sm_diags) related to the tropical_subseasonal set. The bug occurs when
4+
processing year values less than 1000, resulting in the error:
5+
"ValueError: no ISO-8601 or cftime-string-like match for string: 1-01-01".
6+
This issue was reported in PR #971.
7+
8+
The script replicates the behavior of the following command-line invocation
9+
of e3sm_diags:
10+
11+
e3sm_diags tropical_subseasonal --no_viewer --reference_data_path '/lcrc/soft/climate/e3sm_diags_data/obs_for_e3sm_diags/time-series' --test_data_path '/lcrc/group/e3sm2/ac.xzheng/E3SMv3_dev/20250404.wcycl1850.ne120pg2_r025_RRSwISC6to18E3r5.test4.chrysalis//post/atm/180x360_traave/ts/daily/1yr' --results_dir '/lcrc/group/e3sm/public_html/diagnostic_output/ac.zhang40/tests/tropical_subseasonal_time_fix' --case_id 'wavenumber-frequency' --ref_timeseries_input --test_timeseries_input --run_type 'model_vs_obs' --sets 'tropical_subseasonal' --variables 'PRECT' --seasons 'ANN' 'DJF' 'MAM' 'JJA' 'SON' --regions '15S15N' --regrid_tool 'xesmf' --regrid_method 'conservative_normed' --multiprocessing --num_workers '32' --backend 'cartopy' --output_format 'png' --output_format_subplot 'pdf' --canvas_size_w '1212' --canvas_size_h '1628' --figsize '8.5' '11.0' --dpi '150' --arrows --test_name 'v3.HR_test4' --short_test_name 'v3.HR_test4' --test_colormap 'cet_rainbow.rgb' --ref_name 'IMERG_Daily' --reference_name 'IMERG Daily' --reference_colormap 'cet_rainbow.rgb' --diff_title 'percent difference' --diff_colormap 'diverging_bwr.rgb' --granulate 'variables' 'plevs' 'regions' --selectors 'sets' 'seasons' --test_start_yr 2 --test_end_yr 18 --ref_start_yr 2001 --ref_end_yr 2010
12+
13+
The script uses the e3sm_diags Python API to configure and run the diagnostics
14+
with the same parameters as the command-line invocation. It is intended to
15+
help identify and resolve the issue with year values less than 1000.
16+
"""
17+
18+
from e3sm_diags.parameter.core_parameter import CoreParameter
19+
from e3sm_diags.run import runner
20+
from e3sm_diags.parameter.tropical_subseasonal_parameter import TropicalSubseasonalParameter
21+
22+
# Set up parameters
23+
param = CoreParameter()
24+
param.no_viewer = True
25+
param.reference_data_path = '/lcrc/soft/climate/e3sm_diags_data/obs_for_e3sm_diags/time-series'
26+
param.test_data_path = '/lcrc/group/e3sm2/ac.xzheng/E3SMv3_dev/20250404.wcycl1850.ne120pg2_r025_RRSwISC6to18E3r5.test4.chrysalis//post/atm/180x360_traave/ts/daily/1yr'
27+
param.results_dir = '/lcrc/group/e3sm/public_html/diagnostic_output/ac.tvo/tests/tropical_subseasonal_time_fix'
28+
param.case_id = 'wavenumber-frequency'
29+
param.ref_timeseries_input = True
30+
param.test_timeseries_input = True
31+
param.run_type = 'model_vs_obs'
32+
param.sets = ['tropical_subseasonal']
33+
param.variables = ['PRECT']
34+
param.seasons = ['ANN', 'DJF', 'MAM', 'JJA', 'SON']
35+
param.regions = ['15S15N']
36+
param.regrid_tool = 'xesmf'
37+
param.regrid_method = 'conservative_normed'
38+
param.multiprocessing = True
39+
param.num_workers = 32
40+
param.output_format_subplot = ['pdf']
41+
param.canvas_size_w = 1212
42+
param.canvas_size_h = 1628
43+
param.figsize = [8.5, 11.0]
44+
param.dpi = 150
45+
param.arrows = True
46+
param.short_test_name = 'v3.HR_test4'
47+
param.test_colormap = 'cet_rainbow.rgb'
48+
param.ref_name = 'IMERG_Daily'
49+
param.reference_name = 'IMERG Daily'
50+
param.reference_colormap = 'cet_rainbow.rgb'
51+
param.diff_title = 'percent difference'
52+
param.diff_colormap = 'diverging_bwr.rgb'
53+
param.granulate = ['variables', 'plevs', 'regions']
54+
param.selectors = ['sets', 'seasons']
55+
56+
57+
trop_param = TropicalSubseasonalParameter()
58+
trop_param.test_start_yr = 2
59+
trop_param.test_name = 'v3.HR_test4'
60+
61+
trop_param.test_end_yr = 18
62+
trop_param.ref_start_yr = 2001
63+
trop_param.ref_end_yr = 2010
64+
65+
# Run the diagnostics
66+
runner.sets_to_run = ['tropical_subseasonal']
67+
runner.run_diags([param, trop_param])

e3sm_diags/driver/tropical_subseasonal_driver.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from e3sm_diags.driver.utils import zwf_functions as wf
1919
from e3sm_diags.driver.utils.climo_xr import ClimoFreq
2020
from e3sm_diags.driver.utils.dataset_xr import Dataset
21+
from e3sm_diags.driver.utils.general import pad_year
2122
from e3sm_diags.logger import _setup_child_logger
2223
from e3sm_diags.plot.tropical_subseasonal_plot import plot
2324

@@ -130,10 +131,15 @@ def calculate_spectrum(path, variable, start_year, end_year):
130131
"dosymmetries": True,
131132
"rmvLowFrq": True,
132133
}
133-
# TODO the time subsetting and variable derivation should be replaced during cdat revamp
134+
# TODO the time subsetting and variable derivation should be replaced during
135+
# cdat revamp.
136+
137+
start_year_str = pad_year(start_year)
138+
end_year_str = pad_year(end_year)
134139
try:
135140
var = xr.open_mfdataset(glob.glob(f"{path}/{variable}_*.nc")).sel(
136-
lat=slice(-15, 15), time=slice(f"{start_year}-01-01", f"{end_year}-12-31")
141+
lat=slice(-15, 15),
142+
time=slice(f"{start_year_str}-01-01", f"{end_year_str}-12-31"),
137143
)[variable]
138144
actual_start = var.time.dt.year.values[0]
139145
actual_end = var.time.dt.year.values[-1]

e3sm_diags/driver/utils/dataset_xr.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
)
3232
from e3sm_diags.driver import FRAC_REGION_KEYS, LAND_OCEAN_MASK_PATH
3333
from e3sm_diags.driver.utils.climo_xr import CLIMO_FREQS, ClimoFreq, climo
34+
from e3sm_diags.driver.utils.general import pad_year
3435
from e3sm_diags.driver.utils.regrid import HYBRID_SIGMA_KEYS
3536
from e3sm_diags.logger import _setup_child_logger
3637

@@ -1197,13 +1198,14 @@ def _get_time_slice(self, ds: xr.Dataset) -> slice:
11971198
f"end_year ({end_yr_int}) > var_end_yr ({var_end_year})."
11981199
)
11991200

1200-
start_yr_str = str(start_yr_int).zfill(4)
1201-
end_yr_str = str(end_yr_int).zfill(4)
1201+
start_yr_str = pad_year(start_yr_int)
1202+
end_yr_str = pad_year(end_yr_int)
12021203

12031204
if self.is_sub_monthly:
12041205
start_time = f"{start_yr_str}-01-01"
12051206

1206-
end_yr_str = str(int(end_yr_str) + 1).zfill(4)
1207+
new_end_year_int = int(end_yr_str) + 1
1208+
end_yr_str = pad_year(new_end_year_int)
12071209
end_time = f"{end_yr_str}-01-01"
12081210
else:
12091211
start_time = self._get_slice_with_bounds(ds, start_yr_str, "start")

e3sm_diags/driver/utils/general.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from __future__ import annotations
2+
13
from e3sm_diags.logger import _setup_child_logger
24

35
logger = _setup_child_logger(__name__)
@@ -15,3 +17,68 @@ def _monotonically_decreasing(L):
1517
def _monotonically_increasing(L):
1618
# FIXME: B905: zip() without an explicit strict= parameter
1719
return all(x <= y for x, y in zip(L, L[1:]))
20+
21+
22+
def pad_year(year: int | str) -> str:
23+
"""Pad the year with leading zeros to ensure it is 4 digits.
24+
25+
This function ensures that the input year is properly formatted as a
26+
4-digit string, which is required for ISO-8601 date formats (YYYY-MM-DD).
27+
If the year is less than 1000, it is padded with leading zeros.
28+
29+
Parameters
30+
----------
31+
year : int or str
32+
The year to pad. Must be a non-negative integer or a string representing
33+
a non-negative integer. Floats are not allowed.
34+
35+
Returns
36+
-------
37+
str
38+
The padded year as a 4-digit string (e.g., "0042" for year 42).
39+
40+
Raises
41+
------
42+
ValueError
43+
If the input year is not a non-negative integer or a string representing
44+
a non-negative integer, or if it is a float, or if it is outside the
45+
range 0 to 9999 inclusive.
46+
47+
Examples
48+
--------
49+
>>> pad_year(42)
50+
'0042'
51+
>>> pad_year("42")
52+
'0042'
53+
>>> pad_year(2023)
54+
'2023'
55+
>>> pad_year("2023")
56+
'2023'
57+
>>> pad_year(-1)
58+
Traceback (most recent call last):
59+
...
60+
ValueError: Year must be between 0 and 9999 inclusive.
61+
>>> pad_year(10000)
62+
Traceback (most recent call last):
63+
...
64+
ValueError: Year must be between 0 and 9999 inclusive.
65+
>>> pad_year(42.0)
66+
Traceback (most recent call last):
67+
...
68+
ValueError: Year must not be a float.
69+
"""
70+
try:
71+
if isinstance(year, float):
72+
raise ValueError("Year must not be a float.")
73+
74+
year = int(year)
75+
except (ValueError, TypeError) as e:
76+
raise ValueError(
77+
"Year must be a non-negative integer or a string representing a "
78+
"non-negative integer."
79+
) from e
80+
81+
if year < 0 or year > 9999:
82+
raise ValueError("Year must be between 0 and 9999 inclusive.")
83+
84+
return f"{year:04d}"

e3sm_diags/parameter/core_parameter.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from e3sm_diags.derivations.derivations import DerivedVariablesMap
1111
from e3sm_diags.driver.utils.climo_xr import ClimoFreq
12+
from e3sm_diags.driver.utils.general import pad_year
1213
from e3sm_diags.driver.utils.regrid import REGRID_TOOLS
1314
from e3sm_diags.logger import _setup_child_logger
1415

@@ -42,6 +43,15 @@
4243
# "tropical_subseasonal",
4344
]
4445

46+
YEAR_ATTRIBUTES = [
47+
"start_yr",
48+
"end_yr",
49+
"test_start_yr",
50+
"test_end_yr",
51+
"ref_start_yr",
52+
"ref_end_yr",
53+
]
54+
4555
if TYPE_CHECKING:
4656
from e3sm_diags.driver.utils.dataset_xr import Dataset
4757

@@ -346,3 +356,12 @@ def _run_diag(self) -> List[Any]:
346356
sys.exit()
347357

348358
return results
359+
360+
def __setattr__(self, name: str, value: Any) -> None:
    """Set an attribute, zero-padding the value first if it is a year.

    Names listed in ``YEAR_ATTRIBUTES`` have their value passed through
    ``pad_year`` before being stored, unless the value is ``None`` or an
    empty string. All other attributes are stored unchanged.

    Parameters
    ----------
    name : str
        The attribute name.
    value : Any
        The value to assign.
    """
    # The membership test on `value` only runs for year attributes (the `or`
    # short-circuits), so arbitrary values of other attributes are never
    # compared against None / "".
    if name not in YEAR_ATTRIBUTES or value in (None, ""):
        super().__setattr__(name, value)
        return

    # Validate and pad the year, then store it via the superclass method.
    super().__setattr__(name, pad_year(value))

tests/e3sm_diags/driver/utils/test_dataset_xr.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,17 +75,17 @@ def _create_parameter_object(
7575
parameter.ref_timeseries_input = False
7676

7777
parameter.reference_data_path = data_path
78-
parameter.ref_start_yr = start_yr # type: ignore
79-
parameter.ref_end_yr = end_yr # type: ignore
78+
parameter.ref_start_yr = start_yr
79+
parameter.ref_end_yr = end_yr
8080
elif dataset_type == "test":
8181
if data_type == "time_series":
8282
parameter.test_timeseries_input = True
8383
else:
8484
parameter.test_timeseries_input = False
8585

8686
parameter.test_data_path = data_path
87-
parameter.test_start_yr = start_yr # type: ignore
88-
parameter.test_end_yr = end_yr # type: ignore
87+
parameter.test_start_yr = start_yr
88+
parameter.test_end_yr = end_yr
8989

9090
return parameter
9191

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import pytest
2+
3+
from e3sm_diags.driver.utils.general import pad_year
4+
5+
6+
class TestPadYear:
    """Checks that ``pad_year`` pads valid years and rejects invalid ones."""

    # (input, expected 4-digit string), covering both int and str inputs and
    # the lower and upper bounds.
    VALID_CASES = [
        (42, "0042"),
        ("42", "0042"),
        (2023, "2023"),
        ("2023", "2023"),
        (0, "0000"),
        ("0", "0000"),
        (9999, "9999"),
        ("9999", "9999"),
    ]

    # Inputs that must raise ValueError: out of range, non-numeric, None, and
    # float-like values.
    INVALID_CASES = [-1, "abcd", None, 10000, "10000", 1.5, "1.5"]

    @pytest.mark.parametrize("input_year, expected_output", VALID_CASES)
    def test_valid_years(self, input_year, expected_output):
        result = pad_year(input_year)

        assert result == expected_output

    @pytest.mark.parametrize("invalid_year", INVALID_CASES)
    def test_invalid_years(self, invalid_year):
        with pytest.raises(ValueError):
            pad_year(invalid_year)

tests/e3sm_diags/test_parameters.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,13 @@ def test__add__copies_attributes_from_other_object(self):
3131
param2 = CoreParameter()
3232

3333
# Add custom attributes to the second object
34-
param2.test_start_yr = 2000 # type: ignore
35-
param2.test_end_yr = 2001 # type: ignore
34+
param2.test_start_yr = 2000
35+
param2.test_end_yr = 2001
3636

3737
new_param = param2 + param1
3838

39-
assert new_param.test_start_yr == 2000
40-
assert new_param.test_end_yr == 2001
39+
assert new_param.test_start_yr == "2000"
40+
assert new_param.test_end_yr == "2001"
4141

4242
def test_check_values_does_not_raise_error_if_required_args_are_set(self):
4343
param = CoreParameter()
@@ -105,6 +105,26 @@ def test_logs_error_if_driver_module_for_set_not_found(self, caplog):
105105
in caplog.text
106106
)
107107

108+
def test_year_properties_automatic_padding(self):
    """Year attributes assigned as int or str are stored zero-padded to 4 digits."""
    param = CoreParameter()

    # attribute name -> (value assigned, value expected to be stored)
    cases = {
        "start_yr": ("95", "0095"),
        "end_yr": (2000, "2000"),
        "test_start_yr": ("0", "0000"),
        "test_end_yr": (95, "0095"),
        "ref_start_yr": ("100", "0100"),
        "ref_end_yr": ("9999", "9999"),
    }

    # Assign every year attribute first ...
    for attr, (assigned, _) in cases.items():
        setattr(param, attr, assigned)

    # ... then verify that all values have been properly padded.
    for attr, (_, expected) in cases.items():
        assert getattr(param, attr) == expected
127+
108128
@pytest.mark.xfail
109129
def test_logs_exception_if_driver_run_diag_function_fails(self, caplog):
110130
# TODO: Need to implement this test by raising an exception through

0 commit comments

Comments
 (0)