Skip to content

Commit 5399941

Browse files
sunt05 and claude authored
feat: add utility to resample SUEWS output before gen_epw (#1015)
Resolves GH#150 by exposing resample_output in supy.util and enhancing gen_epw to accept optional freq and grid parameters. Users can now resample 5-minute output to hourly before EPW generation using variable-appropriate aggregation (mean for instantaneous, sum for accumulated). Includes comprehensive docstrings and test coverage. 🤖 Generated with Claude Code Co-authored-by: Claude <noreply@anthropic.com>
1 parent 9fbab18 commit 5399941

4 files changed

Lines changed: 281 additions & 9 deletions

File tree

src/supy/_post.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,67 @@ def pack_df_output_block(dict_output_block, df_forcing_block):
166166

167167
# resample supy output
168168
def resample_output(df_output, freq="60min", dict_aggm=dict_var_aggm):
169+
"""Resample SUEWS simulation output to a different temporal frequency.
170+
171+
This function resamples time series data using variable-appropriate
172+
aggregation methods. Different variable types are handled correctly:
173+
174+
- **Instantaneous** (temperature, humidity, wind): averaged (mean)
175+
- **Accumulated** (rainfall, runoff): summed
176+
- **State** (soil moisture, daily state): last value
177+
178+
Parameters
179+
----------
180+
df_output : pandas.DataFrame
181+
Output DataFrame from `run_supy`, with MultiIndex (grid, datetime)
182+
and MultiIndex columns (group, var).
183+
freq : str, optional
184+
Target frequency using pandas offset aliases.
185+
Common values: '30min', '60min' or 'h', '3h', 'D'.
186+
Default is '60min' (hourly).
187+
dict_aggm : dict, optional
188+
Custom aggregation rules. Default uses OUTPUT_REGISTRY rules.
189+
Format: {group: {variable: agg_function}}
190+
191+
Returns
192+
-------
193+
pandas.DataFrame
194+
Resampled DataFrame with same structure as input.
195+
196+
Notes
197+
-----
198+
The SUEWS convention uses right-closed intervals with right labels,
199+
meaning timestamps represent the END of each period.
200+
201+
Examples
202+
--------
203+
Basic usage - resample to hourly:
204+
205+
>>> import supy as sp
206+
>>> df_state_init, df_forcing = sp.load_SampleData()
207+
>>> df_output, df_state_final = sp.run_supy(df_forcing, df_state_init)
208+
>>> df_hourly = sp.resample_output(df_output, freq='h')
209+
210+
Resample for EPW generation:
211+
212+
>>> df_hourly = sp.resample_output(df_output, freq='h')
213+
>>> grid = df_hourly.index.get_level_values('grid')[0]
214+
>>> df_epw, meta, path = sp.util.gen_epw(
215+
... df_hourly.loc[grid, 'SUEWS'],
216+
... lat=51.5, lon=-0.1
217+
... )
218+
219+
Or use the convenience freq parameter in gen_epw:
220+
221+
>>> df_epw, meta, path = sp.util.gen_epw(
222+
... df_output, lat=51.5, lon=-0.1, freq='h'
223+
... )
224+
225+
See Also
226+
--------
227+
supy.util.gen_epw : Generate EPW files (supports freq parameter)
228+
supy.data_model.output.OUTPUT_REGISTRY : Aggregation rules source
229+
"""
169230
# Helper function to resample a group with specified parameters
170231
def _resample_group(df_group, freq, label, dict_aggm_group, group_name=None):
171232
"""Resample a dataframe group with specified aggregation rules.

src/supy/util/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,4 +57,6 @@
5757

5858
from ._spinup import get_spinup_state
5959

60+
from .._post import resample_output
61+
6062
# from ._config import SUEWSConfig, init_config_from_yaml

src/supy/util/_tmy.py

Lines changed: 69 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
from pathlib import Path
2-
from typing import Tuple
2+
from typing import Optional, Tuple, Union
33

44
import numpy as np
55
import pandas as pd
66

7+
from .._post import resample_output
8+
79

810
#################################################################
911
# generate TMY dataframe from supy results
@@ -340,25 +342,39 @@ def read_epw(path_epw: Path) -> pd.DataFrame:
340342
# generate EPW file from `df_TMY`
341343
def gen_epw(
342344
df_output: pd.DataFrame,
343-
lat,
344-
lon,
345-
tz=0,
346-
path_epw=Path("./uTMY.epw"),
345+
lat: float,
346+
lon: float,
347+
tz: float = 0,
348+
path_epw: Union[str, Path] = Path("./uTMY.epw"),
349+
freq: Optional[str] = None,
350+
grid: Optional[int] = None,
347351
) -> Tuple[pd.DataFrame, str, Path]:
348-
"""Generate an ``epw`` file of uTMY (urbanised Typical Meteorological Year) using SUEWS simulation results
352+
"""Generate an ``epw`` file of uTMY (urbanised Typical Meteorological Year) using SUEWS simulation results.
349353
350354
Parameters
351355
----------
352356
df_output : pandas.DataFrame
353-
SUEWS simulation results.
357+
SUEWS simulation results. Can be either:
358+
359+
- Full MultiIndex output from `run_supy` (grid, datetime) x (group, var)
360+
- Pre-extracted single-grid SUEWS output (datetime) x (var)
354361
lat : float
355362
Latitude of the site, used for calculating solar angle.
356363
lon : float
357364
Longitude of the site, used for calculating solar angle.
358365
tz : float, optional
359-
time zone represented by time difference from UTC+0 (e.g., 8 for UTC+8), by default 0 (i.e., UTC+0)
360-
path_epw : pathlib.Path, optional
366+
Time zone represented by time difference from UTC+0 (e.g., 8 for UTC+8),
367+
by default 0 (i.e., UTC+0).
368+
path_epw : pathlib.Path or str, optional
361369
Path to store generated epw file, by default Path('./uTMY.epw').
370+
freq : str, optional
371+
Target frequency for resampling (e.g., 'h', '60min', '1h').
372+
If provided, the output is resampled before EPW generation using
373+
variable-appropriate aggregation methods.
374+
Recommended for sub-hourly simulation output. Default is None (no resampling).
375+
grid : int, optional
376+
Grid number to extract if df_output has MultiIndex (grid, datetime).
377+
If not provided and MultiIndex detected, uses the first grid.
362378
363379
Returns
364380
-------
@@ -378,6 +394,26 @@ def gen_epw(
378394
pvlib is not included as a required dependency due to its h5py requirement
379395
which can cause build issues on some platforms.
380396
397+
Examples
398+
--------
399+
Basic usage with pre-extracted data:
400+
401+
>>> df_epw, meta, path = sp.util.gen_epw(
402+
... df_output.loc[grid, 'SUEWS'],
403+
... lat=51.5, lon=-0.1
404+
... )
405+
406+
With automatic resampling and grid extraction:
407+
408+
>>> df_epw, meta, path = sp.util.gen_epw(
409+
... df_output, # Full MultiIndex output from run_supy
410+
... lat=51.5, lon=-0.1,
411+
... freq='h' # Resample to hourly
412+
... )
413+
414+
See Also
415+
--------
416+
supy.resample_output : Resample output with variable-appropriate aggregation
381417
"""
382418
import atmosp
383419
from pathlib import Path
@@ -390,6 +426,30 @@ def gen_epw(
390426
"Note: pvlib requires h5py which may need compilation on some systems."
391427
)
392428

429+
# Handle MultiIndex input from run_supy
430+
if isinstance(df_output.index, pd.MultiIndex):
431+
# Extract grid if needed
432+
if grid is None:
433+
grid = df_output.index.get_level_values("grid").unique()[0]
434+
435+
# Resample if frequency specified (before extracting to single grid)
436+
if freq is not None:
437+
df_output = resample_output(df_output, freq=freq)
438+
439+
# Extract SUEWS group for the specified grid
440+
if isinstance(df_output.columns, pd.MultiIndex):
441+
groups = df_output.columns.get_level_values("group").unique()
442+
if "SUEWS" in groups:
443+
df_output = df_output.loc[grid, "SUEWS"]
444+
else:
445+
df_output = df_output.loc[grid]
446+
else:
447+
df_output = df_output.loc[grid]
448+
elif freq is not None:
449+
# Single-grid input with freq specified - use simple resampling
450+
# For single-grid SUEWS output, variables are typically averaged
451+
df_output = df_output.resample(freq, closed="right", label="right").mean()
452+
393453
# select months from representative years
394454
df_tmy = gen_TMY(df_output.copy())
395455

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
"""Test gen_epw with resampling functionality (GitHub issue #150)."""
2+
3+
import pandas as pd
4+
import numpy as np
5+
import pytest
6+
import supy as sp
7+
8+
9+
class TestGenEpwResample:
    """Check that resample_output is exposed on supy.util and resamples run output."""

    def test_resample_output_in_util(self):
        """resample_output is reachable via supy.util and reduces the row count."""
        assert hasattr(sp.util, "resample_output")

        # Exercise the function on the output of a short real simulation
        df_state_init, df_forcing = sp.load_SampleData()
        forcing_short = df_forcing.iloc[:48]
        df_output, _ = sp.run_supy(forcing_short, df_state_init)

        df_hourly = sp.util.resample_output(df_output, freq="h")

        # Resampling to a coarser step must shrink the number of rows
        n_rows_raw = len(df_output)
        n_rows_hourly = len(df_hourly)
        assert n_rows_hourly < n_rows_raw

        # The (grid, datetime) MultiIndex layout must survive resampling
        idx_hourly = df_hourly.index
        assert isinstance(idx_hourly, pd.MultiIndex)
        assert "grid" in idx_hourly.names

    def test_resample_output_frequency_aliases(self):
        """Several pandas offset aliases are accepted and give non-empty results."""
        df_state_init, df_forcing = sp.load_SampleData()
        # first 144 forcing rows
        forcing_short = df_forcing.iloc[:144]
        df_output, _ = sp.run_supy(forcing_short, df_state_init)

        # Each alias must be accepted and yield at least one row
        for freq in ["30min", "60min", "h", "1h"]:
            df_resampled = sp.util.resample_output(df_output, freq=freq)
            assert len(df_resampled) > 0

        # A coarser target frequency must give fewer rows than a finer one
        n_rows_30min = len(sp.util.resample_output(df_output, freq="30min"))
        n_rows_hourly = len(sp.util.resample_output(df_output, freq="h"))
        assert n_rows_hourly < n_rows_30min

    def test_resample_aggregation_methods(self):
        """Hourly output keeps the SUEWS group and the variables gen_epw relies on."""
        df_state_init, df_forcing = sp.load_SampleData()
        # first 288 forcing rows
        forcing_short = df_forcing.iloc[:288]
        df_output, _ = sp.run_supy(forcing_short, df_state_init)

        df_hourly = sp.util.resample_output(df_output, freq="h")

        # Work on the first grid present in the resampled output
        grid = df_hourly.index.get_level_values("grid")[0]

        # The SUEWS output group must still be present
        groups_present = df_hourly.columns.get_level_values("group").unique()
        assert "SUEWS" in groups_present

        # Every variable this test expects gen_epw to need must have survived
        suews_vars = df_hourly.loc[grid, "SUEWS"].columns.tolist()
        for var in ["T2", "RH2", "U10", "Kdown"]:
            assert var in suews_vars, f"Variable {var} not found in resampled output"
61+
62+
63+
class TestGenEpwMultiIndexInput:
    """Tests for gen_epw input handling: pre-extracted and full MultiIndex output.

    Every test funnels its gen_epw call through ``_gen_epw_or_skip`` so the
    "skip when the optional dependency is missing" logic lives in one place
    instead of being repeated in each test.
    """

    @pytest.fixture
    def sample_output(self):
        """Run a short simulation on the sample data and return its output."""
        df_state_init, df_forcing = sp.load_SampleData()
        # Use enough data for a meaningful test but not too much
        df_output, _ = sp.run_supy(df_forcing.iloc[:288], df_state_init)
        return df_output

    @staticmethod
    def _gen_epw_or_skip(df_input, path_epw, **kwargs):
        """Call gen_epw with fixed lat/lon and return the EPW DataFrame.

        Parameters
        ----------
        df_input : pandas.DataFrame
            Data handed to ``sp.util.gen_epw`` as its first argument.
        path_epw : pathlib.Path
            Destination path passed through as ``path_epw``.
        **kwargs
            Extra keyword arguments forwarded to gen_epw (e.g. ``freq``, ``grid``).

        Returns
        -------
        pandas.DataFrame
            First element of the 3-tuple returned by gen_epw.

        Notes
        -----
        The test is skipped if gen_epw raises ImportError. Only the gen_epw
        call sits inside the ``try`` so that unrelated failures are never
        turned into skips.
        """
        try:
            result = sp.util.gen_epw(
                df_input,
                lat=51.5,
                lon=-0.1,
                path_epw=path_epw,
                **kwargs,
            )
        except ImportError:
            pytest.skip("pvlib not installed")
        # Unpack outside the try: this also checks that a 3-tuple came back
        df_epw, _meta, _path = result
        return df_epw

    def test_gen_epw_accepts_multiindex(self, sample_output, tmp_path):
        """Test that gen_epw still accepts pre-extracted single-grid data, without freq.

        Despite the test name, the input here is ``sample_output.loc[grid, "SUEWS"]``,
        not the full MultiIndex frame; the name is kept so test IDs stay stable.
        """
        grid = sample_output.index.get_level_values("grid")[0]

        # Providing extracted data is the original calling convention
        df_epw = self._gen_epw_or_skip(
            sample_output.loc[grid, "SUEWS"],
            tmp_path / "test.epw",
        )
        assert isinstance(df_epw, pd.DataFrame)

    def test_gen_epw_with_grid_extraction(self, sample_output, tmp_path):
        """Test that gen_epw accepts the full MultiIndex output with no grid given."""
        df_epw = self._gen_epw_or_skip(
            sample_output,  # Full MultiIndex
            tmp_path / "test_auto.epw",
        )
        assert isinstance(df_epw, pd.DataFrame)

    def test_gen_epw_with_freq_param(self, sample_output, tmp_path):
        """Test that gen_epw accepts the freq parameter with full MultiIndex output."""
        df_epw = self._gen_epw_or_skip(
            sample_output,  # Full MultiIndex
            tmp_path / "test_freq.epw",
            freq="h",  # Resample to hourly
        )
        assert isinstance(df_epw, pd.DataFrame)

    def test_gen_epw_with_specific_grid(self, sample_output, tmp_path):
        """Test that gen_epw accepts an explicit grid parameter."""
        grid = sample_output.index.get_level_values("grid")[0]

        df_epw = self._gen_epw_or_skip(
            sample_output,
            tmp_path / "test_grid.epw",
            grid=grid,  # Specify grid explicitly
        )
        assert isinstance(df_epw, pd.DataFrame)

    def test_gen_epw_freq_with_extracted_data(self, sample_output, tmp_path):
        """Test that freq also works with pre-extracted single-grid data."""
        grid = sample_output.index.get_level_values("grid")[0]
        df_single = sample_output.loc[grid, "SUEWS"]

        df_epw = self._gen_epw_or_skip(
            df_single,
            tmp_path / "test_extracted_freq.epw",
            freq="h",  # Pre-extracted input combined with freq
        )
        assert isinstance(df_epw, pd.DataFrame)

0 commit comments

Comments
 (0)