Skip to content

Commit 95a62dd

Browse files
authored
Merge pull request #145 from Deltares/feat/DEI-263-monthly-average
Feat/DEI-263 multi year monthly average
2 parents 1e20984 + 4b1b491 commit 95a62dd

11 files changed

Lines changed: 357 additions & 50 deletions

File tree

decoimpact/business/entities/rules/time_aggregation_rule.py

Lines changed: 116 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,10 @@
1111
TimeAggregationRule
1212
"""
1313

14-
from typing import List
14+
from typing import List, Optional
1515

1616
import numpy as _np
1717
import xarray as _xr
18-
from xarray.core.resample import DataArrayResample
1918

2019
from decoimpact.business.entities.rules.i_array_based_rule import IArrayBasedRule
2120
from decoimpact.business.entities.rules.rule_base import RuleBase
@@ -36,18 +35,44 @@ def __init__(
3635
name: str,
3736
input_variable_names: List[str],
3837
operation_type: TimeOperationType,
38+
multi_year_start: Optional[int] = None,
39+
multi_year_end: Optional[int] = None,
3940
):
41+
# pylint: disable=too-many-arguments
42+
# pylint: disable=too-many-positional-arguments
4043
super().__init__(name, input_variable_names)
4144
self._settings = TimeOperationSettings({"month": "ME", "year": "YE"})
4245
self._settings.percentile_value = 0
4346
self._settings.operation_type = operation_type
4447
self._settings.time_scale = "year"
48+
self._multi_year_start = multi_year_start
49+
self._multi_year_end = multi_year_end
4550

4651
@property
4752
def settings(self):
4853
"""Time operation settings"""
4954
return self._settings
5055

56+
@property
57+
def multi_year_start(self) -> Optional[int]:
58+
"""Start year for the aggregation (inclusive)"""
59+
return self._multi_year_start
60+
61+
@multi_year_start.setter
62+
def multi_year_start(self, value: Optional[int]) -> None:
63+
# optional validation here
64+
self._multi_year_start = value
65+
66+
@property
67+
def multi_year_end(self) -> Optional[int]:
68+
"""End year for the aggregation (inclusive)"""
69+
return self._multi_year_end
70+
71+
@multi_year_end.setter
72+
def multi_year_end(self, value: Optional[int]) -> None:
73+
# optional validation here
74+
self._multi_year_end = value
75+
5176
def validate(self, logger: ILogger) -> bool:
5277
"""Validates if the rule is valid
5378
@@ -56,6 +81,21 @@ def validate(self, logger: ILogger) -> bool:
5681
"""
5782
return self.settings.validate(self.name, logger)
5883

84+
def filter_years(
85+
self, time_dim_name: str, value_array: _xr.DataArray
86+
) -> _xr.DataArray:
87+
"""Filters the values for the specified start and end year
88+
89+
Args:
90+
value_array (DataArray): value to filter
91+
"""
92+
start = (
93+
str(self._multi_year_start) if self._multi_year_start is not None else None
94+
)
95+
end = str(self._multi_year_end) if self._multi_year_end is not None else None
96+
slice_obj = slice(start, end)
97+
return value_array.sel({time_dim_name: slice_obj})
98+
5999
def execute(self, value_array: _xr.DataArray, logger: ILogger) -> _xr.DataArray:
60100
"""Aggregates the values for the specified start and end date
61101
@@ -81,14 +121,19 @@ def execute(self, value_array: _xr.DataArray, logger: ILogger) -> _xr.DataArray:
81121

82122
dim_name = get_dict_element(settings.time_scale, settings.time_scale_mapping)
83123

124+
# get the name of the time dimension
84125
time_dim_name = get_time_dimension_name(value_array, logger)
85-
aggregated_values = value_array.resample({time_dim_name: dim_name}, skipna=True)
86-
87-
result = self._perform_operation(aggregated_values)
88-
# create a new aggregated time dimension based on original time dimension
89126

90-
result_time_dim_name = f"{time_dim_name}_{settings.time_scale}"
91-
result = result.rename({time_dim_name: result_time_dim_name})
127+
# perform aggregations in case of multi-year monthly average
128+
if TimeOperationType.MULTI_YEAR_MONTHLY_AVERAGE == settings.operation_type:
129+
result, result_time_dim_name = self._handle_grouping_operation(
130+
time_dim_name, value_array
131+
)
132+
# perform the operation in case of other operation types
133+
else:
134+
result, result_time_dim_name = self._handle_aggregation_operation(
135+
settings, dim_name, time_dim_name, value_array
136+
)
92137

93138
for key, value in value_array[time_dim_name].attrs.items():
94139
if value:
@@ -102,12 +147,73 @@ def execute(self, value_array: _xr.DataArray, logger: ILogger) -> _xr.DataArray:
102147

103148
return result
104149

105-
def _perform_operation(self, aggregated_values: DataArrayResample) -> _xr.DataArray:
150+
def _handle_grouping_operation(
151+
self,
152+
time_dim_name: str,
153+
value_array: _xr.DataArray,
154+
) -> tuple[_xr.DataArray, str]:
155+
"""Handles the grouping operation for multi-year monthly average"""
156+
filtered_values = self.filter_years(time_dim_name, value_array)
157+
grouped_values = filtered_values.groupby(f"{time_dim_name}.month")
158+
grouping_result = self._perform_grouping_operation(grouped_values)
159+
# create a new aggregated time dimension based on original time dimension
160+
_result_time_dim_name = f"{time_dim_name}_monthly"
161+
grouping_result = grouping_result.rename({"month": _result_time_dim_name})
162+
return grouping_result, _result_time_dim_name
163+
164+
def _handle_aggregation_operation(
165+
self,
166+
settings: TimeOperationSettings,
167+
dim_name: str,
168+
time_dim_name: str,
169+
value_array: _xr.DataArray,
170+
) -> tuple[_xr.DataArray, str]:
171+
"""Handles the aggregation operation for other operation types"""
172+
aggregated_values = value_array.resample({time_dim_name: dim_name}, skipna=True)
173+
# create a new aggregated time dimension based on original time dimension
174+
aggregation_result = self._perform_operation(
175+
aggregated_values, settings.operation_type
176+
)
177+
_result_time_dim_name = f"{time_dim_name}_{settings.time_scale}"
178+
aggregation_result = aggregation_result.rename(
179+
{time_dim_name: _result_time_dim_name}
180+
)
181+
return aggregation_result, _result_time_dim_name
182+
183+
def _perform_grouping_operation(
184+
self,
185+
grouped_values,
186+
) -> _xr.DataArray:
187+
"""Returns the values based on the grouping operation type
188+
189+
Args:
190+
aggregated_values (DataArrayGroupBy): aggregate values
191+
operation_type (TimeOperationType): the operation type
192+
time_dim_name (str): name of the time dimension (for MONTHLY_AVERAGE)
193+
Raises:
194+
NotImplementedError: If operation type is not supported
195+
196+
Returns:
197+
DataArray: Values of operation type
198+
"""
199+
# Compute mean across years for each calendar month
200+
monthly = grouped_values.mean(skipna=True)
201+
# Ensure all 12 months are present (1..12), insert NaNs using reindex
202+
months = _np.arange(1, 13)
203+
result = monthly.reindex({"month": months})
204+
205+
return _xr.DataArray(result)
206+
207+
def _perform_operation(
208+
self,
209+
aggregated_values,
210+
operation_type: TimeOperationType,
211+
) -> _xr.DataArray:
106212
"""Returns the values based on the operation type
107213
108214
Args:
109215
aggregated_values (DataArrayResample): aggregate values
110-
216+
operation_type (TimeOperationType): the operation type
111217
Raises:
112218
NotImplementedError: If operation type is not supported
113219
@@ -120,8 +226,6 @@ def _perform_operation(self, aggregated_values: DataArrayResample) -> _xr.DataAr
120226
TimeOperationType.AVG_DURATION_PERIODS,
121227
]
122228

123-
operation_type = self.settings.operation_type
124-
125229
if operation_type is TimeOperationType.ADD:
126230
result = aggregated_values.sum()
127231

decoimpact/business/workflow/model_builder.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ def _create_rule(rule_data: IRuleData) -> IRule:
147147
)
148148
rule.settings.percentile_value = rule_data.percentile_value
149149
rule.settings.time_scale = rule_data.time_scale
150+
rule.multi_year_start = rule_data.multi_year_start
151+
rule.multi_year_end = rule_data.multi_year_end
150152
elif isinstance(rule_data, IRollingStatisticsRuleData):
151153
rule = RollingStatisticsRule(
152154
rule_data.name, [rule_data.input_variable], rule_data.operation
@@ -159,7 +161,7 @@ def _create_rule(rule_data: IRuleData) -> IRule:
159161
rule_data.name,
160162
rule_data.input_variable_names,
161163
MultiArrayOperationType[rule_data.operation_type],
162-
rule_data.ignore_nan
164+
rule_data.ignore_nan,
163165
)
164166
elif isinstance(rule_data, IResponseCurveRuleData):
165167
rule = ResponseCurveRule(

decoimpact/data/api/i_time_aggregation_rule_data.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515

1616
from abc import ABC, abstractmethod
17+
from typing import Optional
1718

1819
from decoimpact.data.api.i_rule_data import IRuleData
1920
from decoimpact.data.api.time_operation_type import TimeOperationType
@@ -41,3 +42,13 @@ def percentile_value(self) -> float:
4142
@abstractmethod
4243
def time_scale(self) -> str:
4344
"""Time scale"""
45+
46+
@property
47+
@abstractmethod
48+
def multi_year_start(self) -> Optional[int]:
49+
"""Start year for the aggregation (inclusive)"""
50+
51+
@property
52+
@abstractmethod
53+
def multi_year_end(self) -> Optional[int]:
54+
"""End year for the aggregation (inclusive)"""

decoimpact/data/api/time_operation_type.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,4 @@ class TimeOperationType(IntEnum):
2626
AVG_DURATION_PERIODS = 8
2727
STDEV = 9
2828
PERCENTILE = 10
29+
MULTI_YEAR_MONTHLY_AVERAGE = 11

decoimpact/data/entities/time_aggregation_rule_data.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
1313
"""
1414

15+
from typing import Optional
16+
1517
from decoimpact.data.api.i_time_aggregation_rule_data import ITimeAggregationRuleData
1618
from decoimpact.data.api.time_operation_type import TimeOperationType
1719
from decoimpact.data.entities.time_operation_rule_data import TimeOperationRuleData
@@ -20,11 +22,42 @@
2022
class TimeAggregationRuleData(TimeOperationRuleData, ITimeAggregationRuleData):
2123
"""Class for storing data related to time_aggregation rule"""
2224

23-
def __init__(self, name: str, operation: TimeOperationType, input_variable: str):
25+
def __init__(
26+
self,
27+
name: str,
28+
operation: TimeOperationType,
29+
input_variable: str,
30+
multi_year_start: Optional[int] = None,
31+
multi_year_end: Optional[int] = None,
32+
):
33+
# pylint: disable=too-many-arguments
34+
# pylint: disable=too-many-positional-arguments
2435
super().__init__(name, operation)
2536
self._input_variable = input_variable
37+
self._multi_year_start = multi_year_start
38+
self._multi_year_end = multi_year_end
2639

2740
@property
2841
def input_variable(self) -> str:
2942
"""Name of the input variable"""
3043
return self._input_variable
44+
45+
@property
46+
def multi_year_start(self) -> Optional[int]:
47+
"""Start year for aggregation"""
48+
return self._multi_year_start
49+
50+
@multi_year_start.setter
51+
def multi_year_start(self, value: Optional[int]) -> None:
52+
# optional validation here
53+
self._multi_year_start = value
54+
55+
@property
56+
def multi_year_end(self) -> Optional[int]:
57+
"""End year for aggregation"""
58+
return self._multi_year_end
59+
60+
@multi_year_end.setter
61+
def multi_year_end(self, value: Optional[int]) -> None:
62+
# optional validation here
63+
self._multi_year_end = value

decoimpact/data/parsers/parser_time_aggregation_rule.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ def parse_dict(self, dictionary: Dict[str, Any], logger: ILogger) -> IRuleData:
4242
input_variable_name: str = get_dict_element("input_variable", dictionary)
4343
operation: str = get_dict_element("operation", dictionary)
4444
time_scale: str = get_dict_element("time_scale", dictionary)
45+
multi_year_start: int = get_dict_element("multi_year_start", dictionary, False)
46+
multi_year_end: int = get_dict_element("multi_year_end", dictionary, False)
4547
output_variable_name: str = get_dict_element("output_variable", dictionary)
4648

4749
operation_value, percentile_value = parse_operation_values(operation)
@@ -50,6 +52,8 @@ def parse_dict(self, dictionary: Dict[str, Any], logger: ILogger) -> IRuleData:
5052

5153
rule_data.percentile_value = percentile_value
5254
rule_data.time_scale = time_scale
55+
rule_data.multi_year_start = multi_year_start
56+
rule_data.multi_year_end = multi_year_end
5357
rule_data.output_variable = output_variable_name
5458
rule_data.description = description
5559

docs/manual/rules/time_aggregation_rule.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@ FORMAT
99
time_scale : <time_aggregation_applied>
1010
input_variable: <one_input_variable_name>
1111
output_variable: <one_output_variable_name>
12+
multi_year_start: <start_year>
13+
multi_year_end: <end_year>
1214
```
1315

1416
The time aggregation rule allows for calculating a statistical summary over the time axes of 3D and 2D variables. This could be used for calculating the maximum value over a year (e.g., for water level) or the minimum value over a month (e.g., oxygen concentration). The rule operates both on 3D variables and 2D variables as long as they have a time axis and returns a 3D or 2D result depending on input with the statistic calculated for a new time axis (e.g., year or month).
15-
Operations available: Add, Average, Median, Min, Max, period statistics, Stdev and Percentile(n). When using percentile, add a number for the nth percentile with brackets like this: percentile(10). Stdev calculates the standard- deviation over the time period. Under period statistics are explained further in the text.
17+
Operations available: Add, Average, Multi_year_monthly_average, Median, Min, Max, period statistics, Stdev and Percentile(n). When using percentile, add a number for the nth percentile with brackets like this: percentile(10). Stdev calculates the standard- deviation over the time period. Under period statistics are explained further in the text.
1618

17-
Time aggregation available: Year, Month
19+
Time aggregation available: Year, Month, Multi_year_monthly_average
1820

1921
The rule needs to be applied to an existing 2D/3D variable with time axis. A new 2D/3D variable with new time axis is created when the rule is executed. With a year timestep the result is written to the last day of the year, with a month timestep the result is written to the last day of the month per year.
2022

@@ -29,6 +31,10 @@ The rule needs to be applied to an existing 2D/3D variable with time axis. A new
2931
output_variable: MAX_water_level_year
3032
```
3133

34+
To aggregate an average per month, use the combination of operation AVERAGE with time_scale MONTH.
35+
To aggregate a multi-year average over all months, use the combination of operation MULTI_YEAR_MONTHLY_AVERAGE with time_scale MONTH. Then, for each month, the average over the time frame is calculated. The output variable is related to an extra time dimension time_monthly with all 12 months (in numbers).
36+
In combination with the operation MULTI_YEAR_MONTHLY_AVERAGE it is possible do this is for a specific range of years by adding the optional parameters MULTI_YEAR_START and MULTI_YEAR_END.
37+
3238
![Result Time aggregation rule](../../assets/images/3_result_time_aggregation.png "Water level (in m NAP, left-hand side) with a timestep every 10 days has been summarized to the maximum for each year (right-hand side) while maintaining the face dimension (layer dimension is not present in this example, but would be maintained).")
3339

3440
Period statistics: Time aggregation rule with COUNT_PERIODS, AVG_DURATION_PERIODS, MIN_DURATION_PERIODS and MAX_DURATION_PERIODS

template_input.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ rules:
2929
time_scale:
3030
input_variable:
3131
output_variable:
32+
multi_year_start:
33+
multi_year_end:
3234

3335
- step_function_rule:
3436
name:

0 commit comments

Comments
 (0)