Skip to content

Commit 2a188e5

Browse files
authored
Merge pull request #144 from Deltares/DEI-260-handle-NaN
DEI-260: handle NaN in CombineResultsRule and TimeAggregationRule
2 parents 724b12f + 51e1586 commit 2a188e5

11 files changed

Lines changed: 332 additions & 13 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ INFO - [10:44:34] Serving on http://127.0.0.1:8000/
139139
In this case the docs are available on http://127.0.0.1:8000/ or http://localhost:8000
140140

141141
For each release a version of documentation is available at:
142-
[deltares.github.io/D-EcoImpact/](deltares.github.io/D-EcoImpact/)
142+
[deltares.github.io/D-EcoImpact/](https://deltares.github.io/D-EcoImpact/)
143143

144144
## Add acceptance tests
145145

decoimpact/business/entities/rules/combine_results_rule.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,23 @@ def __init__(
3434
name: str,
3535
input_variable_names: List[str],
3636
operation_type: MultiArrayOperationType,
37+
ignore_nan: bool = False,
3738
):
3839
super().__init__(name, input_variable_names)
3940
self._operation_type: MultiArrayOperationType = operation_type
41+
self._ignore_nan = ignore_nan
4042
self._operations = self._create_operations()
4143

4244
@property
4345
def operation_type(self) -> MultiArrayOperationType:
4446
"""Name of the rule"""
4547
return self._operation_type
4648

49+
@property
50+
def ignore_nan(self) -> bool:
51+
"""Indicates if NaN values should be ignored in the calculations"""
52+
return self._ignore_nan
53+
4754
def validate(self, logger: ILogger) -> bool:
4855
if self._operation_type not in self._operations:
4956

@@ -89,6 +96,19 @@ def execute(
8996
return result_variable
9097

9198
def _create_operations(self) -> dict[MultiArrayOperationType, Callable]:
99+
if self.ignore_nan:
100+
return {
101+
MultiArrayOperationType.MULTIPLY: lambda npa: _np.prod(npa, axis=0),
102+
MultiArrayOperationType.MIN: lambda npa: _np.nanmin(npa, axis=0),
103+
MultiArrayOperationType.MAX: lambda npa: _np.nanmax(npa, axis=0),
104+
MultiArrayOperationType.AVERAGE: lambda npa: _np.nanmean(npa, axis=0),
105+
MultiArrayOperationType.MEDIAN: lambda npa: _np.nanmedian(npa, axis=0),
106+
MultiArrayOperationType.ADD: lambda npa: _np.nansum(npa, axis=0),
107+
MultiArrayOperationType.SUBTRACT: lambda npa: _np.subtract(
108+
npa[0], _np.nansum(npa[1:], axis=0)
109+
),
110+
}
111+
# and if ignore_nan is False:
92112
return {
93113
MultiArrayOperationType.MULTIPLY: lambda npa: _np.prod(npa, axis=0),
94114
MultiArrayOperationType.MIN: lambda npa: _np.min(npa, axis=0),
@@ -101,7 +121,7 @@ def _create_operations(self) -> dict[MultiArrayOperationType, Callable]:
101121
),
102122
}
103123

104-
def _check_dimensions(self, np_arrays: List[_np.array]) -> bool:
124+
def _check_dimensions(self, np_arrays: List[_np.ndarray]) -> bool:
105125
"""Brief check if all the arrays to be combined have the
106126
same size/dimension/length
107127
Args:

decoimpact/business/entities/rules/time_aggregation_rule.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,19 +67,22 @@ def execute(self, value_array: _xr.DataArray, logger: ILogger) -> _xr.DataArray:
6767
"""
6868
settings = self._settings
6969
if settings.operation_type is TimeOperationType.COUNT_PERIODS:
70-
# Check if all values in a COUNT_PERIODS value array are either 0 or 1
71-
compare_values = (value_array == 0) | (value_array == 1)
70+
# Check if all values in a COUNT_PERIODS value array
71+
# are either 0 or 1 or NaN
72+
compare_values = (
73+
(value_array == 0) | (value_array == 1) | _np.isnan(value_array)
74+
)
7275
check_values = _xr.where(compare_values, True, False)
7376
if False in check_values:
7477
raise ValueError(
7578
"The value array for the time aggregation rule with operation type"
76-
" COUNT_PERIODS should only contain the values 0 and 1."
79+
" COUNT_PERIODS should only contain the values 0 and 1 (or NaN)."
7780
)
7881

7982
dim_name = get_dict_element(settings.time_scale, settings.time_scale_mapping)
8083

8184
time_dim_name = get_time_dimension_name(value_array, logger)
82-
aggregated_values = value_array.resample({time_dim_name: dim_name})
85+
aggregated_values = value_array.resample({time_dim_name: dim_name}, skipna=True)
8386

8487
result = self._perform_operation(aggregated_values)
8588
# create a new aggregated time dimension based on original time dimension
@@ -164,7 +167,7 @@ def count_groups(self, elem):
164167
"""
165168
# in case of an example array with 5 values [1,1,0,1,0]:
166169
# subtract the first 4 values from the last 4 values: [1,0,1,0] - [1,1,0,1]:
167-
# (the result of this example differences: [0,-1,1,0])
170+
# (the result of this example differences: [0,-1,1,-1])
168171
differences = _np.diff(elem)
169172
# First add the first element of the array to the difference array (as this
170173
# could also indicate a beginning of a group or not and the diff is calculated
@@ -208,6 +211,7 @@ def analyze_groups(self, elem, axis):
208211
Returns:
209212
array: array with the analyzed periods, with the same dimensions as elem
210213
"""
214+
# Determine the number of axes in the array
211215
no_axis = len(_np.shape(elem))
212216

213217
# The reduce function that calls this analyze_groups function should be reduces
@@ -224,6 +228,10 @@ def analyze_groups(self, elem, axis):
224228

225229
# in case of 1 dimension:
226230
if no_axis == 1:
231+
# remove NaN values from the array (these are to be ignored)
232+
elem = elem[~_np.isnan(elem)]
233+
if len(elem) == 0:
234+
return 0
227235
if self.settings.operation_type is TimeOperationType.COUNT_PERIODS:
228236
group_result = self.count_groups(elem)
229237
elif self.settings.operation_type is TimeOperationType.MAX_DURATION_PERIODS:

decoimpact/business/workflow/model_builder.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ def _create_rule(rule_data: IRuleData) -> IRule:
159159
rule_data.name,
160160
rule_data.input_variable_names,
161161
MultiArrayOperationType[rule_data.operation_type],
162+
rule_data.ignore_nan
162163
)
163164
elif isinstance(rule_data, IResponseCurveRuleData):
164165
rule = ResponseCurveRule(

decoimpact/data/api/i_combine_results_rule_data.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,8 @@ def input_variable_names(self) -> List[str]:
3030
@abstractmethod
3131
def operation_type(self) -> str:
3232
"""Property for the operation_type"""
33+
34+
@property
35+
@abstractmethod
36+
def ignore_nan(self) -> bool:
37+
"""Property for the ignore_nan flag"""

decoimpact/data/entities/combine_results_rule_data.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,12 @@
2121
class CombineResultsRuleData(ICombineResultsRuleData, RuleData):
2222
"""Class for storing data related to combine results rule"""
2323

24-
def __init__(self, name: str, input_variable_names: List[str], operation_type: str):
24+
def __init__(self, name: str, input_variable_names: List[str],
25+
operation_type: str, ignore_nan: bool = False):
2526
super().__init__(name)
2627
self._input_variable_names = input_variable_names
2728
self._operation_type = operation_type
29+
self._ignore_nan = ignore_nan
2830

2931
@property
3032
def input_variable_names(self) -> List[str]:
@@ -35,3 +37,8 @@ def input_variable_names(self) -> List[str]:
3537
def operation_type(self) -> str:
3638
"""Name of the input variable"""
3739
return self._operation_type
40+
41+
@property
42+
def ignore_nan(self) -> bool:
43+
"""Property for the ignore_nan flag"""
44+
return self._ignore_nan

decoimpact/data/parsers/parser_combine_results_rule.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,10 @@ def parse_dict(self, dictionary: Dict[str, Any], logger: ILogger) -> IRuleData:
4848
if not description:
4949
description = ""
5050

51-
rule_data = CombineResultsRuleData(name, input_variable_names, operation_type)
51+
ignore_nan = get_dict_element("ignore_nan", dictionary, False)
52+
53+
rule_data = CombineResultsRuleData(name, input_variable_names,
54+
operation_type, ignore_nan)
5255
rule_data.output_variable = output_variable_name
5356
rule_data.description = description
5457

docs/manual/rules/combine_results_rule.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@ FORMAT
88
operation: <statistic_operation_applied>
99
input_variables: [<list with_input_variable_names>]
1010
output_variable: <one_output_variable_name>
11+
ignore_nan: <boolean>
1112
```
1213

13-
The combine results rule combines the output of two or more variables to one output variable. The way this data is combined depends on the operation chosen. This could be used for adding mutual exclusive results (e.g., habitat suitability based on flow velocity and water depth) or asses difference between results (e.g., waterlevel and bathymetry to get the water depth).The rule operates one or multiple 3D variables or 2D variables, independent of the time axes, as long as these all have the same dimensions and returns a single 3D or 2D result, either with time axis, depending on input.
14+
The combine results rule combines the output of two or more variables to one output variable. The way this data is combined depends on the operation chosen. This could be used for adding mutually exclusive results (e.g., habitat suitability based on flow velocity and water depth) or assessing the difference between results (e.g., water level and bathymetry to get the water depth). The rule operates on one or multiple 3D variables or 2D variables, independent of the time axes, as long as these all have the same dimensions, and returns a single 3D or 2D result, with or without a time axis, depending on the input.
1415

1516
Operations available: Add, Subtract, Multiply, Average, Median, Min and Max
17+
The parameter ignore_nan is optional and has a default value of False. When this parameter is set to True, empty values (NaN) will be ignored for all operations, except for Multiply.
1618

1719
The rule needs to be applied to an existing 2D/3D variables with or without time axis. A new 2D/3D variable with or without time axis is created when the rule is executed.
1820

@@ -26,6 +28,7 @@ The rule needs to be applied to an existing 2D/3D variables with or without time
2628
operation: subtract
2729
input_variables: ["water_level","water_depth"]
2830
output_variable: bathymetry_time
31+
ignore_nan: True
2932
3033
```
3134

docs/manual/rules/time_aggregation_rule.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ The rule needs to be applied to an existing 2D/3D variable with time axis. A new
3333

3434
Period statistics: Time aggregation rule with COUNT_PERIODS, AVG_DURATION_PERIODS, MIN_DURATION_PERIODS and MAX_DURATION_PERIODS
3535

36-
When the operation type period statistics is used, the user needs to make sure that the input data is always consisting of only 1 and 0. If there is no such layer, the user can make a combination of for example the classification rule together with the time aggregation rule. For example, water depth can be used to check whether the cells are dry or not (this can be done with a classification rule) and with the COUNT_PERIODS operation type in the time aggregation rule the number of consecutive periods within a year or month can be calculated (nr). AVG_DURATION_PERIODS, MIN_DURATION_PERIODS and MAX_DURATION_PERIODS take the respective statistic of the duration for those consecutive periods (duration).
36+
When the operation type period statistics is used, the user needs to make sure that the input data consists only of the values 1 and 0. If there is no such layer, the user can combine, for example, the classification rule with the time aggregation rule. For example, water depth can be used to check whether the cells are dry or not (this can be done with a classification rule) and with the COUNT_PERIODS operation type in the time aggregation rule the number of consecutive periods within a year or month can be calculated (nr). AVG_DURATION_PERIODS, MIN_DURATION_PERIODS and MAX_DURATION_PERIODS take the respective statistic of the duration for those consecutive periods (duration). Empty values (NaN) are allowed and will be ignored. If, for a specific dimension, only empty values occur, the result of the aggregation will be 0.
3737

3838

3939

tests/business/entities/rules/test_combine_results_rule.py

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ def test_execute_error_combine_results_rule_different_shapes():
9999

100100
# Arrange & Act
101101
rule = CombineResultsRule(
102-
"test", ["foo_data", "hello_data"], MultiArrayOperationType.MULTIPLY
102+
"test", ["foo_data", "hello_data"], MultiArrayOperationType.MULTIPLY, False
103103
)
104104
value_array = {
105105
"foo_data": _xr.DataArray([[1, 2], [3, 4]]),
@@ -150,6 +150,76 @@ def test_all_operations_combine_results_rule(
150150
_xr.testing.assert_equal(obtained_result, _xr.DataArray(expected_result))
151151

152152

153+
@pytest.mark.parametrize(
154+
"operation, expected_result",
155+
[
156+
(MultiArrayOperationType.MIN, [_np.nan, 5, 3]),
157+
(MultiArrayOperationType.MAX, [_np.nan, 12, 24]),
158+
(MultiArrayOperationType.MULTIPLY, [_np.nan, 420, 432]),
159+
(MultiArrayOperationType.AVERAGE, [_np.nan, 8, 11]),
160+
(MultiArrayOperationType.MEDIAN, [_np.nan, 7, 6]),
161+
(MultiArrayOperationType.ADD, [_np.nan, 24, 33]),
162+
(MultiArrayOperationType.SUBTRACT, [_np.nan, -10, -27]),
163+
],
164+
)
165+
def test_all_operations_incl_nan(
166+
operation: MultiArrayOperationType, expected_result: List[float]
167+
):
168+
"""Test the outcome of each operand for the combine results rule"""
169+
# Arrange
170+
logger = Mock(ILogger)
171+
dict_vars = {
172+
"var1_name": _xr.DataArray([20, 7, 3]),
173+
"var2_name": _xr.DataArray([4, 5, 6]),
174+
"var3_name": _xr.DataArray([_np.nan, 12, 24]),
175+
}
176+
177+
# Act
178+
rule = CombineResultsRule(
179+
"test_name",
180+
["var1_name", "var2_name", "var3_name"],
181+
operation,
182+
)
183+
obtained_result = rule.execute(dict_vars, logger)
184+
185+
# Assert
186+
_xr.testing.assert_equal(obtained_result, _xr.DataArray(expected_result))
187+
188+
189+
@pytest.mark.parametrize(
190+
"operation, expected_result",
191+
[
192+
(MultiArrayOperationType.MIN, [4, 5, 3]),
193+
(MultiArrayOperationType.MAX, [20, 12, 24]),
194+
(MultiArrayOperationType.MULTIPLY, [_np.nan, 420, 432]),
195+
(MultiArrayOperationType.AVERAGE, [12, 8, 11]),
196+
(MultiArrayOperationType.MEDIAN, [12, 7, 6]),
197+
(MultiArrayOperationType.ADD, [24, 24, 33]),
198+
(MultiArrayOperationType.SUBTRACT, [16, -10, -27]),
199+
],
200+
)
201+
def test_all_operations_ignore_nan(
202+
operation: MultiArrayOperationType, expected_result: List[float]
203+
):
204+
"""Test the outcome of each operand for the combine results rule"""
205+
# Arrange
206+
logger = Mock(ILogger)
207+
dict_vars = {
208+
"var1_name": _xr.DataArray([20, 7, 3]),
209+
"var2_name": _xr.DataArray([4, 5, 6]),
210+
"var3_name": _xr.DataArray([_np.nan, 12, 24]),
211+
}
212+
213+
# Act
214+
rule = CombineResultsRule(
215+
"test_name", ["var1_name", "var2_name", "var3_name"], operation, ignore_nan=True
216+
)
217+
obtained_result = rule.execute(dict_vars, logger)
218+
219+
# Assert
220+
_xr.testing.assert_equal(obtained_result, _xr.DataArray(expected_result))
221+
222+
153223
def test_dims_present_in_result():
154224
"""Test that the dims metadata of the result is equal to the one of the first xarray used."""
155225
# Arrange

0 commit comments

Comments
 (0)