diff --git a/icenet_mp/config/data/datasets/full_sicnorth_ssmis_25k_1979_2024_24h_v1.yaml b/icenet_mp/config/data/datasets/full_sicnorth_ssmis_25k_1979_2024_24h_v1.yaml index 9ba7c951..9f0332eb 100644 --- a/icenet_mp/config/data/datasets/full_sicnorth_ssmis_25k_1979_2024_24h_v1.yaml +++ b/icenet_mp/config/data/datasets/full_sicnorth_ssmis_25k_1979_2024_24h_v1.yaml @@ -353,5 +353,10 @@ full-sicnorth-ssmis-25k-1979-2024-24h-v1: offset: 0.0 param: total_standard_uncertainty - statistics: - allow_nans: [algorithm_standard_uncertainty] + # Replace NaNs with 0 for concentration variables and 99 for uncertainty variables + - nan_to_num: + variables: [ice_conc, raw_ice_conc_values] + replace_with: 0.0 + - nan_to_num: + variables: [algorithm_standard_uncertainty, smearing_standard_uncertainty, total_standard_uncertainty] + replace_with: 99 diff --git a/icenet_mp/config/data/datasets/full_sicsouth_ssmis_25k_1979_2024_24h_v1.yaml b/icenet_mp/config/data/datasets/full_sicsouth_ssmis_25k_1979_2024_24h_v1.yaml index cb1a1461..e5836124 100644 --- a/icenet_mp/config/data/datasets/full_sicsouth_ssmis_25k_1979_2024_24h_v1.yaml +++ b/icenet_mp/config/data/datasets/full_sicsouth_ssmis_25k_1979_2024_24h_v1.yaml @@ -764,5 +764,10 @@ full-sicsouth-ssmis-25k-1979-2024-24h-v1: offset: 0.0 param: total_standard_uncertainty - statistics: - allow_nans: [algorithm_standard_uncertainty] + # Replace NaNs with 0 for concentration variables and 99 for uncertainty variables + - nan_to_num: + variables: [ice_conc, raw_ice_conc_values] + replace_with: 0.0 + - nan_to_num: + variables: [algorithm_standard_uncertainty, smearing_standard_uncertainty, total_standard_uncertainty] + replace_with: 99 diff --git a/icenet_mp/config/data/datasets/samp_sicnorth_ssmis_25k_2017_2019_24h_v1.yaml b/icenet_mp/config/data/datasets/samp_sicnorth_ssmis_25k_2017_2019_24h_v1.yaml index 1baaf545..f9991356 100644 --- a/icenet_mp/config/data/datasets/samp_sicnorth_ssmis_25k_2017_2019_24h_v1.yaml +++ 
b/icenet_mp/config/data/datasets/samp_sicnorth_ssmis_25k_2017_2019_24h_v1.yaml @@ -53,5 +53,10 @@ samp-sicnorth-ssmis-25k-2017-2019-24h-v1: offset: 0.0 param: total_standard_uncertainty - statistics: - allow_nans: [algorithm_standard_uncertainty] + # Replace NaNs with 0 for concentration variables and 99 for uncertainty variables + - nan_to_num: + variables: [ice_conc, raw_ice_conc_values] + replace_with: 0.0 + - nan_to_num: + variables: [algorithm_standard_uncertainty, smearing_standard_uncertainty, total_standard_uncertainty] + replace_with: 99 diff --git a/icenet_mp/config/data/datasets/samp_sicsouth_ssmis_25k_2017_2019_24h_v1.yaml b/icenet_mp/config/data/datasets/samp_sicsouth_ssmis_25k_2017_2019_24h_v1.yaml index d21e3018..b9198850 100644 --- a/icenet_mp/config/data/datasets/samp_sicsouth_ssmis_25k_2017_2019_24h_v1.yaml +++ b/icenet_mp/config/data/datasets/samp_sicsouth_ssmis_25k_2017_2019_24h_v1.yaml @@ -53,5 +53,10 @@ samp-sicsouth-ssmis-25k-2017-2019-24h-v1: offset: 0.0 param: total_standard_uncertainty - statistics: - allow_nans: [algorithm_standard_uncertainty] + # Replace NaNs with 0 for concentration variables and 99 for uncertainty variables + - nan_to_num: + variables: [ice_conc, raw_ice_conc_values] + replace_with: 0.0 + - nan_to_num: + variables: [algorithm_standard_uncertainty, smearing_standard_uncertainty, total_standard_uncertainty] + replace_with: 99 diff --git a/icenet_mp/data_processors/filters/__init__.py b/icenet_mp/data_processors/filters/__init__.py index 02bb79cf..d554091c 100644 --- a/icenet_mp/data_processors/filters/__init__.py +++ b/icenet_mp/data_processors/filters/__init__.py @@ -3,18 +3,25 @@ from anemoi.transform.filters import filter_registry from .doubling_filter import DoublingFilter +from .nan_to_num import NanToNum logger = logging.getLogger(__name__) def register_filters() -> None: """Register all filters with anemoi-transform.""" - if "doubling_filter" not in filter_registry.registered: - 
filter_registry.register("doubling_filter", DoublingFilter) - logger.debug("Registered DoublingFilter with anemoi-transform.") + filters = { + "doubling_filter": DoublingFilter, + "nan_to_num": NanToNum, + } + for filter_name, filter_class in filters.items(): + if filter_name not in filter_registry.registered: + filter_registry.register(filter_name, filter_class) + logger.debug("Registered %s with anemoi-transform.", filter_class.__name__) __all__ = [ "DoublingFilter", + "NanToNum", "register_filters", ] diff --git a/icenet_mp/data_processors/filters/doubling_filter.py b/icenet_mp/data_processors/filters/doubling_filter.py index de470e2a..4205d9c0 100644 --- a/icenet_mp/data_processors/filters/doubling_filter.py +++ b/icenet_mp/data_processors/filters/doubling_filter.py @@ -16,9 +16,12 @@ def __init__( self.input_field = input_field self.output_field = output_field - def forward_transform(self, input_field: ekd.Field) -> Iterator[ekd.Field]: - """An example forward transform that doubles the input field as a new field.""" - yield input_field - yield self.new_field_from_numpy( - input_field.to_numpy() * 2, template=input_field, param=self.output_field - ) + def forward_transform(self, *input_fields: ekd.Field) -> Iterator[ekd.Field]: + """A forward transform that doubles the input field as a new field.""" + for input_field in input_fields: + yield input_field + yield self.new_field_from_numpy( + input_field.to_numpy() * 2, + template=input_field, + param=self.output_field, + ) diff --git a/icenet_mp/data_processors/filters/nan_to_num.py b/icenet_mp/data_processors/filters/nan_to_num.py new file mode 100644 index 00000000..838210bf --- /dev/null +++ b/icenet_mp/data_processors/filters/nan_to_num.py @@ -0,0 +1,19 @@ +"""Filter that replaces all NaN values in the 'variables' list with the 'replace_with' value.""" + +import earthkit.data as ekd +import numpy as np +from anemoi.transform.filter import SingleFieldFilter + + +class NanToNum(SingleFieldFilter): + 
required_inputs = ("variables", "replace_with")
+
+    def forward_select(self) -> dict[str, str | list[str] | tuple[str]]:
+        """Select which fields to transform."""
+        return {"param": list(self.variables)}
+
+    def forward_transform(self, field: ekd.Field) -> ekd.Field:
+        """Replace NaNs (and only NaNs; infinities are kept) with 'replace_with'."""
+        data = np.array(field.to_numpy())  # copy so the input field is never mutated
+        data[np.isnan(data)] = self.replace_with
+        return self.new_field_from_numpy(data, template=field)