From dd69045077a764e08e46ecfb57b76cfcbb4ee289 Mon Sep 17 00:00:00 2001
From: "allcontributors[bot]"
<46447321+allcontributors[bot]@users.noreply.github.com>
Date: Wed, 23 Oct 2024 07:27:16 +0000
Subject: [PATCH 01/10] update CONTRIBUTORS.md
---
CONTRIBUTORS.md | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 4db7f4834..707e6c37b 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -1,6 +1,6 @@
-[](#contributors-)
+[](#contributors-)
## Contributors ✨
@@ -75,6 +75,7 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d
 psotom 💻 💡 🤔 ⚠️ 🔬 📖 |
+  Luis Hebrero 📖 |
From c5df2f06dbd5b30083a50300885e7349df0cc400 Mon Sep 17 00:00:00 2001
From: "allcontributors[bot]"
<46447321+allcontributors[bot]@users.noreply.github.com>
Date: Wed, 23 Oct 2024 07:27:17 +0000
Subject: [PATCH 02/10] update .all-contributorsrc
---
.all-contributorsrc | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/.all-contributorsrc b/.all-contributorsrc
index 6f1ecd3cf..beb574755 100644
--- a/.all-contributorsrc
+++ b/.all-contributorsrc
@@ -586,6 +586,15 @@
"research",
"doc"
]
+ },
+ {
+ "login": "luisheb",
+ "name": "Luis Hebrero",
+ "avatar_url": "https://avatars.githubusercontent.com/u/24703335?v=4",
+ "profile": "https://github.com/luisheb",
+ "contributions": [
+ "doc"
+ ]
}
],
"contributorsPerLine": 7,
From f76a037c5ac70d8c7859b36e92d27be6d4d787af Mon Sep 17 00:00:00 2001
From: aleexarias
Date: Sun, 12 Jan 2025 19:05:51 +0100
Subject: [PATCH 03/10] Updated mean and sum functions for FData, FDataGrid and
FDataBasis to correctly handle NaN values in coefficients
irreg
Updated mean and sum functions for FData, FDataGrid, FDataBasis and FDataIrregular to correctly handle NaN values in coefficients
---
skfda/representation/_functional_data.py | 9 ++-
skfda/representation/basis/_fdatabasis.py | 49 +++++++++---
skfda/representation/grid.py | 98 ++++++++++++++++++++---
skfda/representation/irregular.py | 54 +++++++++++++
4 files changed, 184 insertions(+), 26 deletions(-)
diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py
index ee8813bbc..dda4a80c2 100644
--- a/skfda/representation/_functional_data.py
+++ b/skfda/representation/_functional_data.py
@@ -882,6 +882,7 @@ def mean(
out: None = None,
keepdims: bool = False,
skipna: bool = False,
+ min_count: int = 0,
) -> T:
"""Compute the mean of all the samples.
@@ -891,6 +892,9 @@ def mean(
out: Used for compatibility with numpy. Must be None.
keepdims: Used for compatibility with numpy. Must be False.
skipna: Wether the NaNs are ignored or not.
+ min_count: Number of valid (non NaN) data to have in order
+ for the a variable to not be NaN when `skipna` is
+ `True`.
Returns:
A FData object with just one sample representing
@@ -902,10 +906,7 @@ def mean(
"Not implemented for that parameter combination",
)
- return (
- self.sum(axis=axis, out=out, keepdims=keepdims, skipna=skipna)
- / self.n_samples
- )
+ return self
@abstractmethod
def to_grid(
diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py
index b528e3098..31264d944 100644
--- a/skfda/representation/basis/_fdatabasis.py
+++ b/skfda/representation/basis/_fdatabasis.py
@@ -427,20 +427,49 @@ def sum( # noqa: WPS125
"""
super().sum(axis=axis, out=out, keepdims=keepdims, skipna=skipna)
- coefs = (
- np.nansum(self.coefficients, axis=0) if skipna
- else np.sum(self.coefficients, axis=0)
- )
-
- if min_count > 0:
- valid = ~np.isnan(self.coefficients)
- n_valid = np.sum(valid, axis=0)
- coefs[n_valid < min_count] = np.nan
+ valid_functions = ~self.isna()
+ valid_coefficients = self.coefficients[valid_functions]
+
+ coefs = np.sum(valid_coefficients, axis=0)
return self.copy(
coefficients=coefs,
sample_names=(None,),
)
+
+ def mean( # noqa: WPS125
+ self: T,
+ *,
+ axis: Optional[int] = None,
+ dtype: None = None,
+ out: None = None,
+ keepdims: bool = False,
+ skipna: bool = False,
+ min_count: int = 0,
+ ) -> T:
+ """Compute the mean of all the samples.
+
+ Args:
+ axis: Used for compatibility with numpy. Must be None or 0.
+ dtype: Used for compatibility with numpy. Must be None.
+ out: Used for compatibility with numpy. Must be None.
+ keepdims: Used for compatibility with numpy. Must be False.
+ skipna: Wether the NaNs are ignored or not.
+ min_count: Number of valid (non NaN) data to have in order
+ for the a variable to not be NaN when `skipna` is
+ `True`.
+
+ Returns:
+ A FDataBasis object with just one sample representing
+ the mean of all the samples in the original object.
+ """
+ super().mean(axis=axis, dtype=dtype, out=out, keepdims=keepdims,
+ skipna=skipna)
+
+ return (
+ self.sum(axis=axis, out=out, keepdims=keepdims, skipna=skipna)
+ / np.sum(~self.isna())
+ )
def var(
self: T,
@@ -998,7 +1027,7 @@ def isna(self) -> NDArrayBool:
Returns:
na_values (np.ndarray): Positions of NA.
"""
- return np.all( # type: ignore[no-any-return]
+ return np.any( # type: ignore[no-any-return]
np.isnan(self.coefficients),
axis=1,
)
diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py
index 50bb96169..12e6e6cc6 100644
--- a/skfda/representation/grid.py
+++ b/skfda/representation/grid.py
@@ -544,6 +544,60 @@ def _get_points_and_values(self: T) -> Tuple[NDArrayFloat, NDArrayFloat]:
def _get_input_points(self: T) -> GridPoints:
return self.grid_points
+
+ def _compute_aggregate(
+ self: T,
+ operation = str,
+ *,
+ skipna: bool = False,
+ min_count: int = 0,
+ ) -> T:
+ """Compute a defined aggregation operation of the samples.
+
+ Args:
+ operation: Operation to be performed. Can be 'mean', 'sum' or
+ 'var'.
+ axis: Used for compatibility with numpy. Must be None or 0.
+ out: Used for compatibility with numpy. Must be None.
+ keepdims: Used for compatibility with numpy. Must be False.
+ skipna: Wether the NaNs are ignored or not.
+ min_count: Number of valid (non NaN) data to have in order
+ for the a variable to not be NaN when `skipna` is
+ `True`.
+
+ Returns:
+ An FDataGrid object with just one sample representing
+ the aggregation of all the samples in the original object.
+
+ """
+ if operation not in {'sum', 'mean', 'var'}:
+ raise ValueError("Invalid operation."
+ "Must be one of 'sum', 'mean', or 'var'.")
+
+ if skipna:
+ agg_func = {
+ 'sum': np.nansum,
+ 'mean': np.nanmean,
+ 'var': np.nanvar
+ }[operation]
+ else:
+ agg_func = {
+ 'sum': np.sum,
+ 'mean': np.mean,
+ 'var': np.var
+ }[operation]
+
+ data = agg_func(self.data_matrix, axis=0, keepdims=True)
+
+ if min_count > 0:
+ valid = ~np.isnan(self.data_matrix)
+ n_valid = np.sum(valid, axis=0)
+ data[n_valid < min_count] = np.nan
+
+ return self.copy(
+ data_matrix=data,
+ sample_names=(None,),
+ )
def sum( # noqa: WPS125
self: T,
@@ -583,20 +637,40 @@ def sum( # noqa: WPS125
"""
super().sum(axis=axis, out=out, keepdims=keepdims, skipna=skipna)
- data = (
- np.nansum(self.data_matrix, axis=0, keepdims=True) if skipna
- else np.sum(self.data_matrix, axis=0, keepdims=True)
- )
+ return self._compute_aggregate(operation='sum', skipna=skipna,
+ min_count=min_count)
- if min_count > 0:
- valid = ~np.isnan(self.data_matrix)
- n_valid = np.sum(valid, axis=0)
- data[n_valid < min_count] = np.nan
+ def mean( # noqa: WPS125
+ self: T,
+ *,
+ axis: Optional[int] = None,
+ dtype: None = None,
+ out: None = None,
+ keepdims: bool = False,
+ skipna: bool = False,
+ min_count: int = 0,
+ ) -> T:
+ """Compute the mean of all the samples.
- return self.copy(
- data_matrix=data,
- sample_names=(None,),
- )
+ Args:
+ axis: Used for compatibility with numpy. Must be None or 0.
+ dtype: Used for compatibility with numpy. Must be None.
+ out: Used for compatibility with numpy. Must be None.
+ keepdims: Used for compatibility with numpy. Must be False.
+ skipna: Wether the NaNs are ignored or not.
+ min_count: Number of valid (non NaN) data to have in order
+ for the a variable to not be NaN when `skipna` is
+ `True`.
+
+ Returns:
+ A FDataGrid object with just one sample representing
+ the mean of all the samples in the original object.
+ """
+ super().mean(axis=axis, dtype=dtype, out=out, keepdims=keepdims,
+ skipna=skipna)
+
+ return self._compute_aggregate(operation='mean', skipna=skipna,
+ min_count=min_count)
def var(self: T, correction: int = 0) -> T:
"""Compute the variance of a set of samples in a FDataGrid object.
diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py
index cf19c8cae..bea1ba032 100644
--- a/skfda/representation/irregular.py
+++ b/skfda/representation/irregular.py
@@ -716,6 +716,60 @@ def sum( # noqa: WPS125
values=sum_values,
sample_names=(None,),
)
+
+ def mean( # noqa: WPS125
+ self: T,
+ *,
+ axis: Optional[int] = None,
+ dtype: None = None,
+ out: None = None,
+ keepdims: bool = False,
+ skipna: bool = False,
+ min_count: int = 0,
+ ) -> T:
+ """Compute the mean of all the samples.
+
+ Args:
+ axis: Used for compatibility with numpy. Must be None or 0.
+ dtype: Used for compatibility with numpy. Must be None.
+ out: Used for compatibility with numpy. Must be None.
+ keepdims: Used for compatibility with numpy. Must be False.
+ skipna: Wether the NaNs are ignored or not.
+ min_count: Number of valid (non NaN) data to have in order
+ for the a variable to not be NaN when `skipna` is
+ `True`.
+
+ Returns:
+ An FDataIrregular object with just one sample representing
+ the mean of all the samples in the original object.
+ """
+ super().mean(axis=axis, dtype=dtype, out=out, keepdims=keepdims,
+ skipna=skipna)
+
+ common_points, common_values = self._get_common_points_and_values()
+
+ if len(common_points) == 0:
+ raise ValueError("No common points in FDataIrregular object")
+
+ sum_function = np.nansum if skipna else np.sum
+ sum_values = sum_function(common_values, axis=0)
+
+ if skipna:
+ count_values = np.sum(~np.isnan(common_values), axis=0)
+ else:
+ count_values = np.full(sum_values.shape, self.n_samples)
+
+ if min_count > 0:
+ count_values[count_values < min_count] = np.nan
+
+ mean_values = sum_values / count_values
+
+ return FDataIrregular(
+ start_indices=np.array([0]),
+ points=common_points,
+ values=mean_values,
+ sample_names=(None,),
+ )
def var(self: T, correction: int = 0) -> T:
"""Compute the variance of all the samples.
From e3911ad30405792d5a1523458e08ca7287d9ff6f Mon Sep 17 00:00:00 2001
From: aleexarias
Date: Sat, 1 Mar 2025 14:06:31 +0100
Subject: [PATCH 04/10] Changes suggested by author
---
skfda/representation/basis/_fdatabasis.py | 14 +++++++-----
skfda/representation/grid.py | 27 ++++++++++++++++-------
skfda/representation/irregular.py | 11 ++++++---
3 files changed, 36 insertions(+), 16 deletions(-)
diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py
index 31264d944..77b4a9150 100644
--- a/skfda/representation/basis/_fdatabasis.py
+++ b/skfda/representation/basis/_fdatabasis.py
@@ -455,9 +455,8 @@ def mean( # noqa: WPS125
out: Used for compatibility with numpy. Must be None.
keepdims: Used for compatibility with numpy. Must be False.
skipna: Wether the NaNs are ignored or not.
- min_count: Number of valid (non NaN) data to have in order
- for the a variable to not be NaN when `skipna` is
- `True`.
+ min_count: Ignored, used for compatibility with FDataGrid
+ and FDataIrregular.
Returns:
A FDataBasis object with just one sample representing
@@ -467,8 +466,13 @@ def mean( # noqa: WPS125
skipna=skipna)
return (
- self.sum(axis=axis, out=out, keepdims=keepdims, skipna=skipna)
- / np.sum(~self.isna())
+ self.sum(
+ axis=axis,
+ out=out,
+ keepdims=keepdims,
+ skipna=skipna,
+ )
+ / np.sum(~self.isna()),
)
def var(
diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py
index 12e6e6cc6..315d2a6d8 100644
--- a/skfda/representation/grid.py
+++ b/skfda/representation/grid.py
@@ -547,7 +547,7 @@ def _get_input_points(self: T) -> GridPoints:
def _compute_aggregate(
self: T,
- operation = str,
+ operation: str,
*,
skipna: bool = False,
min_count: int = 0,
@@ -589,7 +589,7 @@ def _compute_aggregate(
data = agg_func(self.data_matrix, axis=0, keepdims=True)
- if min_count > 0:
+ if min_count > 0 and skipna:
valid = ~np.isnan(self.data_matrix)
n_valid = np.sum(valid, axis=0)
data[n_valid < min_count] = np.nan
@@ -637,8 +637,11 @@ def sum( # noqa: WPS125
"""
super().sum(axis=axis, out=out, keepdims=keepdims, skipna=skipna)
- return self._compute_aggregate(operation='sum', skipna=skipna,
- min_count=min_count)
+ return self._compute_aggregate(
+ operation='sum',
+ skipna=skipna,
+ min_count=min_count,
+ )
def mean( # noqa: WPS125
self: T,
@@ -666,11 +669,19 @@ def mean( # noqa: WPS125
A FDataGrid object with just one sample representing
the mean of all the samples in the original object.
"""
- super().mean(axis=axis, dtype=dtype, out=out, keepdims=keepdims,
- skipna=skipna)
+ super().mean(
+ axis=axis,
+ dtype=dtype,
+ out=out,
+ keepdims=keepdims,
+ skipna=skipna,
+ )
- return self._compute_aggregate(operation='mean', skipna=skipna,
- min_count=min_count)
+ return self._compute_aggregate(
+ operation='mean',
+ skipna=skipna,
+ min_count=min_count,
+ )
def var(self: T, correction: int = 0) -> T:
"""Compute the variance of a set of samples in a FDataGrid object.
diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py
index bea1ba032..dfa916230 100644
--- a/skfda/representation/irregular.py
+++ b/skfda/representation/irregular.py
@@ -743,8 +743,13 @@ def mean( # noqa: WPS125
An FDataIrregular object with just one sample representing
the mean of all the samples in the original object.
"""
- super().mean(axis=axis, dtype=dtype, out=out, keepdims=keepdims,
- skipna=skipna)
+ super().mean(
+ axis=axis,
+ dtype=dtype,
+ out=out,
+ keepdims=keepdims,
+ skipna=skipna,
+ )
common_points, common_values = self._get_common_points_and_values()
@@ -759,7 +764,7 @@ def mean( # noqa: WPS125
else:
count_values = np.full(sum_values.shape, self.n_samples)
- if min_count > 0:
+ if min_count > 0 and skipna:
count_values[count_values < min_count] = np.nan
mean_values = sum_values / count_values
From 5ebe144c3b6abdcae9e4ee5e32e54f05a4e8b113 Mon Sep 17 00:00:00 2001
From: aleexarias
Date: Fri, 14 Mar 2025 13:09:53 +0100
Subject: [PATCH 05/10] Honor skipna/min_count in FDataBasis.sum; use scalar count in FDataIrregular.mean
---
skfda/representation/basis/_fdatabasis.py | 9 ++++++++-
skfda/representation/irregular.py | 2 +-
2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py
index 77b4a9150..d42556584 100644
--- a/skfda/representation/basis/_fdatabasis.py
+++ b/skfda/representation/basis/_fdatabasis.py
@@ -428,9 +428,15 @@ def sum( # noqa: WPS125
super().sum(axis=axis, out=out, keepdims=keepdims, skipna=skipna)
valid_functions = ~self.isna()
+ valid_counts = np.sum(valid_functions, axis=0)
valid_coefficients = self.coefficients[valid_functions]
- coefs = np.sum(valid_coefficients, axis=0)
+ coefs = (
+ np.nansum(valid_coefficients, axis=0) if skipna
+ else np.sum(valid_coefficients, axis=0)
+ )
+
+ coefs = np.where(valid_counts >= min_count, coefs, np.nan)
return self.copy(
coefficients=coefs,
@@ -471,6 +477,7 @@ def mean( # noqa: WPS125
out=out,
keepdims=keepdims,
skipna=skipna,
+ min_count=min_count,
)
/ np.sum(~self.isna()),
)
diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py
index dfa916230..c5a8afdf9 100644
--- a/skfda/representation/irregular.py
+++ b/skfda/representation/irregular.py
@@ -762,7 +762,7 @@ def mean( # noqa: WPS125
if skipna:
count_values = np.sum(~np.isnan(common_values), axis=0)
else:
- count_values = np.full(sum_values.shape, self.n_samples)
+ count_values = self.n_samples
if min_count > 0 and skipna:
count_values[count_values < min_count] = np.nan
From dda99f2cb6441463ddea76314467487b2e3b8994 Mon Sep 17 00:00:00 2001
From: "allcontributors[bot]"
<46447321+allcontributors[bot]@users.noreply.github.com>
Date: Wed, 23 Oct 2024 07:27:16 +0000
Subject: [PATCH 06/10] update CONTRIBUTORS.md
---
CONTRIBUTORS.md | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index fee9f68af..707e6c37b 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -1,6 +1,6 @@
-[](#contributors-)
+[](#contributors-)
## Contributors ✨
@@ -76,7 +76,6 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d
 psotom 💻 💡 🤔 ⚠️ 🔬 📖 |
 Luis Hebrero 📖 |
-  E69D68U85 💻 |
From 2b8422ed71b8f8ac69e3dac0b881796d5a996ace Mon Sep 17 00:00:00 2001
From: "allcontributors[bot]"
<46447321+allcontributors[bot]@users.noreply.github.com>
Date: Wed, 23 Oct 2024 07:27:17 +0000
Subject: [PATCH 07/10] update .all-contributorsrc
---
.all-contributorsrc | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/.all-contributorsrc b/.all-contributorsrc
index 18275d90c..beb574755 100644
--- a/.all-contributorsrc
+++ b/.all-contributorsrc
@@ -595,15 +595,6 @@
"contributions": [
"doc"
]
- },
- {
- "login": "E105D104U125",
- "name": "E69D68U85",
- "avatar_url": "https://avatars.githubusercontent.com/u/72515278?v=4",
- "profile": "https://github.com/E105D104U125",
- "contributions": [
- "code"
- ]
}
],
"contributorsPerLine": 7,
From 65d319a7a0e5083335593cae274a1d87b4b0b680 Mon Sep 17 00:00:00 2001
From: aleexarias
Date: Sun, 12 Jan 2025 19:05:51 +0100
Subject: [PATCH 08/10] Updated mean and sum functions for FData, FDataGrid and
FDataBasis to correctly handle NaN values in coefficients
irreg
Updated mean and sum functions for FData, FDataGrid, FDataBasis and FDataIrregular to correctly handle NaN values in coefficients
---
skfda/representation/basis/_fdatabasis.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py
index d42556584..1ddde6911 100644
--- a/skfda/representation/basis/_fdatabasis.py
+++ b/skfda/representation/basis/_fdatabasis.py
@@ -461,8 +461,9 @@ def mean( # noqa: WPS125
out: Used for compatibility with numpy. Must be None.
keepdims: Used for compatibility with numpy. Must be False.
skipna: Wether the NaNs are ignored or not.
- min_count: Ignored, used for compatibility with FDataGrid
- and FDataIrregular.
+ min_count: Number of valid (non NaN) data to have in order
+ for the a variable to not be NaN when `skipna` is
+ `True`.
Returns:
A FDataBasis object with just one sample representing
From ca46824bc16a090f77ee75cba2516d0ff4567737 Mon Sep 17 00:00:00 2001
From: aleexarias
Date: Sat, 1 Mar 2025 14:06:31 +0100
Subject: [PATCH 09/10] Changes suggested by author
---
skfda/representation/basis/_fdatabasis.py | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py
index 1ddde6911..d42556584 100644
--- a/skfda/representation/basis/_fdatabasis.py
+++ b/skfda/representation/basis/_fdatabasis.py
@@ -461,9 +461,8 @@ def mean( # noqa: WPS125
out: Used for compatibility with numpy. Must be None.
keepdims: Used for compatibility with numpy. Must be False.
skipna: Wether the NaNs are ignored or not.
- min_count: Number of valid (non NaN) data to have in order
- for the a variable to not be NaN when `skipna` is
- `True`.
+ min_count: Ignored, used for compatibility with FDataGrid
+ and FDataIrregular.
Returns:
A FDataBasis object with just one sample representing
From 1c0bfc73b487729cf2cde22b2e1d3980eca42cb1 Mon Sep 17 00:00:00 2001
From: aleexarias
Date: Fri, 14 Mar 2025 13:33:20 +0100
Subject: [PATCH 10/10] min_count comment fixed in mean in FDataBasis
---
skfda/representation/basis/_fdatabasis.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py
index d42556584..1ddde6911 100644
--- a/skfda/representation/basis/_fdatabasis.py
+++ b/skfda/representation/basis/_fdatabasis.py
@@ -461,8 +461,9 @@ def mean( # noqa: WPS125
out: Used for compatibility with numpy. Must be None.
keepdims: Used for compatibility with numpy. Must be False.
skipna: Wether the NaNs are ignored or not.
- min_count: Ignored, used for compatibility with FDataGrid
- and FDataIrregular.
+ min_count: Number of valid (non NaN) data to have in order
+ for the a variable to not be NaN when `skipna` is
+ `True`.
Returns:
A FDataBasis object with just one sample representing