Skip to content

Commit f3b1eaa

Browse files
authored
Add tests for sparse mean_var accuracy (#92)
1 parent 8bafd80 commit f3b1eaa

File tree

3 files changed

+50
-0
lines changed

3 files changed

+50
-0
lines changed

pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ overrides.matrix.extras.features = [
8585
]
8686
overrides.matrix.extras.dependencies = [
8787
{ if = [ "full" ], value = "scipy-stubs" },
88+
{ if = [ "full" ], value = "scikit-learn" },
8889
]
8990

9091
[[tool.hatch.envs.hatch-test.matrix]]

tests/test_stats.py

+36
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,42 @@ def test_mean_var(
223223
np.testing.assert_array_almost_equal(var, var_expected) # type: ignore[arg-type]
224224

225225

226+
@pytest.mark.skipif(not find_spec("sklearn"), reason="sklearn not installed")
@pytest.mark.array_type(Flags.Sparse, skip=Flags.Matrix | Flags.Dask | Flags.Disk | Flags.Gpu)
@pytest.mark.parametrize("axis", [0, 1])
def test_mean_var_sparse_64(array_type: ArrayType[types.CSArray], axis: Literal[0, 1]) -> None:
    """Check that float64 sparse mean/variance agree with scikit-learn's.

    A random float64 sparse array is reduced along ``axis`` by both
    ``stats.mean_var`` and ``sklearn.utils.sparsefuncs.mean_variance_axis``;
    the two means and the two variances must match within tolerance.
    """
    # Imported lazily: the test is skipped when scikit-learn is missing.
    from sklearn.utils.sparsefuncs import mean_variance_axis

    data = array_type.random((10000, 1000), dtype=np.float64)

    mean_ours, var_ours = stats.mean_var(data, axis=axis)
    mean_ref, var_ref = mean_variance_axis(data, axis)

    # Mean is compared first, then variance, with identical tolerances.
    for ours, reference in ((mean_ours, mean_ref), (var_ours, var_ref)):
        np.testing.assert_allclose(ours, reference, rtol=1.0e-5, atol=1.0e-8)
240+
241+
242+
@pytest.mark.skipif(not find_spec("sklearn"), reason="sklearn not installed")
@pytest.mark.array_type(Flags.Sparse, skip=Flags.Matrix | Flags.Dask | Flags.Disk | Flags.Gpu)
def test_mean_var_sparse_32(array_type: ArrayType[types.CSArray]) -> None:
    """Test whether we are more accurate than scikit-learn for 32 bit.

    The same random data is reduced along axis 0 once as float64 and once
    cast to float32, by both ``stats.mean_var`` and scikit-learn's
    ``mean_variance_axis``. For the mean and for the variance, the mean
    absolute difference between our float32 and float64 results must be
    strictly smaller than the corresponding difference for scikit-learn.
    """
    # Imported lazily: the test is skipped when scikit-learn is missing.
    from sklearn.utils.sparsefuncs import mean_variance_axis

    mtx64 = array_type.random((10000, 1000), dtype=np.float64)
    mtx32 = mtx64.astype(np.float32)

    # Results keyed by bit width; each value is a (mean, var) pair.
    fau, skl = {}, {}
    for n_bit, mtx in [(32, mtx32), (64, mtx64)]:
        fau[n_bit] = stats.mean_var(mtx, axis=0)
        skl[n_bit] = mean_variance_axis(mtx, 0)

    # ``idx`` selects mean (0) or variance (1); ``name`` labels a failure.
    for idx, name in enumerate(["mean", "var"]):
        resid_fau = np.mean(np.abs(fau[64][idx] - fau[32][idx]))
        resid_skl = np.mean(np.abs(skl[64][idx] - skl[32][idx]))
        assert resid_fau < resid_skl, (
            f"{name}: float32 residual {resid_fau} is not smaller than sklearn's {resid_skl}"
        )
260+
261+
226262
@pytest.mark.array_type(skip={Flags.Disk, *ATS_CUPY_SPARSE})
227263
@pytest.mark.parametrize(
228264
("axis", "expected"),

typings/sklearn/utils/sparsefuncs.pyi

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# SPDX-License-Identifier: MPL-2.0
2+
from typing import Literal, overload

import numpy as np
from numpy.typing import NDArray
from scipy.sparse import csc_array, csc_matrix, csr_array, csr_matrix
7+
8+
def mean_variance_axis(
9+
X: csc_array | csc_matrix | csr_array | csr_matrix, # noqa: N803
10+
axis: Literal[0, 1],
11+
weights: NDArray[np.floating] | None = None,
12+
return_sum_weights: bool = False,
13+
) -> tuple[NDArray[np.float64], NDArray[np.float64]]: ...

0 commit comments

Comments
 (0)