Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ jobs:
key: benchmark-state-${{ hashFiles('benchmarks/**') }}

- name: Install dependencies
run: pip install 'asv>=0.6.4' py-rattler
# https://github.com/airspeed-velocity/asv/issues/1577
run: pip install 'asv>=0.6.4' 'py-rattler<0.22'

- name: Configure ASV
working-directory: ${{ env.ASV_DIR }}
Expand Down
3 changes: 2 additions & 1 deletion src/scanpy/metrics/_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ def confusion_matrix(
mtx = _confusion_matrix(orig, new, labels=unique_labels)
if normalize:
sums = mtx.sum(axis=1)[:, np.newaxis]
mtx = np.divide(mtx, sums, where=sums != 0)
mtx = mtx.astype(np.float64)
np.divide(mtx, sums, where=sums != 0, out=mtx)
Comment on lines +82 to +83
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given the "expect uninitialized memory in output" warning from the "failing" test, I would think out= should be given the result of np.empty, not mtx. Or am I misinterpreting?

Copy link
Copy Markdown
Member Author

@flying-sheep flying-sheep Feb 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After my change, what we do is to leave all values in mtx where sums==0 as they are (and divide the others):

mtx[sums != 0] /= sums

Before my change, we replaced all values in mtx where sums==0 with uninitialized memory (and divided the others):

mtx_new = np.empty(mtx.shape)
mtx[sums != 0] = mtx_new / sums

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh wow! "If not provided or None, a freshly-allocated array is returned." (from the numpy docs on divide) - I didn't realize that the default behavior was to return uninitialized memory. Ok then!

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, some nuts behavior. Good they have that warning now.


# Label
orig_name = "Original labels" if orig.name is None else orig.name
Expand Down
Binary file added tests/_data/objs-t-test.npz
Binary file not shown.
Binary file added tests/_data/objs-wilcoxon.npz
Binary file not shown.
Binary file removed tests/_data/objs_t_test.pkl
Binary file not shown.
Binary file removed tests/_data/objs_wilcoxon.pkl
Binary file not shown.
163 changes: 60 additions & 103 deletions tests/test_rank_genes_groups.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from __future__ import annotations

import pickle
from functools import partial
from pathlib import Path
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, TypedDict, cast

import numpy as np
import pandas as pd
Expand All @@ -24,8 +23,9 @@

if TYPE_CHECKING:
from collections.abc import Callable
from typing import Any
from typing import Any, Literal

from numpy.lib.npyio import NpzFile
from numpy.typing import NDArray

HERE = Path(__file__).parent
Expand Down Expand Up @@ -59,126 +59,83 @@ def get_example_data(array_type: Callable[[np.ndarray], Any]) -> AnnData:
return adata


def get_true_scores() -> tuple[
NDArray[np.object_],
NDArray[np.object_],
NDArray[np.floating],
NDArray[np.floating],
]:
with (DATA_PATH / "objs_t_test.pkl").open("rb") as f:
true_scores_t_test, true_names_t_test = pickle.load(f)
with (DATA_PATH / "objs_wilcoxon.pkl").open("rb") as f:
true_scores_wilcoxon, true_names_wilcoxon = pickle.load(f)

return (
true_names_t_test,
true_names_wilcoxon,
true_scores_t_test,
true_scores_wilcoxon,
)
class Expected(TypedDict):
    """Reference results returned by ``get_true_scores`` for one method."""

    # Expected names as a string array; the tests index it as ``[g, :n]``.
    names: NDArray[np.str_]
    # Expected scores as a floating-point array, indexed the same way.
    scores: NDArray[np.floating]


def get_true_scores(method: Literal["t-test", "wilcoxon"]) -> Expected:
    """Load the stored reference results for one ranking method.

    Reads ``objs-{method}.npz`` below ``DATA_PATH`` with pickling disabled
    and returns its ``names`` array (converted via ``astype("T")``) together
    with its ``scores`` array.
    """
    npz_path = DATA_PATH / f"objs-{method}.npz"
    with npz_path.open("rb") as handle:
        # The cast only informs type checkers that ``np.load`` yields an archive here.
        with cast("NpzFile", np.load(handle, allow_pickle=False)) as archive:
            # Read every member while the archive is still open.
            arrays = {key: archive[key] for key in archive}
    names = arrays["names"].astype("T")
    scores = arrays["scores"]
    return Expected(names=names, scores=scores)


# TODO: Make dask compatible
@pytest.mark.parametrize("method", ["t-test", "wilcoxon"])
@pytest.mark.parametrize("array_type", ARRAY_TYPES_MEM)
def test_results(array_type):
def test_results(
subtests: pytest.Subtests, array_type, method: Literal["t-test", "wilcoxon"]
) -> None:
seed(1234)

adata = get_example_data(array_type)
assert adata.raw is None # Assumption for later checks
expected = get_true_scores(method)
# no clue why we did this: https://github.com/scverse/scanpy/commit/7f10fa3138374bbc664776c6aae1c0e05cf2c5cf
n = 7 if method == "wilcoxon" else None

(
true_names_t_test,
true_names_wilcoxon,
true_scores_t_test,
true_scores_wilcoxon,
) = get_true_scores()

rank_genes_groups(adata, "true_groups", n_genes=20, method="t-test")

adata.uns["rank_genes_groups"]["names"] = adata.uns["rank_genes_groups"][
"names"
].astype(true_names_t_test.dtype)
rank_genes_groups(adata, "true_groups", n_genes=20, method=method)
results = adata.uns["rank_genes_groups"]

for name in true_scores_t_test.dtype.names:
assert np.allclose(
true_scores_t_test[name], adata.uns["rank_genes_groups"]["scores"][name]
)
assert np.array_equal(true_names_t_test, adata.uns["rank_genes_groups"]["names"])
assert adata.uns["rank_genes_groups"]["params"]["use_raw"] is False

rank_genes_groups(adata, "true_groups", n_genes=20, method="wilcoxon")

adata.uns["rank_genes_groups"]["names"] = adata.uns["rank_genes_groups"][
"names"
].astype(true_names_wilcoxon.dtype)

for name in true_scores_t_test.dtype.names:
assert np.allclose(
true_scores_wilcoxon[name][:7],
adata.uns["rank_genes_groups"]["scores"][name][:7],
)
assert np.array_equal(
true_names_wilcoxon[:7], adata.uns["rank_genes_groups"]["names"][:7]
)
assert adata.uns["rank_genes_groups"]["params"]["use_raw"] is False
for g in range(expected["names"].shape[0]):
with subtests.test(group=g):
assert np.allclose(expected["scores"][g, :n], results["scores"][str(g)][:n])
assert np.array_equal(
expected["names"][g, :n], results["names"][str(g)][:n]
)
assert results["params"]["use_raw"] is False


@pytest.mark.parametrize("method", ["t-test", "wilcoxon"])
@pytest.mark.parametrize("array_type", ARRAY_TYPES_MEM)
def test_results_layers(array_type):
def test_results_layers(
subtests: pytest.Subtests, array_type, method: Literal["t-test", "wilcoxon"]
) -> None:
seed(1234)

adata = get_example_data(array_type)
adata.layers["to_test"] = adata.X.copy()
x = adata.X.tolil() if isinstance(adata.X, CSBase) else adata.X
mask = np.random.randint(0, 2, adata.shape, dtype=bool)
x[mask] = 0
adata.X = array_type(x)

_, _, true_scores_t_test, true_scores_wilcoxon = get_true_scores()

# Wilcoxon
rank_genes_groups(
adata,
"true_groups",
method="wilcoxon",
layer="to_test",
n_genes=20,
)
assert adata.uns["rank_genes_groups"]["params"]["use_raw"] is False
for name in true_scores_t_test.dtype.names:
assert np.allclose(
true_scores_wilcoxon[name][:7],
adata.uns["rank_genes_groups"]["scores"][name][:7],
)

rank_genes_groups(adata, "true_groups", method="wilcoxon", n_genes=20)
for name in true_scores_t_test.dtype.names:
assert not np.allclose(
true_scores_wilcoxon[name][:7],
adata.uns["rank_genes_groups"]["scores"][name][:7],
)

# t-test
rank_genes_groups(
adata,
"true_groups",
method="t-test",
layer="to_test",
use_raw=False,
n_genes=20,
)
for name in true_scores_t_test.dtype.names:
assert np.allclose(
true_scores_t_test[name][:7],
adata.uns["rank_genes_groups"]["scores"][name][:7],
)

rank_genes_groups(adata, "true_groups", method="t-test", n_genes=20)
for name in true_scores_t_test.dtype.names:
assert not np.allclose(
true_scores_t_test[name][:7],
adata.uns["rank_genes_groups"]["scores"][name][:7],
scores = get_true_scores(method)["scores"]

with subtests.test("layer"):
rank_genes_groups(
adata,
"true_groups",
method=method,
layer="to_test",
use_raw=None if method == "wilcoxon" else False,
n_genes=20,
)
assert adata.uns["rank_genes_groups"]["params"]["use_raw"] is False
for g in range(scores.shape[0]):
np.testing.assert_allclose(
scores[g, :7],
adata.uns["rank_genes_groups"]["scores"][str(g)][:7],
rtol=1e-5, # default of np.allclose
)

with subtests.test("X"):
rank_genes_groups(adata, "true_groups", method=method, n_genes=20)
for g in range(scores.shape[0]):
assert not np.allclose(
scores[g, :7], adata.uns["rank_genes_groups"]["scores"][str(g)][:7]
)


def test_rank_genes_groups_use_raw():
Expand Down
Loading