Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ jobs:
key: benchmark-state-${{ hashFiles('benchmarks/**') }}

- name: Install dependencies
run: pip install 'asv>=0.6.4' py-rattler
# https://github.com/airspeed-velocity/asv/issues/1577
run: pip install 'asv>=0.6.4' 'py-rattler<0.22'

- name: Configure ASV
working-directory: ${{ env.ASV_DIR }}
Expand Down
3 changes: 2 additions & 1 deletion src/scanpy/metrics/_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ def confusion_matrix(
mtx = _confusion_matrix(orig, new, labels=unique_labels)
if normalize:
sums = mtx.sum(axis=1)[:, np.newaxis]
mtx = np.divide(mtx, sums, where=sums != 0)
mtx = mtx.astype(np.float64)
np.divide(mtx, sums, where=sums != 0, out=mtx)

# Label
orig_name = "Original labels" if orig.name is None else orig.name
Expand Down
Binary file added tests/_data/objs-t-test.npz
Binary file not shown.
Binary file added tests/_data/objs-wilcoxon.npz
Binary file not shown.
Binary file removed tests/_data/objs_t_test.pkl
Binary file not shown.
Binary file removed tests/_data/objs_wilcoxon.pkl
Binary file not shown.
163 changes: 60 additions & 103 deletions tests/test_rank_genes_groups.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from __future__ import annotations

import pickle
from functools import partial
from pathlib import Path
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, TypedDict, cast

import numpy as np
import pandas as pd
Expand All @@ -24,8 +23,9 @@

if TYPE_CHECKING:
from collections.abc import Callable
from typing import Any
from typing import Any, Literal

from numpy.lib.npyio import NpzFile
from numpy.typing import NDArray

HERE = Path(__file__).parent
Expand Down Expand Up @@ -59,126 +59,83 @@ def get_example_data(array_type: Callable[[np.ndarray], Any]) -> AnnData:
return adata


def get_true_scores() -> tuple[
NDArray[np.object_],
NDArray[np.object_],
NDArray[np.floating],
NDArray[np.floating],
]:
with (DATA_PATH / "objs_t_test.pkl").open("rb") as f:
true_scores_t_test, true_names_t_test = pickle.load(f)
with (DATA_PATH / "objs_wilcoxon.pkl").open("rb") as f:
true_scores_wilcoxon, true_names_wilcoxon = pickle.load(f)

return (
true_names_t_test,
true_names_wilcoxon,
true_scores_t_test,
true_scores_wilcoxon,
)
class Expected(TypedDict):
    """Reference results for one method, as returned by ``get_true_scores``."""

    # Expected gene names; the tests index this as ``names[group, :n]``.
    names: NDArray[np.str_]
    # Expected scores; the tests index this as ``scores[group, :n]``.
    scores: NDArray[np.floating]


def get_true_scores(method: Literal["t-test", "wilcoxon"]) -> Expected:
    """Load the stored reference results for ``method``.

    Reads ``objs-{method}.npz`` from ``DATA_PATH`` with pickling disabled and
    returns its ``names`` (converted to dtype ``"T"``) and ``scores`` arrays.
    """
    npz_path = DATA_PATH / f"objs-{method}.npz"
    with npz_path.open("rb") as stream:
        with cast("NpzFile", np.load(stream, allow_pickle=False)) as archive:
            # Materialize every member while the archive is still open.
            arrays = {key: archive[key] for key in archive}
    names = arrays["names"].astype("T")
    return Expected(names=names, scores=arrays["scores"])


# TODO: Make dask compatible
@pytest.mark.parametrize("method", ["t-test", "wilcoxon"])
@pytest.mark.parametrize("array_type", ARRAY_TYPES_MEM)
def test_results(array_type):
def test_results(
subtests: pytest.Subtests, array_type, method: Literal["t-test", "wilcoxon"]
) -> None:
seed(1234)

adata = get_example_data(array_type)
assert adata.raw is None # Assumption for later checks
expected = get_true_scores(method)
# Only the first 7 entries are compared for wilcoxon; the reason is unknown, see https://github.com/scverse/scanpy/commit/7f10fa3138374bbc664776c6aae1c0e05cf2c5cf
n = 7 if method == "wilcoxon" else None

(
true_names_t_test,
true_names_wilcoxon,
true_scores_t_test,
true_scores_wilcoxon,
) = get_true_scores()

rank_genes_groups(adata, "true_groups", n_genes=20, method="t-test")

adata.uns["rank_genes_groups"]["names"] = adata.uns["rank_genes_groups"][
"names"
].astype(true_names_t_test.dtype)
rank_genes_groups(adata, "true_groups", n_genes=20, method=method)
results = adata.uns["rank_genes_groups"]

for name in true_scores_t_test.dtype.names:
assert np.allclose(
true_scores_t_test[name], adata.uns["rank_genes_groups"]["scores"][name]
)
assert np.array_equal(true_names_t_test, adata.uns["rank_genes_groups"]["names"])
assert adata.uns["rank_genes_groups"]["params"]["use_raw"] is False

rank_genes_groups(adata, "true_groups", n_genes=20, method="wilcoxon")

adata.uns["rank_genes_groups"]["names"] = adata.uns["rank_genes_groups"][
"names"
].astype(true_names_wilcoxon.dtype)

for name in true_scores_t_test.dtype.names:
assert np.allclose(
true_scores_wilcoxon[name][:7],
adata.uns["rank_genes_groups"]["scores"][name][:7],
)
assert np.array_equal(
true_names_wilcoxon[:7], adata.uns["rank_genes_groups"]["names"][:7]
)
assert adata.uns["rank_genes_groups"]["params"]["use_raw"] is False
for g in range(expected["names"].shape[0]):
with subtests.test(group=g):
assert np.allclose(expected["scores"][g, :n], results["scores"][str(g)][:n])
assert np.array_equal(
expected["names"][g, :n], results["names"][str(g)][:n]
)
assert results["params"]["use_raw"] is False


@pytest.mark.parametrize("method", ["t-test", "wilcoxon"])
@pytest.mark.parametrize("array_type", ARRAY_TYPES_MEM)
def test_results_layers(array_type):
def test_results_layers(
subtests: pytest.Subtests, array_type, method: Literal["t-test", "wilcoxon"]
) -> None:
seed(1234)

adata = get_example_data(array_type)
adata.layers["to_test"] = adata.X.copy()
x = adata.X.tolil() if isinstance(adata.X, CSBase) else adata.X
mask = np.random.randint(0, 2, adata.shape, dtype=bool)
x[mask] = 0
adata.X = array_type(x)

_, _, true_scores_t_test, true_scores_wilcoxon = get_true_scores()

# Wilcoxon
rank_genes_groups(
adata,
"true_groups",
method="wilcoxon",
layer="to_test",
n_genes=20,
)
assert adata.uns["rank_genes_groups"]["params"]["use_raw"] is False
for name in true_scores_t_test.dtype.names:
assert np.allclose(
true_scores_wilcoxon[name][:7],
adata.uns["rank_genes_groups"]["scores"][name][:7],
)

rank_genes_groups(adata, "true_groups", method="wilcoxon", n_genes=20)
for name in true_scores_t_test.dtype.names:
assert not np.allclose(
true_scores_wilcoxon[name][:7],
adata.uns["rank_genes_groups"]["scores"][name][:7],
)

# t-test
rank_genes_groups(
adata,
"true_groups",
method="t-test",
layer="to_test",
use_raw=False,
n_genes=20,
)
for name in true_scores_t_test.dtype.names:
assert np.allclose(
true_scores_t_test[name][:7],
adata.uns["rank_genes_groups"]["scores"][name][:7],
)

rank_genes_groups(adata, "true_groups", method="t-test", n_genes=20)
for name in true_scores_t_test.dtype.names:
assert not np.allclose(
true_scores_t_test[name][:7],
adata.uns["rank_genes_groups"]["scores"][name][:7],
scores = get_true_scores(method)["scores"]

with subtests.test("layer"):
rank_genes_groups(
adata,
"true_groups",
method=method,
layer="to_test",
use_raw=None if method == "wilcoxon" else False,
n_genes=20,
)
assert adata.uns["rank_genes_groups"]["params"]["use_raw"] is False
for g in range(scores.shape[0]):
np.testing.assert_allclose(
scores[g, :7],
adata.uns["rank_genes_groups"]["scores"][str(g)][:7],
rtol=1e-5, # default of np.allclose
)

with subtests.test("X"):
rank_genes_groups(adata, "true_groups", method=method, n_genes=20)
for g in range(scores.shape[0]):
assert not np.allclose(
scores[g, :7], adata.uns["rank_genes_groups"]["scores"][str(g)][:7]
)


def test_rank_genes_groups_use_raw():
Expand Down
Loading