Skip to content

Commit 174b851

Browse files
sviatkhpre-commit-ci[bot]Intron7
authored
feat(decoupler_gpu): add zscore implementation with CuPy (scverse#470)
* zscore implementation with CuPy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add dtype and fix pre-commit * add release note 0.13.4 * fix release note --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Severin Dicks <37635888+Intron7@users.noreply.github.com> Co-authored-by: Intron7 <severin.dicks@icloud.com>
1 parent 8af527c commit 174b851

5 files changed

Lines changed: 128 additions & 0 deletions

File tree

docs/api/decoupler_gpu.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,5 @@
1212
dcg.mlm
1313
dcg.ulm
1414
dcg.aucell
15+
dcg.zscore
1516
```

docs/release-notes/0.13.4.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
```{rubric} Features
44
```
5+
* Added `rsc.dcg.zscore` function for decoupler-GPU {pr}`470` {smaller}`S Kharuk`
56
* In `pp.neighbors` set self-distances to zero where this is definitionally required
67
by the metric, but non-zero distances are potentially returned by `cuvs`/`cuml` due
78
to FP32 rounding. {pr}`477` {smaller}`J Pintar`

src/rapids_singlecell/decoupler_gpu/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
from ._method_aucell import aucell
44
from ._method_mlm import mlm
55
from ._method_ulm import ulm
6+
from ._method_zscore import zscore
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
from __future__ import annotations
2+
3+
import cupy as cp
4+
import numpy as np
5+
from cupyx.scipy.special import erfc
6+
7+
from rapids_singlecell.decoupler_gpu._helper._docs import docs
8+
from rapids_singlecell.decoupler_gpu._helper._log import _log
9+
from rapids_singlecell.decoupler_gpu._helper._Method import Method, MethodMeta
10+
11+
12+
@docs.dedent
13+
def _func_zscore(
14+
mat: cp.ndarray,
15+
adj: cp.ndarray,
16+
*,
17+
flavor: str = "RoKAI",
18+
verbose: bool = False,
19+
) -> tuple[cp.ndarray, cp.ndarray]:
20+
r"""
21+
Z-score (ZSCORE) :cite:`zscore`.
22+
23+
This approach computes the mean value of the molecular features for known targets,
24+
optionally subtracts the overall mean of all measured features,
25+
and normalizes the result by the standard deviation of all features and the square
26+
root of the number of targets.
27+
28+
This formulation was originally introduced in KSEA, which explicitly includes the
29+
subtraction of the global mean to compute the enrichment score :math:`ES`.
30+
31+
.. math::
32+
33+
ES = \frac{(\mu_s-\mu_p) \times \sqrt m }{\sigma}
34+
35+
Where:
36+
37+
- :math:`\mu_s` is the mean of targets
38+
- :math:`\mu_p` is the mean of all features
39+
- :math:`m` is the number of targets
40+
- :math:`\sigma` is the standard deviation of all features
41+
42+
However, in the RoKAI implementation, this global mean subtraction was omitted.
43+
44+
.. math::
45+
46+
ES = \frac{\mu_s \times \sqrt m }{\sigma}
47+
48+
A two-sided :math:`p_{value}` is then calculated from the consensus score using
49+
the survival function :math:`sf` of the standard normal distribution.
50+
51+
.. math::
52+
53+
p = 2 \times \mathrm{sf}\bigl(\lvert \mathrm{ES} \rvert \bigr)
54+
55+
%(yestest)s
56+
57+
%(params)s
58+
59+
flavor
60+
Which flavor to use when calculating the z-score, either KSEA or RoKAI.
61+
62+
%(returns)s
63+
64+
Example
65+
-------
66+
.. code-block:: python
67+
68+
import decoupler as dc
69+
70+
adata, net = dc.ds.toy()
71+
rsc.dcg.zscore(adata, net, tmin=3)
72+
"""
73+
74+
assert isinstance(flavor, str) and flavor in ["KSEA", "RoKAI"], (
75+
"flavor must be str and KSEA or RoKAI"
76+
)
77+
nobs, nvar = mat.shape
78+
nvar, nsrc = adj.shape
79+
m = f"zscore - calculating {nsrc} scores with flavor={flavor}"
80+
_log(m, level="info", verbose=verbose)
81+
stds = cp.std(mat, axis=1, ddof=1, keepdims=True)
82+
if flavor == "RoKAI":
83+
mean_all = cp.mean(mat, axis=1, keepdims=True)
84+
elif flavor == "KSEA":
85+
mean_all = cp.zeros(stds.shape, dtype=mat.dtype)
86+
n = cp.sqrt(cp.count_nonzero(adj, axis=0))
87+
mean = mat.dot(adj) / cp.sum(cp.abs(adj), axis=0)
88+
es = ((mean - mean_all) * n) / stds
89+
pv = erfc(cp.abs(es) / cp.sqrt(2.0))
90+
return es.get(), pv.get()
91+
92+
93+
_zscore = MethodMeta(
94+
name="zscore",
95+
desc="Z-score (ZSCORE)",
96+
func=_func_zscore,
97+
stype="numerical",
98+
adj=True,
99+
weight=True,
100+
test=True,
101+
limits=(-np.inf, +np.inf),
102+
reference="https://doi.org/10.1038/s41467-021-21211-6",
103+
)
104+
zscore = Method(_method=_zscore)

tests/decoupler/test_zscore.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from __future__ import annotations
2+
3+
import cupy as cp
4+
import numpy as np
5+
import pytest
6+
7+
import rapids_singlecell.decoupler_gpu as dc
8+
9+
10+
@pytest.mark.parametrize("flavor", ["KSEA", "RoKAI"])
11+
def test_func_zscore(
12+
mat,
13+
adjmat,
14+
flavor,
15+
):
16+
X, obs, var = mat
17+
X_cp = cp.array(X)
18+
adjmat_cp = cp.array(adjmat)
19+
es, pv = dc._method_zscore._func_zscore(mat=X_cp, adj=adjmat_cp, flavor=flavor)
20+
assert np.isfinite(es).all()
21+
assert ((0 <= pv) & (pv <= 1)).all()

0 commit comments

Comments
 (0)