Skip to content

Commit c1a458f

Browse files
authored
Merge pull request #145 from smuellerd/main
implement z-score method
2 parents 7a47da9 + 5955256 commit c1a458f

File tree

2 files changed

+137
-0
lines changed

2 files changed

+137
-0
lines changed

decoupler/method_zscore.py

+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
"""
2+
Method zscore.
3+
Code to run the z-score (RoKAI, KSEA) method.
4+
"""
5+
6+
import numpy as np
7+
import pandas as pd
8+
9+
from scipy.stats import norm
10+
11+
from .pre import extract, match, rename_net, get_net_mat, filt_min_n, return_data
12+
13+
14+
def zscore(m, net, flavor='RoKAI', verbose=False):
    """
    Compute z-score enrichment scores and p-values.

    For every sample (row of ``m``) and every source (column of ``net``) the weighted mean of the
    target features is computed, optionally centered on the mean of all features of that sample,
    scaled by the square root of the number of targets and divided by the sample's standard deviation.

    Parameters
    ----------
    m : ndarray or scipy sparse matrix
        Matrix of shape (samples x features). Sparse input is converted to a dense array.
    net : ndarray
        Weight matrix of shape (features x sources). Non-zero entries mark the targets of a source.
    flavor : str
        With ``'RoKAI'`` (default) the target mean is compared against zero. Any other value
        (e.g. ``'KSEA'``) subtracts the per-sample mean of all features from the target mean.
    verbose : bool
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    es : ndarray
        Z-scores of shape (samples x sources).
    pv : ndarray
        Tail probabilities ``norm.cdf(-abs(es))`` of shape (samples x sources).

    Notes
    -----
    A source whose column in ``net`` contains only zeros produces a division by zero (NaN scores).
    """

    # Imported locally so the module-level imports do not need to change.
    from scipy.sparse import issparse

    # np.std / np.mean cannot reduce a scipy sparse matrix along an axis, so densify first.
    if issparse(m):
        m = m.toarray()
    m = np.asarray(m)

    # Per-sample standard deviation over all features (sample std, ddof=1).
    stds = np.std(m, axis=1, ddof=1)

    # Background mean: zero for RoKAI, per-sample mean of all features otherwise.
    if flavor != 'RoKAI':
        mean_all = np.mean(m, axis=1)
    else:
        mean_all = np.zeros(stds.shape)

    # Square root of the number of targets (non-zero weights) of each source.
    n = np.sqrt(np.count_nonzero(net, axis=0))

    # Weighted mean of the target features, normalized by the total absolute weight.
    mean = m.dot(net) / np.sum(np.abs(net), axis=0)

    es = ((mean - mean_all.reshape(-1, 1)) * n) / stds.reshape(-1, 1)
    pv = norm.cdf(-np.abs(es))
    return es, pv
25+
26+
27+
def run_zscore(mat, net, source='source', target='target', weight='weight', batch_size=10000, flavor='RoKAI',
               min_n=5, verbose=False, use_raw=True):
    """
    z-score.

    Calculates regulatory activities using a z-score as described in KSEA or RoKAI. The z-score calculates the mean of
    the molecular features of the known targets for each regulator and adjusts it for the number of identified targets
    for the regulator, the standard deviation of all molecular features (RoKAI), as well as the mean of all molecular
    features (KSEA).

    Parameters
    ----------
    mat : list, DataFrame or AnnData
        List of [features, matrix], dataframe (samples x features) or an AnnData instance.
    net : DataFrame
        Network in long format.
    source : str
        Column name in net with source nodes.
    target : str
        Column name in net with target nodes.
    weight : str
        Column name in net with weights.
    batch_size : int
        Number of samples processed per batch. Sparse input is converted to dense one batch at a time, so increasing
        this will consume more memory but it will run faster.
    flavor : str
        Which z-score variant to compute. With ``'RoKAI'`` (default) the mean of the targets is compared against zero;
        any other value (e.g. ``'KSEA'``) additionally subtracts the per-sample mean of all molecular features.
    min_n : int
        Minimum of targets per source. If less, sources are removed.
    verbose : bool
        Whether to show progress.
    use_raw : bool
        Use raw attribute of mat if present.

    Returns
    -------
    estimate : DataFrame
        Z-scores. Stored in `.obsm['zscore_estimate']` if `mat` is AnnData.
    pvals : DataFrame
        Obtained p-values. Stored in `.obsm['zscore_pvals']` if `mat` is AnnData.

    Raises
    ------
    ValueError
        If `batch_size` is smaller than 1.
    """

    # Imported locally so the module-level imports do not need to change.
    from scipy.sparse import issparse

    batch_size = int(batch_size)
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer.')

    # Extract matrix (possibly sparse), sample names and feature names
    m, r, c = extract(mat, use_raw=use_raw, verbose=verbose)

    # Transform net
    net = rename_net(net, source=source, target=target, weight=weight)
    net = filt_min_n(c, net, min_n=min_n)
    sources, targets, net = get_net_mat(net)

    # Match arrays
    net = match(c, targets, net)

    if verbose:
        print('Running zscore on mat with {0} samples and {1} targets for {2} sources.'.format(m.shape[0], len(c), net.shape[1]))

    # Run z-score in row batches. Every statistic is computed per sample, so splitting the rows
    # gives exactly the same result while only one batch has to be held in dense form at a time.
    n_samples = m.shape[0]
    es_parts, pv_parts = [], []
    # max(..., 1) keeps a single (empty) batch for an input without samples.
    for start in range(0, max(n_samples, 1), batch_size):
        batch = m[start:start + batch_size]
        if issparse(batch):
            batch = batch.toarray()
        es, pv = zscore(batch, net, flavor=flavor)
        es_parts.append(es)
        pv_parts.append(pv)
    estimate = np.vstack(es_parts)
    pvals = np.vstack(pv_parts)

    # Transform to df
    estimate = pd.DataFrame(estimate, index=r, columns=sources)
    estimate.name = 'zscore_estimate'
    pvals = pd.DataFrame(pvals, index=r, columns=sources)
    pvals.name = 'zscore_pvals'

    return return_data(mat=mat, results=(estimate, pvals))

decoupler/tests/test_zscore.py

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import numpy as np
2+
import pandas as pd
3+
from anndata import AnnData
4+
from ..method_zscore import zscore, run_zscore
5+
6+
def test_zscore():
    """Check the sign of the first source's scores and the p-value range for both flavors."""
    mat = np.array(
        [
            [-7., -1., 1., 1.],
            [-4., -2., 1., 2.],
            [1., 2., 5., 1.],
            [1., 1., 6., 2.],
            [-8., -7., 1., 1.],
        ],
        dtype=np.float32,
    )
    adj = np.array([[1., 0.], [1, 0.], [0., -1.], [0., -1.]], dtype=np.float32)

    # Default flavor (RoKAI): the sign follows the mean of the first source's targets.
    scores, pvalues = zscore(mat, adj)
    for row, sign in enumerate((-1, -1, 1, 1, -1)):
        assert sign * scores[row, 0] > 0
    assert np.all((0. <= pvalues) & (pvalues <= 1.))

    # KSEA flavor: after subtracting the per-sample mean every score of the first source is negative.
    scores_ksea, pvalues_ksea = zscore(mat, adj, flavor='KSEA')
    for row in range(mat.shape[0]):
        assert scores_ksea[row, 0] < 0
    assert np.all((0. <= pvalues_ksea) & (pvalues_ksea <= 1.))
24+
25+
def test_run_zscore():
    """Run `run_zscore` on a small DataFrame (both flavors) and smoke-test the AnnData path."""
    m = np.array([[-7., -1., 1., 1.], [-4., -2., 1., 2.], [1., 2., 5., 1.], [1., 1., -6., -8.], [-8., -7., 1., 1.]])
    r = np.array(['S1', 'S2', 'S3', 'S4', 'S5'])
    c = np.array(['G1', 'G2', 'G3', 'G4'])
    df = pd.DataFrame(m, index=r, columns=c)
    net = pd.DataFrame([['T1', 'G1', 1], ['T1', 'G2', 1], ['T2', 'G3', -1], ['T2', 'G4', -1]],
                       columns=['source', 'target', 'weight'])

    # Default flavor (RoKAI)
    res = run_zscore(df, net, verbose=True, use_raw=False, min_n=0)
    assert res[0].loc['S1', 'T2'] < 0
    assert res[0].loc['S2', 'T2'] < 0
    assert res[0].loc['S3', 'T2'] < 0
    assert res[0].loc['S4', 'T2'] > 0
    assert res[0].loc['S5', 'T2'] < 0
    # Vectorized range check: DataFrame.map is only available in pandas >= 2.1.
    assert ((res[1] >= 0) & (res[1] <= 1)).all().all()

    # KSEA flavor
    res2 = run_zscore(df, net, verbose=True, use_raw=False, min_n=0, flavor='KSEA')
    assert res2[0].loc['S1', 'T2'] > 0
    assert res2[0].loc['S2', 'T2'] < 0
    assert res2[0].loc['S3', 'T2'] < 0
    assert res2[0].loc['S4', 'T2'] > 0
    assert res2[0].loc['S5', 'T2'] > 0
    assert ((res2[1] >= 0) & (res2[1] <= 1)).all().all()

    # AnnData input: only checks that the call completes.
    adata = AnnData(df.astype(np.float32))
    run_zscore(adata, net, verbose=True, use_raw=False, min_n=0)

0 commit comments

Comments
 (0)