Skip to content

Commit c31ddc0

Browse files
authored
Merge branch 'main' into source_targets_enh
2 parents 37d3c41 + 68c1919 commit c31ddc0

6 files changed

Lines changed: 115 additions & 6 deletions

File tree

src/decoupler/_docs.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,14 @@
194194
tval
195195
Whether to return the t-value (``tval=True``) or the coefficient of the fitted model (``tval=False``)."""
196196

197+
_n_bg = """\
198+
n_bg
199+
Number indicating the background size."""
200+
201+
_ha_corr = """\
202+
ha_corr
203+
Haldane-Anscombe correction of odds ratio."""
204+
197205
_params = f"""\
198206
Parameters
199207
----------
@@ -252,4 +260,6 @@
252260
notest=_notest,
253261
returns=_returns,
254262
tval=_tval,
263+
n_bg=_n_bg,
264+
ha_corr=_ha_corr,
255265
)

src/decoupler/mt/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from decoupler.mt._consensus import consensus
33
from decoupler.mt._decouple import decouple
44
from decoupler.mt._methods import _methods, aucell, gsea, gsva, mdt, mlm, ora, udt, ulm, viper, waggr, zscore
5+
from decoupler.mt._query_set import query_set
56

67

78
def show() -> None:

src/decoupler/mt/_ora.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,8 +234,8 @@ def _func_ora(
234234
If ``None``, the top 5% of positive features are selected.
235235
n_bm
236236
Number of bottom-ranked features, based on their magnitude, to select as observed features.
237-
n_bg
238-
Number indicating the background size.
237+
%(n_bg)s
238+
%(ha_corr)s
239239
240240
%(returns)s
241241

src/decoupler/mt/_query_set.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import pandas as pd
2+
import scipy.stats as sts
3+
from tqdm.auto import tqdm
4+
5+
from decoupler._docs import docs
6+
from decoupler._log import _log
7+
from decoupler.mt._ora import _oddsr, _test1t
8+
from decoupler.pp.net import prune
9+
10+
11+
@docs.dedent
def query_set(
    features: list,
    net: pd.DataFrame,
    n_bg: int | float | None = 20_000,
    ha_corr: int | float = 0.5,
    tmin: int | float = 5,
    verbose: bool = False,
) -> pd.DataFrame:
    """
    Test overlap between a given feature set against a database of sets.

    Parameters
    ----------
    features
        Set of features
    %(net)s
    %(n_bg)s
    %(ha_corr)s
    %(tmin)s
    %(verbose)s

    Returns
    -------
    Dataframe containing the odds ratio and fisher exact test p-values for the overlap of the given
    features across sets in a network. It has one row per source and the columns ``source``, ``stat``,
    ``pval`` and ``padj`` (Benjamini-Hochberg adjusted), sorted by ``padj`` and then ``pval``.

    Example
    -------
    .. code-block:: python

        import decoupler as dc

        ct = dc.op.collectri()
        ft = set(ct[ct["source"] == "SMAD4"]["target"])
        dc.mt.query_set(features=ft, net=ct)
    """
    # Validate
    assert hasattr(features, "__iter__") and not isinstance(features, str | bytes), (
        "features must be an iterable collection of items such as a list"
    )
    features_set: set = set(features)
    if n_bg is None:
        # 0 is used internally as the marker for "derive the background from the network targets"
        n_bg = 0
        m = "query_set - not using n_bg, a feature specific background will be used instead"
        _log(m, level="info", verbose=verbose)
    assert isinstance(n_bg, int | float) and n_bg >= 0, "n_bg must be numeric and >= 0"
    # Prune (features=None keeps every target, only the tmin filter on sources is applied)
    net = prune(features=None, net=net, tmin=tmin, verbose=verbose)
    # Targets of the pruned network; loop-invariant and only needed for the feature specific background
    use_net_bg = n_bg == 0
    all_targets = set(net["target"]) if use_net_bg else set()
    # Test each set against given set
    sources = net["source"].unique()
    rows = []
    for source in tqdm(sources, disable=not verbose):
        targets = set(net[net["source"] == source]["target"])
        # 2x2 contingency table counts
        a = len(features_set & targets)  # in both the query and the source set
        b = len(targets - features_set)  # only in the source set
        c = len(features_set - targets)  # only in the query
        if use_net_bg:
            # Background = network targets found in neither the query nor the source set
            d = len(all_targets - (features_set | targets))
        else:
            # NOTE(review): d becomes negative when n_bg < a + b + c; confirm how
            # _oddsr/_test1t behave in that case or validate n_bg against the set sizes.
            d = int(n_bg - a - b - c)
        od = _oddsr(a=a, b=b, c=c, d=d, ha_corr=ha_corr, log=True)
        pv = _test1t(a=a, b=b, c=c, d=d)
        rows.append([source, od, pv])
    res = pd.DataFrame(rows, columns=["source", "stat", "pval"])
    res["padj"] = sts.false_discovery_control(res["pval"], method="bh")
    res = res.sort_values(["padj", "pval"]).reset_index(drop=True)
    return res

src/decoupler/pp/net.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def _validate_net(
7979

8080
@docs.dedent
8181
def prune(
82-
features: np.ndarray,
82+
features: np.ndarray | None,
8383
net: pd.DataFrame,
8484
tmin: int = 5,
8585
verbose: bool = False,
@@ -110,11 +110,12 @@ def prune(
110110
"""
111111
# Validate
112112
vnet = _validate_net(net, verbose=verbose)
113-
features_set = set(features)
114113
assert isinstance(tmin, int | float) and tmin >= 0, "tmin must be numeric and >= 0"
115114
# Find shared targets between mat and net
116-
msk = vnet["target"].isin(features_set)
117-
vnet = vnet.loc[msk]
115+
if features is not None:
116+
features_set = set(features)
117+
msk = vnet["target"].isin(features_set)
118+
vnet = vnet.loc[msk]
118119
# Find unique sources with tmin
119120
sources = vnet["source"].value_counts()
120121
sources = set(sources[sources >= tmin].index)

tests/mt/test_query_set.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import pandas as pd
2+
3+
import decoupler as dc
4+
5+
6+
def test_query_set(
    net,
):
    """Check that ``query_set`` returns a well-formed table in both background modes."""
    ft = set(net[net["source"] == "T1"]["target"])
    cols = {"source", "stat", "pval", "padj"}
    # Fixed background size (default n_bg)
    df = dc.mt.query_set(features=ft, net=net, tmin=0)
    assert isinstance(df, pd.DataFrame)
    assert cols.issubset(df.columns)
    # Feature specific background (n_bg=None); previously called but never checked
    df = dc.mt.query_set(features=ft, net=net, n_bg=None, tmin=0)
    assert isinstance(df, pd.DataFrame)
    assert cols.issubset(df.columns)

0 commit comments

Comments
 (0)