Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/decoupler/_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,14 @@
tval
Whether to return the t-value (``tval=True``) the coefficient of the fitted model (``tval=False``)."""

# Reusable docstring fragment describing the ``n_bg`` parameter.
# It is passed as ``n_bg=_n_bg`` further down in this module, and docstrings
# elsewhere in the package reference it through the ``%(n_bg)s`` placeholder.
_n_bg = """\
n_bg
Number indicating the background size."""

# Reusable docstring fragment describing the ``ha_corr`` parameter.
# It is passed as ``ha_corr=_ha_corr`` further down in this module, and docstrings
# elsewhere in the package reference it through the ``%(ha_corr)s`` placeholder.
_ha_corr = """\
ha_corr
Haldane-Anscombe correction of odds ratio."""

_params = f"""\
Parameters
----------
Expand Down Expand Up @@ -252,4 +260,6 @@
notest=_notest,
returns=_returns,
tval=_tval,
n_bg=_n_bg,
ha_corr=_ha_corr,
)
1 change: 1 addition & 0 deletions src/decoupler/mt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from decoupler.mt._consensus import consensus
from decoupler.mt._decouple import decouple
from decoupler.mt._methods import _methods, aucell, gsea, gsva, mdt, mlm, ora, udt, ulm, viper, waggr, zscore
from decoupler.mt._query_set import query_set


def show() -> None:
Expand Down
4 changes: 2 additions & 2 deletions src/decoupler/mt/_ora.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,8 @@ def _func_ora(
If ``None``, the top 5% of positive features are selected.
n_bm
Number of bottom-ranked features, based on their magnitude, to select as observed features.
n_bg
Number indicating the background size.
%(n_bg)s
%(ha_corr)s

%(returns)s

Expand Down
83 changes: 83 additions & 0 deletions src/decoupler/mt/_query_set.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import pandas as pd
import scipy.stats as sts
from tqdm.auto import tqdm

from decoupler._docs import docs
from decoupler._log import _log
from decoupler.mt._ora import _oddsr, _test1t
from decoupler.pp.net import prune


@docs.dedent
def query_set(
    features: list,
    net: pd.DataFrame,
    n_bg: int | float | None = 20_000,
    ha_corr: int | float = 0.5,
    tmin: int | float = 5,
    verbose: bool = False,
) -> pd.DataFrame:
    """
    Test the overlap between a given feature set and every set in a network.

    For each source in ``net`` a 2x2 contingency table is built from the query
    ``features`` and the targets of that source:

    - ``a``: features that are also targets of the source.
    - ``b``: targets of the source that are not in ``features``.
    - ``c``: features that are not targets of the source.
    - ``d``: the remaining background. With a numeric ``n_bg`` this is
      ``n_bg - a - b - c``. With ``n_bg=None`` (or ``0``) it is the number of
      targets in the pruned network that are neither in ``features`` nor
      targets of the source.

    Parameters
    ----------
    features
        Set of features. Any non-string iterable is accepted; duplicates are ignored.
    %(net)s
    %(n_bg)s
    %(ha_corr)s
    %(tmin)s
    %(verbose)s

    Returns
    -------
    Dataframe containing the odds ratio and fisher exact test p-values for the overlap of the given
    features across sets in a network. Columns are ``source``, ``stat``, ``pval`` and ``padj``
    (Benjamini-Hochberg adjusted ``pval``), sorted by ``padj`` and then ``pval``.

    Example
    -------
    .. code-block:: python

        import decoupler as dc

        ct = dc.op.collectri()
        ft = set(ct[ct["source"] == "SMAD4"]["target"])
        dc.mt.query_set(features=ft, net=ct)
    """
    # Validate
    assert hasattr(features, "__iter__") and not isinstance(features, str | bytes), (
        "features must be an iterable collection of items such as a list"
    )
    features_set: set = set(features)
    if n_bg is None:
        # 0 is the internal marker for "derive the background from the network"
        n_bg = 0
        m = "query_set - not using n_bg, a feature specific background will be used instead"
        _log(m, level="info", verbose=verbose)
    assert isinstance(n_bg, int | float) and n_bg >= 0, "n_bg must be numeric and >= 0"
    # Prune: features=None keeps every target and only applies the tmin filter on sources
    net = prune(features=None, net=net, tmin=tmin, verbose=verbose)
    # The network-wide target universe does not depend on the source, so build it once
    use_net_bg = n_bg == 0
    all_targets = set(net["target"]) if use_net_bg else set()
    # Test each set against given set
    sources = net["source"].unique()
    rows = []
    for source in tqdm(sources, disable=not verbose):
        targets = set(net.loc[net["source"] == source, "target"])
        a = len(features_set & targets)
        b = len(targets - features_set)
        c = len(features_set - targets)
        if use_net_bg:
            # Everything in the network outside the union of features and targets
            d = len(all_targets - features_set - targets)
        else:
            # NOTE(review): d becomes negative when n_bg < a + b + c; how the
            # downstream helpers treat that is not visible here - confirm.
            d = int(n_bg - a - b - c)
        od = _oddsr(a=a, b=b, c=c, d=d, ha_corr=ha_corr, log=True)
        pv = _test1t(a=a, b=b, c=c, d=d)
        rows.append([source, od, pv])
    res = pd.DataFrame(rows, columns=["source", "stat", "pval"])
    res["padj"] = sts.false_discovery_control(res["pval"], method="bh")
    res = res.sort_values(["padj", "pval"]).reset_index(drop=True)
    return res
9 changes: 5 additions & 4 deletions src/decoupler/pp/net.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def _validate_net(

@docs.dedent
def prune(
features: np.ndarray,
features: np.ndarray | None,
net: pd.DataFrame,
tmin: int = 5,
verbose: bool = False,
Expand Down Expand Up @@ -110,11 +110,12 @@ def prune(
"""
# Validate
vnet = _validate_net(net, verbose=verbose)
features_set = set(features)
assert isinstance(tmin, int | float) and tmin >= 0, "tmin must be numeric and >= 0"
# Find shared targets between mat and net
msk = vnet["target"].isin(features_set)
vnet = vnet.loc[msk]
if features is not None:
features_set = set(features)
msk = vnet["target"].isin(features_set)
vnet = vnet.loc[msk]
# Find unique sources with tmin
sources = vnet["source"].value_counts()
sources = set(sources[sources >= tmin].index)
Expand Down
14 changes: 14 additions & 0 deletions tests/mt/test_query_set.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import pandas as pd

import decoupler as dc


def test_query_set(
    net,
):
    """Check that ``query_set`` returns a well-formed table for both background modes."""
    # Use the targets of one known source as the query set
    ft = set(net[net["source"] == "T1"]["target"])
    cols = {"source", "stat", "pval", "padj"}
    # Fixed-size background (default n_bg)
    df = dc.mt.query_set(features=ft, net=net, tmin=0)
    assert isinstance(df, pd.DataFrame)
    assert cols.issubset(df.columns)
    # Network-derived background (n_bg=None): previously only checked for not raising
    df = dc.mt.query_set(features=ft, net=net, n_bg=None, tmin=0)
    assert isinstance(df, pd.DataFrame)
    assert cols.issubset(df.columns)
Loading