Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,16 @@ and this project adheres to [Semantic Versioning][].
[keep a changelog]: https://keepachangelog.com/en/1.0.0/
[semantic versioning]: https://semver.org/spec/v2.0.0.html

## 2.1.4

### Changes
- Added `alternative` argument to `decoupler.mt.query_set`. By default `'greater'`, before it was `'two-sided'`
- Unpinned `scipy` version limit

### Bugfixes
- Fixed missing progressbar for `decoupler._download._download`
- Added missing `decoupler.mt.query_set` documentation

## 2.1.3

### Changes
Expand Down
9 changes: 9 additions & 0 deletions docs/api/mt.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,12 @@
mt.decouple
mt.consensus
```

## Miscellaneous

```{eval-rst}
.. autosummary::
:toctree: generated

mt.query_set
```
17 changes: 11 additions & 6 deletions src/decoupler/_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,16 @@ def _download_chunks(
headers = {"User-Agent": f"decoupler/{__version__} (https://github.com/scverse/decoupler)"}
with requests.get(url, stream=True, headers=headers) as r:
r.raise_for_status()
with tqdm(unit="B", unit_scale=True, desc="Progress", disable=not verbose) as pbar:
for chunk in r.iter_content(chunk_size=8192):
if chunk:
chunks.append(chunk)
pbar.update(len(chunk))
total = r.headers.get("Content-Length")
total = int(total) if total and total.isdigit() else None
with tqdm(
total=total, unit="B", unit_scale=True, unit_divisor=1024, desc="Progress", disable=not verbose
) as pbar:
for chunk in r.iter_content(chunk_size=1024 * 64):
if not chunk:
continue
chunks.append(chunk)
pbar.update(len(chunk))
# Read into bytes
data = io.BytesIO(b"".join(chunks))
return data
Expand All @@ -44,7 +49,7 @@ def _download(
data = None
for attempt in range(1, retries + 1):
try:
data = _download_chunks(url, verbose=False)
data = _download_chunks(url, verbose=verbose)
break
except requests.exceptions.HTTPError as e:
status_code = e.response.status_code if e.response is not None else None
Expand Down
25 changes: 13 additions & 12 deletions src/decoupler/ds/_scell.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
import pandas as pd
import requests
import scipy.io as sio
from tqdm.auto import tqdm

from decoupler._docs import docs
from decoupler._download import _download
from decoupler._log import _log
from decoupler.ds._utils import ensmbl_to_symbol

Expand All @@ -19,17 +19,18 @@ def _download_anndata(
verbose: bool = False,
) -> ad.AnnData:
warnings.filterwarnings("ignore", category=FutureWarning)
with requests.get(url, stream=True) as response:
response.raise_for_status()
total_size = int(response.headers.get("content-length", 0))
chunk_size = 8192
buffer = io.BytesIO()
with tqdm(total=total_size, unit="B", unit_scale=True, desc="Downloading .h5ad", disable=not verbose) as pbar:
for chunk in response.iter_content(chunk_size=chunk_size):
buffer.write(chunk)
pbar.update(len(chunk))
buffer.seek(0)
adata = ad.read_h5ad(buffer)
# with requests.get(url, stream=True) as response:
# response.raise_for_status()
# total_size = int(response.headers.get("content-length", 0))
# chunk_size = 8192
# buffer = io.BytesIO()
# with tqdm(total=total_size, unit="B", unit_scale=True, desc="Downloading .h5ad", disable=not verbose) as pbar:
# for chunk in response.iter_content(chunk_size=chunk_size):
# buffer.write(chunk)
# pbar.update(len(chunk))
# buffer.seek(0)
adata = _download(url=url, verbose=verbose)
adata = ad.read_h5ad(adata)
return adata


Expand Down
9 changes: 6 additions & 3 deletions src/decoupler/mt/_query_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@

from decoupler._docs import docs
from decoupler._log import _log
from decoupler.mt._ora import _oddsr, _test1t
from decoupler.mt._ora import _oddsr
from decoupler.pp.net import prune


@docs.dedent
def query_set(
features: list,
net: pd.DataFrame,
alternative: str = "greater",
n_bg: int | float | None = 20_000,
ha_corr: int | float = 0.5,
tmin: int | float = 5,
Expand All @@ -25,6 +26,8 @@ def query_set(
features
Set of features
%(net)s
alternative
Defines the alternative hypothesis for fisher exact test. Check ``scipy.stats.fisher_exact``.
%(n_bg)s
%(ha_corr)s
%(tmin)s
Expand All @@ -43,7 +46,7 @@ def query_set(

ct = dc.op.collectri()
ft = set(ct[ct["source"] == "SMAD4"]["target"])
dc.pp.query_set(features=fset, net=ct)
dc.mt.query_set(features=ft, net=ct)
"""
# Validate
assert hasattr(features, "__iter__") and not isinstance(features, str | bytes), (
Expand Down Expand Up @@ -75,7 +78,7 @@ def query_set(
else:
d = int(n_bg - a - b - c)
od = _oddsr(a=a, b=b, c=c, d=d, ha_corr=ha_corr, log=True)
pv = _test1t(a=a, b=b, c=c, d=d)
_, pv = sts.fisher_exact([[a, b], [c, d]], alternative=alternative)
df.append([source, od, pv])
df = pd.DataFrame(df, columns=["source", "stat", "pval"])
df["padj"] = sts.false_discovery_control(df["pval"], method="bh")
Expand Down
8 changes: 4 additions & 4 deletions tests/pp/test_anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def test_pseudobulk(
empty,
rng,
):
adata, _ = dc.ds.toy(nobs=10000, nvar=900, bval=2, seed=42, verbose=False)
adata, _ = dc.ds.toy(nobs=1000, nvar=250, bval=2, seed=42, verbose=False)
adata.layers["counts"] = adata.X.round()
adata.obs["sample"] = adata.obs["sample"]
adata.obs["dose"] = rng.choice(["low", "medium", "high"], size=adata.n_obs, replace=True)
Expand Down Expand Up @@ -105,7 +105,7 @@ def _run_psbulk():
)
return pdata

l_mem_usage, pdata = memory_usage(_run_psbulk, retval=True, interval=0.001)
l_mem_usage, pdata = memory_usage(_run_psbulk, retval=True, interval=0.0001)
l_mem_usage = max(l_mem_usage) - min(l_mem_usage)
assert isinstance(pdata, ad.AnnData)
assert pdata.shape[0] < adata.shape[0]
Expand Down Expand Up @@ -140,9 +140,9 @@ def _run_psbulk_backed_data():
)
return pbdata

b_mem_usage, pbdata = memory_usage(_run_psbulk_backed_data, retval=True, interval=0.001)
b_mem_usage, pbdata = memory_usage(_run_psbulk_backed_data, retval=True, interval=0.0001)
b_mem_usage = max(b_mem_usage) - min(b_mem_usage)
assert b_mem_usage < l_mem_usage
# assert b_mem_usage < l_mem_usage # Too unstable
msk = pbdata.X.sum(1) != 0
pbdata = pbdata[msk, :].copy()
assert pbdata.shape == pdata.shape
Expand Down
Loading