Skip to content

Commit 38b50dc

Browse files
authored
Merge pull request #211 from scverse/2.0.8
2.1.0 changes
2 parents 82a4b99 + 9188c90 commit 38b50dc

19 files changed

Lines changed: 323 additions & 61 deletions

CHANGELOG.md

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,60 @@ and this project adheres to [Semantic Versioning][].
88
[keep a changelog]: https://keepachangelog.com/en/1.0.0/
99
[semantic versioning]: https://semver.org/spec/v2.0.0.html
1010

11-
## 2.0.8
11+
## 2.1.0
12+
13+
### Changes
14+
15+
- Updated logo
16+
- `decoupler._download._download` now returns the raw downloaded bytes (as an in-memory `io.BytesIO` buffer) instead of a dataframe. To transform the result into a `pandas.DataFrame`, use `decoupler._download._bytes_to_pandas`
17+
- Enrichment methods and pseudobulking now work with backed AnnData objects, which is useful when working with large datasets under limited memory
18+
19+
### Bugfixes
20+
- Fixed error in `pl.obsm` where default value of `cmap_obs` was not properly set.
21+
22+
## 2.0.7
23+
24+
### Changes
25+
- Added `pre-commit` functionality to the repository
26+
27+
## 2.0.6
28+
29+
### Changes
30+
- Modified links and paths to follow scverse's repository
31+
32+
## 2.0.5
33+
34+
### Bugfixes
35+
- Fixed error message when extra dependencies were not installed
36+
37+
## 2.0.4
38+
39+
### Bugfixes
40+
- Fixed `dcor` import bug as an external dependency
41+
42+
## 2.0.3
43+
44+
### Bugfixes
45+
- Fixed error in `pp.pseudobulk` when obs columns were not categorical
46+
47+
## 2.0.2
48+
49+
### Changes
50+
- Allowed ordering functions (`pp.bin_order`, `pl.order` and `pl.order_targets`) not to be bound between 0 and 1
51+
- Added ipywidgets as dependency
52+
- Silenced xgboost warnings
53+
54+
### Bugfixes
55+
- Handled corner case in `bm.metric.auc` when scores are all 0
56+
- Fixed error in `bm.metric.hmean` when metrics were str instead of list
57+
- Fixed error when `obs` column is a list in `pp.pseudobulk`
58+
59+
## 2.0.1
60+
61+
### Bugfixes
62+
- Fixed an error in `pp.pseudobulk` when handling empty samples or features
63+
64+
## 2.0.0
1265

1366
Major update to accommodate the scverse template {cite}`scverse`.
1467

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# decoupler - Ensemble of methods to infer enrichment scores
2-
<img src="https://github.com/saezlab/decoupleR/blob/master/inst/figures/logo.svg?raw=1" align="right" width="120" class="no-scaled-link" />
2+
<img src="https://raw.githubusercontent.com/scverse/decoupler/refs/heads/2.0.8/docs/_static/images/logo.svg" align="right" width="120" class="no-scaled-link" />
33

44

55
[![Tests][badge-tests]][tests]

docs/_static/images/logo.png

95 KB
Loading

docs/_static/images/logo.svg

Lines changed: 65 additions & 0 deletions
Loading

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ requires = [ "hatchling" ]
44

55
[project]
66
name = "decoupler"
7-
version = "2.0.8"
7+
version = "2.1.0"
88
description = "Python package to perform enrichment analysis from omics data."
99
readme = "README.md"
1010
license = { file = "LICENSE" }
@@ -66,6 +66,7 @@ optional-dependencies.test = [
6666
"gseapy",
6767
"igraph",
6868
"ipywidgets",
69+
"memory-profiler",
6970
"pytest",
7071
"scanpy",
7172
"statsmodels",

src/decoupler/_download.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@
1313
def _download(
1414
url: str,
1515
verbose: bool = False,
16-
**kwargs,
17-
) -> pd.DataFrame:
16+
) -> bytes:
1817
assert isinstance(url, str), "url must be str"
1918
# Download with progress bar
2019
m = f"Downloading {url}"
@@ -27,9 +26,13 @@ def _download(
2726
if chunk:
2827
chunks.append(chunk)
2928
pbar.update(len(chunk))
30-
# Read into pandas
29+
# Read into bytes
3130
data = io.BytesIO(b"".join(chunks))
32-
df = pd.read_csv(data, **kwargs)
3331
m = "Download finished"
3432
_log(m, level="info", verbose=verbose)
33+
return data
34+
35+
36+
def _bytes_to_pandas(data: bytes, **kwargs) -> pd.DataFrame:
37+
df = pd.read_csv(data, **kwargs)
3538
return df

src/decoupler/ds/_bulk.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from anndata import AnnData
22

33
from decoupler._docs import docs
4-
from decoupler._download import _download
4+
from decoupler._download import _bytes_to_pandas, _download
55
from decoupler._log import _log
66

77

@@ -38,7 +38,8 @@ def hsctgfb(
3838
"https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE151251&format=file&"
3939
"file=GSE151251%5FHSCs%5FCtrl%2Evs%2EHSCs%5FTGFb%2Ecounts%2Etsv%2Egz"
4040
)
41-
adata = _download(url, compression="gzip", sep="\t", verbose=verbose)
41+
adata = _download(url, verbose=verbose)
42+
adata = _bytes_to_pandas(adata, compression="gzip", sep="\t")
4243
# Transform to AnnData
4344
adata = adata.drop_duplicates("GeneName").set_index("GeneName").iloc[:, 5:].T
4445
adata.columns.name = None
@@ -92,9 +93,11 @@ def knocktf(
9293
assert isinstance(thr_fc, int | float) or thr_fc is None, "thr_fc must be numeric or None"
9394
# Download
9495
url = "https://zenodo.org/record/7035528/files/knockTF_expr.csv?download=1"
95-
adata = _download(url, sep=",", index_col=0, verbose=verbose)
96+
adata = _download(url, verbose=verbose)
97+
adata = _bytes_to_pandas(adata, sep=",", index_col=0)
9698
url = "https://zenodo.org/record/7035528/files/knockTF_meta.csv?download=1"
97-
obs = _download(url, sep=",", index_col=0, verbose=verbose)
99+
obs = _download(url, verbose=verbose)
100+
obs = _bytes_to_pandas(obs, sep=",", index_col=0)
98101
obs = obs.rename(columns={"TF": "source"}).assign(type_p=-1)
99102
# Make anndata
100103
adata = AnnData(X=adata, obs=obs)

src/decoupler/mt/_run.py

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,21 +55,33 @@ def _run(
5555
) -> tuple[pd.DataFrame, pd.DataFrame] | AnnData | None:
5656
_log(f"{name} - Running {name}", level="info", verbose=verbose)
5757
# Process data
58-
mat, obs, var = extract(data, layer=layer, raw=raw, empty=empty, verbose=verbose)
59-
sparse = sps.issparse(mat)
58+
mat, obs, var = extract(data, layer=layer, raw=raw, empty=empty, verbose=verbose, bsize=bsize)
59+
issparse = sps.issparse(mat)
60+
isbacked = isinstance(mat, tuple)
6061
# Process net
6162
net = prune(features=var, net=net, tmin=tmin, verbose=verbose)
6263
# Handle stat type
6364
if adj:
6465
sources, targets, adjm = adjmat(features=var, net=net, verbose=verbose)
65-
# Handle sparse
66-
if sparse:
66+
# Handle batches
67+
if issparse or isbacked:
6768
nbatch = int(np.ceil(obs.size / bsize))
6869
es, pv = [], []
6970
for i in tqdm(range(nbatch), disable=not verbose):
71+
if i == 0 and verbose:
72+
batch_verbose = True
73+
else:
74+
batch_verbose = False
7075
srt, end = i * bsize, i * bsize + bsize
71-
bmat = mat[srt:end].toarray()
72-
bes, bpv = func(bmat, adjm, verbose=verbose, **kwargs)
76+
if sps.issparse(mat):
77+
bmat = mat[srt:end].toarray()
78+
else:
79+
bmat, msk_col = mat
80+
bmat = bmat[srt:end, :]
81+
if sps.issparse(bmat):
82+
bmat = bmat.toarray()
83+
bmat = bmat[:, msk_col]
84+
bes, bpv = func(bmat, adjm, verbose=batch_verbose, **kwargs)
7385
es.append(bes)
7486
pv.append(bpv)
7587
es = np.vstack(es)
@@ -79,7 +91,23 @@ def _run(
7991
es = pd.DataFrame(es, index=obs, columns=sources)
8092
else:
8193
sources, cnct, starts, offsets = idxmat(features=var, net=net, verbose=verbose)
82-
es, pv = func(mat, cnct, starts, offsets, verbose=verbose, **kwargs)
94+
if isbacked:
95+
nbatch = int(np.ceil(obs.size / bsize))
96+
es, pv = [], []
97+
for i in tqdm(range(nbatch), disable=not verbose):
98+
if i == 0 and verbose:
99+
batch_verbose = True
100+
else:
101+
batch_verbose = False
102+
srt, end = i * bsize, i * bsize + bsize
103+
bmat, msk_col = mat
104+
bmat = bmat[srt:end, msk_col]
105+
bes, bpv = func(bmat, cnct, starts, offsets, verbose=batch_verbose, **kwargs)
106+
es.append(bes)
107+
pv.append(bpv)
108+
es = np.vstack(es)
109+
else:
110+
es, pv = func(mat, cnct, starts, offsets, verbose=verbose, **kwargs)
83111
es = pd.DataFrame(es, index=obs, columns=sources)
84112
# Handle pvals and FDR correction
85113
if test:

src/decoupler/op/_collectri.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pandas as pd
22

33
from decoupler._docs import docs
4-
from decoupler._download import _download
4+
from decoupler._download import _bytes_to_pandas, _download
55
from decoupler.op._dtype import _infer_dtypes
66
from decoupler.op._translate import translate
77

@@ -45,6 +45,7 @@ def collectri(
4545
"""
4646
url = "https://zenodo.org/records/8192729/files/CollecTRI_regulons.csv?download=1"
4747
ct = _download(url, verbose=verbose)
48+
ct = _bytes_to_pandas(ct)
4849
# Update resources
4950
resources = []
5051
for str_res in ct["resources"]:

src/decoupler/op/_dorothea.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pandas as pd
22

33
from decoupler._docs import docs
4-
from decoupler._download import URL_INT, _download
4+
from decoupler._download import URL_INT, _bytes_to_pandas, _download
55
from decoupler._log import _log
66
from decoupler.op._dtype import _infer_dtypes
77
from decoupler.op._translate import translate
@@ -71,7 +71,8 @@ def dorothea(
7171
url = URL_INT + url_ext
7272
m = f"dorothea - Accessing DoRothEA (levels {str_levels}) with {license} license and weights={weights}"
7373
_log(m, level="info", verbose=verbose)
74-
do = _download(url, sep="\t", verbose=verbose)
74+
do = _download(url, verbose=verbose)
75+
do = _bytes_to_pandas(do, sep="\t")
7576
# Filter extra columns
7677
do = do[
7778
[

0 commit comments

Comments
 (0)