Skip to content

Commit 1e5ac1c

Browse files
committed
feat: Include the popV version in model metadata, refine metadata field handling and tag formatting, and pin upper bounds on dependencies whose newer releases break popV (harmonypy, tensorflow, scikit-learn).
1 parent d08455a commit 1e5ac1c

File tree

7 files changed

+32
-18
lines changed

7 files changed

+32
-18
lines changed

.pre-commit-config.yaml

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@ fail_fast: false
22
default_language_version:
33
python: python3
44
default_stages:
5-
- pre-commit
6-
- pre-push
5+
- commit
76
minimum_pre_commit_version: 2.16.0
87
repos:
98
- repo: https://github.com/asottile/blacken-docs
@@ -18,7 +17,7 @@ repos:
1817
types: [yaml]
1918

2019
- repo: https://github.com/executablebooks/mdformat
21-
rev: 0.7.22
20+
rev: 1.0.0
2221
hooks:
2322
- id: mdformat
2423
additional_dependencies:
@@ -31,7 +30,7 @@ repos:
3130
)$
3231
3332
- repo: https://github.com/igorshubovych/markdownlint-cli
34-
rev: v0.45.0
33+
rev: v0.47.0
3534
hooks:
3635
- id: markdownlint-fix
3736
exclude: |
@@ -41,7 +40,7 @@ repos:
4140
)$
4241
4342
- repo: https://github.com/astral-sh/ruff-pre-commit
44-
rev: v0.13.1
43+
rev: v0.15.4
4544
hooks:
4645
- id: ruff
4746
args: [--fix, --exit-non-zero-on-fix]

popv/algorithms/_bbknn.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,13 +91,20 @@ def compute_integration(self, adata):
9191
logging.info("Integrating data with bbknn")
9292
if len(adata.obs[self.batch_key].unique()) > 100:
9393
self.method_kwargs["neighbors_within_batch"] = 1
94-
if settings.cuml:
94+
if len(adata.obs[self.batch_key].unique()) > 200 and settings.cuml:
95+
logging.warning(
96+
f"Number of batches is {len(adata.obs[self.batch_key].unique())}, skipping RAPIDS BBKNN and running on CPU."
97+
)
98+
cuml = False
99+
else:
100+
cuml = settings.cuml
101+
if cuml:
95102
import rapids_singlecell as rsc
96103

97104
self.method_kwargs.pop("approx", None) # approx not supported in rsc
98105
self.method_kwargs.pop("use_annoy", None) # use_annoy not supported in rsc
99106
rsc.pp.bbknn(
100-
adata, batch_key=self.batch_key, use_rep="X_pca", algorithm="ivfflat", **self.method_kwargs, trim=0
107+
adata, batch_key=self.batch_key, use_rep="X_pca", algorithm="ivfflat", **self.method_kwargs
101108
)
102109
else:
103110
sc.external.pp.bbknn(adata, batch_key=self.batch_key, use_rep="X_pca", **self.method_kwargs)

popv/algorithms/_onclass.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,8 @@ def predict(self, adata):
188188
else:
189189
adata.obs[col] = adata.uns["unknown_celltype_label"]
190190
adata.obs[col] = adata.obs[col].astype(str) # Set dtype to string
191+
adata.obs[self.result_key] = adata.obs[self.result_key].astype(str)
192+
adata.obs[self.seen_result_key] = adata.obs[self.seen_result_key].astype(str)
191193
adata.obs.loc[adata.obs["_predict_cells"] == "relabel", result_df.columns] = result_df
192194
if self.return_probabilities:
193195
if f"{self.result_key}_probabilities" not in adata.obsm:

popv/algorithms/_scanvi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ def compute_umap(self, adata):
208208
Anndata object. Results are stored in adata.obsm[self.umap_key].
209209
"""
210210
if self.compute_umap_embedding:
211-
logging.info(f'Saving UMAP of BBKNN results to adata.obsm["{self.umap_key}"]')
211+
logging.info(f'Saving UMAP of scANVI results to adata.obsm["{self.umap_key}"]')
212212
if settings.cuml:
213213
import rapids_singlecell as rsc
214214

popv/hub/_metadata.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ def from_anndata(
7878
Additional keyword arguments to pass to the HubMetadata initializer.
7979
"""
8080
setup_dict = adata.uns["_setup_dict"]
81-
prediction_keys = adata.uns["prediction_keys"]
82-
methods = adata.uns["methods"]
81+
prediction_keys = list(adata.uns["prediction_keys"])
82+
methods = list(adata.uns["methods"])
8383
method_kwargs = adata.uns["method_kwargs"]
8484

8585
return cls(
@@ -122,6 +122,8 @@ class HubModelCardHelper:
122122
The version of `scvi-tools` that the model was trained with.
123123
anndata_version
124124
The version of anndata used during model training.
125+
popv_version
126+
The version of popV that the model was trained with.
125127
scikit_learn_version
126128
The version of scikit-learn used during model training.
127129
organism
@@ -153,6 +155,7 @@ class HubModelCardHelper:
153155

154156
license_info: str
155157
anndata_version: str
158+
popv_version: str
156159
scikit_learn_version: str
157160
organism: str
158161
tissues: list[str] = field(default_factory=list)
@@ -172,6 +175,7 @@ def from_dir(
172175
license_info: str,
173176
anndata_version: str,
174177
scikit_learn_version: str,
178+
popv_version: str,
175179
organism: str,
176180
metrics_report: str | None = None,
177181
**kwargs,
@@ -210,6 +214,7 @@ def from_dir(
210214
return cls(
211215
license_info,
212216
anndata_version,
217+
popv_version,
213218
scikit_learn_version,
214219
organism,
215220
metrics_report=metrics_report,
@@ -222,11 +227,11 @@ def _to_model_card(self) -> ModelCard:
222227
"biology",
223228
"genomics",
224229
"single-cell",
225-
f"anndata_version:{self.anndata_version}",
226-
f"scikit_learn_version:{self.scikit_learn_version}",
230+
f"AnnData:{self.anndata_version}",
231+
f"scikit_learn:{self.scikit_learn_version}",
227232
f"organism:{self.organism}",
228-
f"python_version:{'.'.join([str(i) for i in sys.version_info[:3]])}",
229-
"popV",
233+
f"Python:{'.'.join([str(i) for i in sys.version_info[:3]])}",
234+
f"popV:{self.popv_version}",
230235
]
231236
for t in self.tissues:
232237
tags.append(f"tissue: {t}")

pyproject.toml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ exclude = ["resources/*", "tests/*", "dataset/*"]
1010

1111
[project]
1212
name = "popV"
13-
version = "0.6.0"
13+
version = "0.6.1"
1414
description = "Consensus prediction of cell type labels with popV"
1515
readme = "README.md"
1616
requires-python = ">=3.10"
@@ -41,16 +41,17 @@ dependencies = [
4141
"anndata",
4242
"bbknn",
4343
"celltypist",
44-
"harmonypy",
44+
"harmonypy<0.1.0",
4545
"huggingface-hub",
4646
"onclass",
4747
"scanorama",
4848
"scanpy",
4949
"scikit-misc",
5050
"scvi-tools",
51-
"tensorflow",
51+
"tensorflow<2.18",
5252
"huggingface",
5353
"xgboost",
54+
"scikit-learn<1.8.0",
5455
"faiss-cpu",
5556
"leidenalg",
5657
"cellxgene_census",

tests/core/test_models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def test_annotation_hub(private: bool):
9595
"references": "Tabula Sapiens reveals transcription factor expression, senescence effects, and sex-specific features in cell types from 28 human organs and tissues, The Tabula Sapiens Consortium; bioRxiv, doi: https://doi.org/10.1101/2024.12.03.626516",
9696
"license_info": "cc-by-4.0",
9797
}
98-
hmch = popv.hub.HubModelCardHelper.from_dir(output_folder, anndata_version=anndata.__version__, **model_json)
98+
hmch = popv.hub.HubModelCardHelper.from_dir(output_folder, anndata_version=anndata.__version__, popv_version=popv.__version__, **model_json)
9999
hm = popv.hub.HubMetadata.from_anndata(
100100
adata,
101101
popv_version=popv.__version__,

0 commit comments

Comments
 (0)