diff --git a/CHANGELOG.md b/CHANGELOG.md index 798954a..620d51f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ and this project adheres to [Semantic Versioning][]. ## [Unreleased] +### Added +- Included tests for the `check` module, and more tests for the main classes. + ## [v0.1.1] ### Changed diff --git a/LICENSE b/LICENSE index cc53920..a1d9b40 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2025, Marius Lange +Copyright (c) 2025, QuaDBioLab Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index d466d27..dfed7cc 100644 --- a/README.md +++ b/README.md @@ -10,34 +10,46 @@ [badge-pre-commit]: https://results.pre-commit.ci/badge/github/quadbio/cellmapper/main.svg [badge-pypi]: https://img.shields.io/pypi/v/cellmapper.svg -k-NN-based mapping of cells across representations to tranfer labels, embeddings and expression values. Works for millions of cells, on CPU and GPU, across molecular modalities, between spatial and non-spatial data, for arbitrary query and reference datasets. Using `faiss` to compute k-NN graphs, CellMapper takes about 30 seconds to transfer cell type labels from 1.5M cells to 1.5M cells on a single RTX 4090 with 60 GB CPU memory. +k-NN-based mapping of cells across representations to transfer labels, embeddings and expression values. Works for millions of cells, on CPU and GPU, across molecular modalities, between spatial and non-spatial data, for arbitrary query and reference datasets. Using [faiss][] to compute k-NN graphs, CellMapper takes about 30 seconds to transfer cell type labels from 1.5M cells to 1.5M cells on a single RTX 4090 with 60 GB CPU memory. -## Getting started - -Please refer to the [documentation][], -in particular, the [API documentation][]. +Inspired by scanpy's [ingest][] and the [HNOCA-tools][] packages. 
## Installation You need to have Python 3.10 or newer installed on your system. If you don't have Python installed, we recommend installing [uv][]. -There are several alternative options to install cellmapper: +There are two alternative options to install ``cellmapper``: - + ```bash + pip install cellmapper + ``` + +- **Install the latest development version**: + + ```bash + pip install git+https://github.com/quadbio/cellmapper.git@main + ``` -1. Install the latest development version: +## Getting started + +This package assumes that you have ``ref`` and ``query`` AnnData objects, with a joint embedding computed and stored in ``.obsm``. We explicitly do not compute this joint embedding, but there are plenty of methods you can use to get such joint embeddings, e.g. [GimVI][] or [ENVI][] for spatial mapping, [GLUE][], [MIDAS][] and [MOFA+][] for modality translation, and [scVI][], [scANVI][] and [scArches][] for query-to-reference mapping - this is just a small selection! + +With a joint embedding in ``.obsm["X_joint"]`` at hand, the simplest way to use ``CellMapper`` is as follows: +```Python +from cellmapper import CellMapper -```bash -pip install git+https://github.com/quadbio/cellmapper.git@main +cmap = CellMapper(ref, query).fit( + use_rep="X_joint", obs_keys="celltype", obsm_keys="X_umap", layer_key="X" + ) ``` +This will transfer data from the reference to the query dataset, including celltype labels stored in ``ref.obs``, a UMAP embedding stored in ``ref.obsm``, and expression values stored in ``ref.X``. + +There are many ways to customize this, e.g. use different ways to compute k-NN graphs and to turn them into mapping matrices, and we implement a few methods to evaluate whether your k-NN transfer was successful. + ## Release notes See the [changelog][]. @@ -59,3 +71,16 @@ Please cite this GitHub repo if you find CellMapper useful for your research. 
[coverage]: https://codecov.io/gh/quadbio/cellmapper [pre-commit]: https://results.pre-commit.ci/latest/github/quadbio/cellmapper/main [pypi]: https://pypi.org/project/cellmapper/ +[faiss]: https://github.com/facebookresearch/faiss + +[ingest]: https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.ingest.html +[HNOCA-tools]: https://devsystemslab.github.io/HNOCA-tools/ + +[GimVI]: https://docs.scvi-tools.org/en/stable/api/reference/scvi.external.GIMVI.html# +[ENVI]: https://scenvi.readthedocs.io/en/latest/# +[GLUE]: https://scglue.readthedocs.io/en/latest/ +[MIDAS]: https://scmidas.readthedocs.io/en/latest/ +[MOFA+]: https://muon.readthedocs.io/en/latest/omics/multi.html +[scVI]: https://docs.scvi-tools.org/en/stable/api/reference/scvi.model.SCVI.html +[scANVI]: https://docs.scvi-tools.org/en/stable/api/reference/scvi.model.SCANVI.html +[scArches]: https://docs.scarches.org/en/latest/ diff --git a/src/cellmapper/check.py b/src/cellmapper/check.py index e97a339..776a963 100644 --- a/src/cellmapper/check.py +++ b/src/cellmapper/check.py @@ -79,4 +79,6 @@ def check_deps(*args) -> None: A list of dependencies to check """ for item in args: + if item not in CHECKERS: + raise RuntimeError(f"Dependency '{item}' is not registered in CHECKERS.") CHECKERS[item].check() diff --git a/src/cellmapper/knn.py b/src/cellmapper/knn.py index 79dddde..bb5dfcf 100644 --- a/src/cellmapper/knn.py +++ b/src/cellmapper/knn.py @@ -118,7 +118,9 @@ def knn_graph_connectivities( epsilon = kwargs.get("epsilon", 1e-8) connectivities = 1.0 / (self.distances + epsilon) else: - raise ValueError(f"Unknown kernel: {kernel}. Supported kernels are 'gaussian' and 'scarches'.") + raise ValueError( + f"Unknown kernel: {kernel}. Supported kernels are: 'gaussian', 'scarches', 'random', 'inverse_distance'." 
+ ) rowptr = np.arange(0, self.n_samples * self.n_neighbors + 1, self.n_neighbors) return csr_matrix((connectivities.ravel().astype(dtype), self.indices.ravel(), rowptr), shape=self.shape) diff --git a/tests/test_cellmapper.py b/tests/test_cellmapper.py index c9ae36d..e746130 100644 --- a/tests/test_cellmapper.py +++ b/tests/test_cellmapper.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from cellmapper.cellmapper import CellMapper + def assert_metrics_close(actual: dict, expected: dict, atol=1e-3): for key, exp in expected.items(): @@ -66,3 +68,48 @@ def test_compute_neighbors_joint_pca(self, cmap, joint_pca_key, n_pca_components assert joint_pca_key in cmap.query.obsm assert cmap.ref.obsm[joint_pca_key].shape[1] == n_pca_components assert cmap.query.obsm[joint_pca_key].shape[1] == n_pca_components + + @pytest.mark.parametrize( + "obs_keys,obsm_keys,layer_key", + [ + ("leiden", None, None), + (None, "X_pca", None), + (None, None, "X"), + ("leiden", "X_pca", None), + ("leiden", None, "X"), + (None, "X_pca", "X"), + ("leiden", "X_pca", "X"), + ], + ) + def test_fit_various_combinations(self, cmap, obs_keys, obsm_keys, layer_key): + cmap.fit(obs_keys=obs_keys, obsm_keys=obsm_keys, layer_key=layer_key) + if obs_keys is not None: + keys = [obs_keys] if isinstance(obs_keys, str) else obs_keys + for key in keys: + assert f"{key}_pred" in cmap.query.obs + if obsm_keys is not None: + keys = [obsm_keys] if isinstance(obsm_keys, str) else obsm_keys + for key in keys: + assert f"{key}_pred" in cmap.query.obsm + if layer_key is not None: + assert cmap.query_imputed is not None + assert cmap.query_imputed.X.shape[0] == cmap.query.n_obs + + def test_transfer_labels_self_mapping(self, query_ref_adata): + """Check mapping to self.""" + _, ref = query_ref_adata + cm = CellMapper(ref, ref) + cm.fit( + knn_method="sklearn", + mapping_method="jaccard", + obs_keys="leiden", + use_rep="X_pca", + n_neighbors=1, + prediction_postfix="transfer", + ) + assert "leiden_transfer" in 
ref.obs + assert len(ref.obs["leiden_transfer"]) == len(ref.obs["leiden"]) + # Check that all predicted labels are valid categories + assert set(ref.obs["leiden_transfer"].cat.categories) <= set(ref.obs["leiden"].cat.categories) + # If mapping to self, labels should match + assert ref.obs["leiden_transfer"].equals(ref.obs["leiden"]) diff --git a/tests/test_check.py b/tests/test_check.py new file mode 100644 index 0000000..18b6e5a --- /dev/null +++ b/tests/test_check.py @@ -0,0 +1,34 @@ +import packaging +import pytest + +from cellmapper import check +from cellmapper.check import Checker, check_deps + + +class TestCheck: + def test_checker_available_module(self): + # Should not raise for a real installable package + Checker("packaging").check() + + def test_checker_missing_module(self): + # Should raise RuntimeError for a missing module + with pytest.raises(RuntimeError): + Checker("not_a_real_module").check() + + def test_checker_version_requirement(self): + # Should raise if vmin is higher than installed version + installed_version = packaging.version.parse(packaging.__version__) + higher_version = str(installed_version.major + 1) + ".0.0" + with pytest.raises(RuntimeError): + Checker("packaging", vmin=higher_version).check() + + def test_check_deps_missing(self): + # Should raise for a missing dependency (not registered in CHECKERS) + with pytest.raises(RuntimeError): + check_deps("not_a_real_module") + + def test_check_deps_available(self): + # Should not raise for a real installable package + check.CHECKERS["packaging"] = Checker("packaging") + check_deps("packaging") + del check.CHECKERS["packaging"] diff --git a/tests/test_neighbors.py b/tests/test_neighbors.py index a25370d..17c5572 100644 --- a/tests/test_neighbors.py +++ b/tests/test_neighbors.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from cellmapper.knn import Neighbors @@ -11,17 +12,27 @@ def assert_adjacency_equal(neigh1, neigh2, attrs=("xx", "yy", "xy", "yx")): class TestNeighbors: - def 
test_neighbors_sklearn_vs_pynndescent(self, small_data): + @pytest.mark.parametrize("only_yx", [False, True]) + def test_neighbors_sklearn_vs_pynndescent(self, small_data, only_yx): x, y = small_data n_neighbors = 3 # sklearn neigh_skl = Neighbors(x, y) - neigh_skl.compute_neighbors(n_neighbors=n_neighbors, method="sklearn") + neigh_skl.compute_neighbors(n_neighbors=n_neighbors, method="sklearn", only_yx=only_yx) # pynndescent neigh_pynn = Neighbors(x, y) - neigh_pynn.compute_neighbors(n_neighbors=n_neighbors, method="pynndescent") - # Compare adjacency matrices - assert_adjacency_equal(neigh_skl, neigh_pynn) + neigh_pynn.compute_neighbors(n_neighbors=n_neighbors, method="pynndescent", only_yx=only_yx) + if only_yx: + with pytest.raises(ValueError): + neigh_skl.get_adjacency_matrices() + with pytest.raises(ValueError): + neigh_pynn.get_adjacency_matrices() + else: + assert_adjacency_equal(neigh_skl, neigh_pynn) + # Always compare connectivities (yx) + conn_skl = neigh_skl.yx.knn_graph_connectivities() + conn_pynn = neigh_pynn.yx.knn_graph_connectivities() + assert np.allclose(conn_skl.toarray(), conn_pynn.toarray(), atol=1e-6) def test_neighbors_repr(self, small_data): x, y = small_data