Merge branch 'main' into inline_example_pca_variance_ratio

flying-sheep · web-flow · commit a139ffca8fe9 · 2026-01-23T14:09:15.000+01:00
diff --git a/docs/api/metrics.md b/docs/api/metrics.md
@@ -15,6 +15,7 @@ Collections of useful measurements for evaluating results.
    :nosignatures:
    :toctree: ../generated/
 
+   metrics.modularity
    metrics.confusion_matrix
    metrics.gearys_c
    metrics.morans_i
diff --git a/docs/conf.py b/docs/conf.py
@@ -260,7 +260,10 @@ def setup(app: Sphinx) -> None:
     "scanpy.plotting._dotplot.DotPlot": "scanpy.pl.DotPlot",
     "scanpy.plotting._stacked_violin.StackedViolin": "scanpy.pl.StackedViolin",
     "pandas.core.series.Series": "pandas.Series",
+    # https://github.com/pandas-dev/pandas/issues/63810
+    "pandas.api.typing.aliases.AnyArrayLike": ("doc", "pandas:reference/aliases"),
     "numpy.bool_": "numpy.bool",  # Since numpy 2, numpy.bool is the canonical dtype
+    "numpy.typing.ArrayLike": ("py:data", "numpy.typing.ArrayLike"),
 }
 
 nitpick_ignore = [
diff --git a/docs/release-notes/3613.feat.md b/docs/release-notes/3613.feat.md
@@ -0,0 +1 @@
+Add modularity scoring via {func}`scanpy.metrics.modularity` with support for directed/undirected graphs {smaller}`A. Karesh`
diff --git a/hatch.toml b/hatch.toml
@@ -4,6 +4,7 @@ dependency-groups = [ "dev" ]
 
 [envs.docs]
 dependency-groups = [ "doc" ]
+extra-dependencies = [ "pandas>=3" ]
 scripts.build = "sphinx-build -M html docs docs/_build -W {args}"
 scripts.open = "python3 -m webbrowser -t docs/_build/html/index.html"
 scripts.clean = "git clean -fdX -- {args:docs}"
diff --git a/pyproject.toml b/pyproject.toml
@@ -113,7 +113,7 @@ test = [
 doc = [
     "sphinx>=8.2.3",
     "sphinx-book-theme>=1.1.0",
-    "scanpydoc>=0.16",
+    "scanpydoc>=0.16.1",
     "sphinx-autodoc-typehints>=1.25.2",
     "sphinx-issues>=5.0.1",
     "myst-parser>=2",
@@ -196,7 +196,8 @@ filterwarnings = [
     # https://github.com/matplotlib/matplotlib/pull/30589
     "ignore:.*'(oneOf|parseString|resetCache|enablePackrat|leaveWhitespace|setName|setParseAction|endQuoteChar|unquoteResults)'.*'(one_of|parse_string|reset_cache|enable_packrat|leave_whitespace|set_name|set_parse_action|end_quote_char|unquote_results)':DeprecationWarning:matplotlib",
     "ignore:.*'(parseAll)'.*'(parse_all)':DeprecationWarning",
-
+    # igraph vs leidenalg warning
+    "ignore:The `igraph` implementation of leiden clustering:UserWarning",
 ]
 
 [tool.coverage.run]
diff --git a/src/scanpy/_utils/__init__.py b/src/scanpy/_utils/__init__.py
@@ -883,20 +883,25 @@ class NeighborsView:
         This defines where to look for neighbors dictionary,
         connectivities, distances.
 
-        neigh = NeighborsView(adata, key)
-        neigh['distances']
-        neigh['connectivities']
-        neigh['params']
-        'connectivities' in neigh
-        'params' in neigh
-
-        is the same as
-
-        adata.obsp[adata.uns[key]['distances_key']]
-        adata.obsp[adata.uns[key]['connectivities_key']]
-        adata.uns[key]['params']
-        adata.uns[key]['connectivities_key'] in adata.obsp
-        'params' in adata.uns[key]
+    Examples
+    --------
+    >>> import scanpy as sc
+    >>> adata = sc.datasets.pbmc68k_reduced()
+    >>> key = "neighbors"
+
+    >>> neigh = NeighborsView(adata, key)
+    >>> d = neigh["distances"]
+    >>> c = neigh["connectivities"]
+    >>> p = neigh["params"]
+
+    is the same as doing this manually
+
+    >>> d_key = adata.uns[key].get("distances_key", "distances")
+    >>> c_key = adata.uns[key].get("connectivities_key", "connectivities")
+    >>> assert d is adata.obsp[d_key]
+    >>> assert c is adata.obsp[c_key]
+    >>> assert p is adata.uns[key]["params"]
+    >>> assert c_key in adata.obsp
 
     """
 
diff --git a/src/scanpy/metrics/__init__.py b/src/scanpy/metrics/__init__.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from ._gearys_c import gearys_c
-from ._metrics import confusion_matrix
+from ._metrics import confusion_matrix, modularity
 from ._morans_i import morans_i
 
-__all__ = ["confusion_matrix", "gearys_c", "morans_i"]
+__all__ = ["confusion_matrix", "gearys_c", "modularity", "morans_i"]
diff --git a/src/scanpy/metrics/_metrics.py b/src/scanpy/metrics/_metrics.py
@@ -2,15 +2,28 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, overload
 
 import numpy as np
 import pandas as pd
+from anndata import AnnData
 from natsort import natsorted
 from pandas.api.types import CategoricalDtype
 
+from .._utils import NeighborsView
+
 if TYPE_CHECKING:
     from collections.abc import Sequence
+    from typing import Literal
+
+    if TYPE_CHECKING:
+        from pandas.api.typing.aliases import AnyArrayLike
+    else:  # sphinx-autodoc-typehints will execute the outer block, but end up here:
+        AnyArrayLike = type(
+            "AnyArrayLike", (), dict(__module__="pandas.api.typing.aliases")
+        )
+
+    from .._compat import SpBase
 
 
 def confusion_matrix(
@@ -89,3 +102,119 @@ def confusion_matrix(
     df = df.loc[np.array(orig_idx), np.array(new_idx)]
 
     return df
+
+
+@overload
+def modularity(
+    connectivities: AnyArrayLike | SpBase, /, labels: AnyArrayLike, *, is_directed: bool
+) -> float: ...
+
+
+@overload
+def modularity(
+    adata: AnnData,
+    /,
+    labels: str | AnyArrayLike = "leiden",
+    *,
+    neighbors_key: str | None = None,
+    mode: Literal["calculate", "update", "retrieve"] = "calculate",
+) -> float: ...
+
+
+def modularity(
+    adata_or_connectivities: AnnData | AnyArrayLike | SpBase,
+    /,
+    labels: str | AnyArrayLike = "leiden",
+    *,
+    neighbors_key: str | None = None,
+    is_directed: bool | None = None,
+    mode: Literal["calculate", "update", "retrieve"] = "calculate",
+) -> float:
+    """Compute the modularity of a graph given its connectivities and labels.
+
+    Parameters
+    ----------
+    adata_or_connectivities
+        The AnnData object containing the data or a weighted adjacency matrix representing the graph.
+    labels
+        Cluster labels for each node in the graph.
+        When `AnnData` is provided, this can be the key in `adata.obs` that contains the clustering labels and defaults to `"leiden"`.
+        When a connectivities array is provided, this must be an array of labels, not a string.
+    neighbors_key
+        When `AnnData` is provided, the key in `adata.uns` under which the neighbors settings are stored.
+        It is used to look up the connectivities stored in `adata.obsp`.
+        Ignored when a connectivities array is provided.
+    is_directed
+        Whether the connectivities are directed or undirected.
+        Must be specified when a connectivities array is provided.
+        Always `False` if `AnnData` is provided, as connectivities are derived from (symmetric) neighbors.
+    mode
+        When `AnnData` is provided,
+        this controls if the stored modularity is retrieved,
+        or if we should calculate it (and optionally update it in `adata.uns[labels]`).
+        `'retrieve'` and `'update'` require `labels` to be a string.
+        Ignored when a connectivities array is provided.
+
+    Returns
+    -------
+    The modularity of the graph based on the provided clustering.
+
+    Raises
+    ------
+    ValueError
+        If `AnnData` is provided together with a truthy `is_directed`,
+        or if `mode` is `'retrieve'` or `'update'` while `labels` is not a string.
+    TypeError
+        If a connectivities array is provided and `labels` is a string or `is_directed` is `None`.
+    """
+    # AnnData input: connectivities and (optionally) labels are looked up in the object.
+    if isinstance(adata_or_connectivities, AnnData):
+        # `None` and `False` are both accepted here, only an explicit `True` is contradictory.
+        if is_directed:
+            msg = f"Connectivities stored in `AnnData` are undirected, can’t specify `{is_directed=!r}`"
+            raise ValueError(msg)
+        return modularity_adata(
+            adata_or_connectivities,
+            labels=labels,
+            neighbors_key=neighbors_key,
+            mode=mode,
+        )
+    # Array input: there is no `obs` to resolve a key against, so labels must be explicit.
+    if isinstance(labels, str):
+        msg = "`labels` must be provided as array when passing a connectivities array"
+        raise TypeError(msg)
+    # Directedness cannot be derived from a bare matrix, so the caller has to state it.
+    if is_directed is None:
+        msg = "`is_directed` must be provided when passing a connectivities array"
+        raise TypeError(msg)
+    return modularity_array(
+        adata_or_connectivities, labels=labels, is_directed=is_directed
+    )
+
+
+def modularity_adata(
+    adata: AnnData,
+    /,
+    *,
+    labels: str | AnyArrayLike,
+    neighbors_key: str | None,
+    mode: Literal["calculate", "update", "retrieve"],
+) -> float:
+    """Retrieve, calculate, or calculate and store the modularity for `adata`.
+
+    Parameters
+    ----------
+    adata
+        Object whose stored connectivities (and optionally labels) are used.
+    labels
+        Key in `adata.obs` holding the labels, or the labels themselves.
+        Must be a string for the modes `'retrieve'` and `'update'`,
+        as it doubles as the key into `adata.uns`.
+    neighbors_key
+        Passed to `NeighborsView` to locate the connectivities.
+    mode
+        `'retrieve'` returns `adata.uns[labels]["modularity"]` without computing anything,
+        `'calculate'` computes the score,
+        `'update'` computes it and stores it in `adata.uns[labels]["modularity"]`.
+
+    Returns
+    -------
+    The retrieved or calculated modularity.
+
+    Raises
+    ------
+    ValueError
+        If `mode` is `'retrieve'` or `'update'` and `labels` is not a string.
+    KeyError
+        If `mode` is `'retrieve'` and no modularity is stored for `labels`.
+    """
+    if mode in {"retrieve", "update"} and not isinstance(labels, str):
+        msg = "`labels` must be a string when `mode` is `'retrieve'` or `'update'`"
+        raise ValueError(msg)
+    if mode == "retrieve":
+        return adata.uns[labels]["modularity"]
+
+    labels_vec = adata.obs[labels] if isinstance(labels, str) else labels
+    connectivities = NeighborsView(adata, neighbors_key)["connectivities"]
+
+    # distances are treated as symmetric, so connectivities as well
+    m = modularity(connectivities, labels_vec, is_directed=False)
+    if mode == "update":
+        # `adata.uns` may not have an entry for `labels` yet;
+        # create it so storing the score does not fail with a `KeyError`.
+        if labels not in adata.uns:
+            adata.uns[labels] = {}
+        adata.uns[labels]["modularity"] = m
+    return m
+
+
+def modularity_array(
+    connectivities: AnyArrayLike | SpBase, /, *, labels: AnyArrayLike, is_directed: bool
+) -> float:
+    """Compute the modularity of `labels` on the graph described by `connectivities`.
+
+    Parameters
+    ----------
+    connectivities
+        Weighted adjacency matrix that is handed to :meth:`igraph.Graph.Weighted_Adjacency`.
+    labels
+        Cluster labels, converted to integer codes before being passed to igraph.
+    is_directed
+        Whether to build a directed or an undirected graph from `connectivities`.
+
+    Returns
+    -------
+    The modularity as calculated by :meth:`igraph.Graph.modularity`.
+
+    Raises
+    ------
+    ImportError
+        If `igraph` is not installed.
+    """
+    # Imported lazily so that the module itself can be imported without igraph.
+    try:
+        import igraph as ig
+    except ImportError as e:  # pragma: no cover
+        msg = "igraph is required for computing modularity"
+        raise ImportError(msg) from e
+    igraph_mode: str = ig.ADJ_DIRECTED if is_directed else ig.ADJ_UNDIRECTED
+    graph: ig.Graph = ig.Graph.Weighted_Adjacency(connectivities, mode=igraph_mode)
+    return graph.modularity(_codes(labels))
+
+
+def _codes(labels: AnyArrayLike) -> AnyArrayLike:
+    """Return the integer category codes of `labels`, as igraph requires integers."""
+    # A series is first reduced to its underlying categorical array.
+    categorical = (
+        labels.astype("category").array if isinstance(labels, pd.Series) else labels
+    )
+    if isinstance(categorical, pd.Categorical):
+        return categorical.codes
+    # Anything else (lists, arrays, …) is converted to a categorical first.
+    return pd.Categorical(categorical).codes
diff --git a/src/scanpy/tools/_leiden.py b/src/scanpy/tools/_leiden.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, cast
 
 import numpy as np
 import pandas as pd
@@ -29,7 +29,7 @@
             MutableVertexPartition.__module__ = "leidenalg.VertexPartition"
 
 
-def leiden(  # noqa: PLR0912, PLR0913, PLR0915
+def leiden(  # noqa: PLR0913
     adata: AnnData,
     resolution: float = 1,
     *,
@@ -47,7 +47,7 @@ def leiden(  # noqa: PLR0912, PLR0913, PLR0915
     flavor: Literal["leidenalg", "igraph"] | None = None,
     **clustering_args,
 ) -> AnnData | None:
-    """Cluster cells into subgroups :cite:p:`Traag2019`.
+    r"""Cluster cells into subgroups :cite:p:`Traag2019`.
 
     Cluster cells using the Leiden algorithm :cite:p:`Traag2019`,
     an improved version of the Louvain algorithm :cite:p:`Blondel2008`.
@@ -120,34 +120,15 @@ def leiden(  # noqa: PLR0912, PLR0913, PLR0915
         A dict with the values for the parameters `resolution`, `random_state`,
         and `n_iterations`.
 
+    `adata.uns['leiden' | key_added]['modularity']` : :class:`float`
+        The modularity score of the final clustering,
+        as calculated by the `flavor`.
+        Use :func:`scanpy.metrics.modularity`\ `(adata, mode='calculate' | 'update')`
+        to calculate a score independent of `flavor`.
+
     """
-    if flavor is None:
-        flavor = "leidenalg"
-        msg = (
-            "In the future, the default backend for leiden will be igraph instead of leidenalg. "
-            "To achieve the future defaults please pass: `flavor='igraph'` and `n_iterations=2`. "
-            "`directed` must also be `False` to work with igraph’s implementation."
-        )
-        warn(msg, FutureWarning)
-    if flavor not in {"igraph", "leidenalg"}:
-        msg = (
-            f"flavor must be either 'igraph' or 'leidenalg', but {flavor!r} was passed"
-        )
-        raise ValueError(msg)
+    flavor = _validate_flavor(flavor, partition_type=partition_type, directed=directed)
     _utils.ensure_igraph()
-    if flavor == "igraph":
-        if directed:
-            msg = "Cannot use igraph’s leiden implementation with a directed graph."
-            raise ValueError(msg)
-        if partition_type is not None:
-            msg = "Do not pass in partition_type argument when using igraph."
-            raise ValueError(msg)
-    else:
-        try:
-            import leidenalg
-        except ImportError as e:
-            msg = "Please install the leiden algorithm: `conda install -c conda-forge leidenalg` or `pip install leidenalg`."
-            raise ImportError(msg) from e
     clustering_args = dict(clustering_args)
 
     start = logg.info("running Leiden clustering")
@@ -169,6 +150,8 @@ def leiden(  # noqa: PLR0912, PLR0913, PLR0915
     # (in the case of a partition variant that doesn't take it on input)
     clustering_args["n_iterations"] = n_iterations
     if flavor == "leidenalg":
+        import leidenalg
+
         if resolution is not None:
             clustering_args["resolution_parameter"] = resolution
         directed = True if directed is None else directed
@@ -178,7 +161,10 @@ def leiden(  # noqa: PLR0912, PLR0913, PLR0915
         if use_weights:
             clustering_args["weights"] = np.array(g.es["weight"]).astype(np.float64)
         clustering_args["seed"] = random_state
-        part = leidenalg.find_partition(g, partition_type, **clustering_args)
+        part = cast(
+            "MutableVertexPartition",
+            leidenalg.find_partition(g, partition_type, **clustering_args),
+        )
     else:
         g = _utils.get_igraph_from_adjacency(adjacency, directed=False)
         if use_weights:
@@ -212,6 +198,7 @@ def leiden(  # noqa: PLR0912, PLR0913, PLR0915
         random_state=random_state,
         n_iterations=n_iterations,
     )
+    adata.uns[key_added]["modularity"] = part.modularity
     logg.info(
         "    finished",
         time=start,
@@ -221,3 +208,38 @@ def leiden(  # noqa: PLR0912, PLR0913, PLR0915
         ),
     )
     return adata if copy else None
+
+
+def _validate_flavor(
+    flavor: str | None, *, partition_type: object | None, directed: bool | None
+) -> Literal["igraph", "leidenalg"]:
+    """Check `flavor` against the related arguments and resolve the default.
+
+    `"igraph"` is returned unchanged after rejecting a truthy `directed` or a given `partition_type`.
+    `None` and `"leidenalg"` both resolve to `"leidenalg"`:
+    a warning is emitted (`FutureWarning` for `None`, `UserWarning` otherwise),
+    then an `ImportError` is raised if `leidenalg` cannot be imported.
+    Every other value raises a `ValueError`.
+    """
+    if flavor == "igraph":
+        if directed:
+            msg = "Cannot use igraph’s leiden implementation with a directed graph."
+            raise ValueError(msg)
+        if partition_type is not None:
+            msg = "Do not pass in partition_type argument when using igraph."
+            raise ValueError(msg)
+        return flavor
+
+    is_default = flavor is None
+    if not is_default and flavor != "leidenalg":
+        msg = f"flavor must be either 'igraph' or 'leidenalg', but {flavor!r} was passed"
+        raise ValueError(msg)
+
+    # From here on, the leidenalg backend is used, either explicitly or as the default.
+    parts = [
+        "The `igraph` implementation of leiden clustering is *orders of magnitude faster*. "
+        "Set the flavor argument to (and install if needed) 'igraph' to use it."
+    ]
+    if is_default:
+        parts.append(
+            "In the future, the default backend for leiden will be igraph instead of leidenalg. "
+            "To achieve the future defaults please pass: `flavor='igraph'` and `n_iterations=2`. "
+            "`directed` must also be `False` to work with igraph’s implementation."
+        )
+    category = FutureWarning if is_default else UserWarning
+    warn("\n".join(parts), category)
+
+    # The warning is emitted before checking that the backend is importable.
+    try:
+        import leidenalg  # noqa: F401
+    except ImportError as e:
+        msg = "Please install the leiden algorithm: `conda install -c conda-forge leidenalg` or `pip install leidenalg`."
+        raise ImportError(msg) from e
+    return "leidenalg"
diff --git a/tests/test_clustering.py b/tests/test_clustering.py
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+Add modularity scoring via {func}`scanpy.metrics.modularity` with support for directed/undirected graphs {smaller}`A. Karesh`