Skip to content

Commit 78ace2c

Browse files
flying-sheepilan-goldpre-commit-ci[bot]
authored
docs: document array type support (#3895)
Co-authored-by: Ilan Gold <ilanbassgold@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 22845a1 commit 78ace2c

32 files changed

Lines changed: 468 additions & 10 deletions

docs/_static/custom.css

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
code.literal > a > code.literal {
2+
border: none;
3+
padding: 0;
4+
font-size: inherit;
5+
}

docs/api/index.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,19 @@ import scanpy as sc
1010
Additional functionality is available in the broader {doc}`ecosystem <../ecosystem>`, with some tools being wrapped in the {mod}`scanpy.external` module.
1111
```
1212

13+
(array-support)=
14+
## Array type support
15+
16+
Different APIs have different levels of support for array types,
17+
and this page lists the supported array types for each function:
18+
19+
```{eval-rst}
20+
.. array-support:: all
21+
```
22+
1323
```{toctree}
1424
:maxdepth: 2
25+
:hidden:
1526
1627
preprocessing
1728
tools

docs/conf.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,39 @@
171171
)
172172

173173

174+
# Array type support per documented function, consumed by the `array-support` directive.
#
# Keys are API paths relative to the `scanpy` package.
# Values are `(include, exclude)` lists of array type shorthands:
#   `np`          numpy arrays
#   `sp`          scipy sparse containers, optionally restricted like `sp[csr]`
#   `da`          dask arrays; without tags, chunk types are the non-dask included types
# Entries in `exclude` are removed from the expanded `include` set.
array_support: dict[str, tuple[list[str], list[str]]] = {
    # experimental
    "experimental.pp.highly_variable_genes": (["np", "sp"], []),
    # getters
    "get.aggregate": (["np", "sp", "da"], []),
    # preprocessing
    "pp.calculate_qc_metrics": (["np", "sp", "da"], []),
    "pp.combat": (["np"], []),
    "pp.downsample_counts": (["np", "sp[csr]"], []),
    "pp.filter_cells": (["np", "sp", "da"], []),
    "pp.filter_genes": (["np", "sp", "da"], []),
    "pp.highly_variable_genes": (["np", "sp", "da"], ["da[sp[csc]]"]),
    "pp.log1p": (["np", "sp", "da"], []),
    "pp.neighbors": (["np", "sp"], []),
    "pp.normalize_total": (["np", "sp[csr]", "da"], []),
    "pp.pca": (["np", "sp", "da"], ["da[sp[csc]]"]),
    "pp.regress_out": (["np"], []),
    "pp.sample": (["np", "sp", "da"], []),
    "pp.scale": (["np", "sp", "da"], []),
    "pp.scrublet": (["np", "sp"], []),
    "pp.scrublet_simulate_doublets": (["np", "sp"], []),
    # tools
    "tl.dendrogram": (["np", "sp"], []),
    "tl.diffmap": (["np", "sp"], []),
    "tl.dpt": (["np", "sp"], []),
    "tl.draw_graph": (["np", "sp"], []),  # only uses graph in obsp
    "tl.embedding_density": (["np"], []),
    "tl.ingest": (["np", "sp"], []),
    "tl.leiden": (["np", "sp"], []),  # only uses graph in obsp
    "tl.louvain": (["np", "sp"], []),  # only uses graph in obsp
    "tl.paga": (["np", "sp"], []),
    "tl.rank_genes_groups": (["np", "sp"], []),
    "tl.tsne": (["np", "sp"], []),
    "tl.umap": (["np", "sp"], []),
}
205+
206+
174207
# -- Options for HTML output ----------------------------------------------
175208

176209
# The theme is sphinx-book-theme, with patches for readthedocs-sphinx-search
@@ -180,12 +213,13 @@
180213
"use_repository_button": True,
181214
}
182215
html_static_path = ["_static"]
216+
html_css_files = ["custom.css"]
183217
html_show_sphinx = False
184218
html_logo = "_static/img/Scanpy_Logo_BrightFG.svg"
185219
html_title = "scanpy"
186220

187221

188-
def setup(app: Sphinx):
222+
def setup(app: Sphinx) -> None:
189223
"""App setup hook."""
190224
app.add_generic_role("small", partial(nodes.inline, classes=["small"]))
191225
app.add_generic_role("smaller", partial(nodes.inline, classes=["smaller"]))

docs/extensions/array_support.py

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
"""Add `array-support` directive."""
2+
3+
from __future__ import annotations
4+
5+
from itertools import groupby
6+
from typing import TYPE_CHECKING
7+
8+
from docutils import nodes
9+
from sphinx.util.docutils import SphinxDirective
10+
11+
from scanpy._utils import _docs
12+
13+
if TYPE_CHECKING:
14+
from collections.abc import Collection, Generator, Iterable, Sequence
15+
from typing import ClassVar
16+
17+
from sphinx.application import Sphinx
18+
19+
20+
# All non-dask ("inner") array types, i.e. the expansion of the `np` and `sp` shorthands.
# The directive uses this fixed list as the rows of a per-function table
# and as the columns of the overview table.
ALL_INNER = list(_docs.parse(["np", "sp"], inner=True))
21+
22+
23+
class ArraySupport(SphinxDirective):
    """Render array type support tables from the ``array_support`` config value.

    ``.. array-support:: all`` renders an overview table with one row per
    configured function, ``.. array-support:: <api>`` renders the table for
    the single entry ``<api>`` of the ``array_support`` config value.
    """

    required_arguments: ClassVar = 1

    @property
    def _array_support(self) -> dict[str, tuple[list[str], list[str]]]:
        """Mapping of API name to ``(include, exclude)`` shorthand lists."""
        return self.config.array_support

    def run(self) -> list[nodes.Node]:
        """Render the overview (argument ``all``) or the table for one API.

        Raises a directive error if the argument is neither ``all``
        nor a key of the ``array_support`` config value.
        """
        api = self.arguments[0]
        if api == "all":
            return self._render_overview()

        if api not in self._array_support:
            # `Directive.error` only creates the exception, it has to be raised.
            # Otherwise the lookup below would fail with a bare `KeyError`.
            msg = f"API not in `array_support`, add it in `docs/conf.py`: {api}"
            raise self.error(msg)
        array_types = list(_docs.parse(*self._array_support[api]))
        headers = (
            "Array type",
            "supported",
            "… experimentally in dask :class:`~dask.array.Array`",
        )
        # One record per inner type: (type, supported directly, supported as dask chunk)
        data: list[tuple[_docs.Inner, bool, bool]] = [
            (
                array_type,
                array_type in array_types,
                _docs.DaskArray(array_type) in array_types,
            )
            for array_type in ALL_INNER
        ]

        title = nodes.title("", "", *self.parse_inline(":ref:`array-support`")[0])
        rows = self._render_support_data(data)
        return self._render_table(headers, rows, title=title)

    def _render_overview(self) -> list[nodes.Node]:
        """Render one table with a row per configured function and a column per inner type."""
        headers = ["Function", *(at.rst(short=True) for at in ALL_INNER)]
        rows: list[nodes.row] = []
        for fn, (include, exclude) in self._array_support.items():
            row_header, _ = self.parse_inline(f":func:`scanpy.{fn}`")
            ats = frozenset(_docs.parse(include, exclude))
            cells: list[Sequence[nodes.Node]] = [
                row_header,
                *(
                    self._render_support(at in ats, dask=_docs.DaskArray(at) in ats)
                    for at in ALL_INNER
                ),
            ]
            entries = (nodes.entry("", nodes.paragraph("", "", *cell)) for cell in cells)
            rows.append(nodes.row("", *entries))
        return self._render_table(headers, rows)

    def _render_support_data(
        self,
        data: list[tuple[_docs.Inner, bool, bool]],
    ) -> Generator[nodes.row, None, None]:
        """Yield table rows for ``(array type, supported, supported in dask)`` records.

        Consecutive records of the same array type class are grouped.
        A group of scipy sparse types collapses into a single row
        if all its members have identical support.
        """
        for t, group in groupby(data, key=lambda r: type(r[0])):
            group = list(group)  # noqa: PLW2901
            if (  # if all sparse types have the same support, just one row
                t is _docs.ScipySparse
                and (support := one({s for _, s, _ in group})) is not None
                and (in_dask := one({d for _, _, d in group})) is not None
            ):
                # Renders as `scipy.sparse.{csr,csc}_{array,matrix}`,
                # with each of the four parts linking to a different class.
                refs: list[nodes.Node] = [
                    nodes.inline("", "scipy.sparse.{"),
                    *self.parse_inline(":class:`csr <scipy.sparse.csr_array>`")[0],
                    nodes.inline("", ","),
                    *self.parse_inline(":class:`csc <scipy.sparse.csc_matrix>`")[0],
                    nodes.inline("", "}_{"),
                    *self.parse_inline(":class:`array <scipy.sparse.csc_array>`")[0],
                    nodes.inline("", ","),
                    *self.parse_inline(":class:`matrix <scipy.sparse.csr_matrix>`")[0],
                    nodes.inline("", "}"),
                ]
                header = [nodes.literal("", "", *refs)]
                yield self._render_row(header, support=support, in_dask=in_dask)
            else:  # otherwise, show them individually
                for array_type, support, in_dask in group:
                    yield self._render_row(
                        self._render_array_type(array_type),
                        support=support,
                        in_dask=in_dask,
                    )

    def _render_row(
        self, header: Sequence[nodes.Node], *, support: bool, in_dask: bool
    ) -> nodes.row:
        """Build a three-cell row: header, direct support mark, dask support mark."""
        cells: list[Sequence[nodes.Node]] = [
            header,
            self._render_support(support),
            self._render_support(in_dask),
        ]
        children = (nodes.entry("", nodes.paragraph("", "", *cell)) for cell in cells)
        return nodes.row("", *children)

    def _render_table(
        self,
        headers: Collection[str],
        rows: Iterable[nodes.row],
        *,
        title: nodes.title | None = None,
    ) -> list[nodes.Node]:
        """Wrap ``rows`` in a table node whose first column is a stub column.

        ``headers`` are parsed as inline markup and become the header row,
        ``title`` (if given) becomes the table’s title.
        """
        colspecs = [
            nodes.colspec(stub=True),
            *(nodes.colspec() for _ in range(len(headers) - 1)),
        ]
        header_nodes = [
            nodes.entry("", nodes.paragraph("", "", *self.parse_inline(t)[0]))
            for t in headers
        ]
        thead = nodes.thead("", nodes.row("", *header_nodes))
        tbody = nodes.tbody("", *rows)
        return [
            nodes.table(
                "",
                *([title] if title else []),
                nodes.tgroup("", *colspecs, thead, tbody, cols=len(colspecs)),
                ids=["array-support"],
            )
        ]

    def _render_support(
        self,
        support: bool,  # noqa: FBT001
        /,
        *,
        dask: bool = False,
    ) -> Sequence[nodes.Node]:
        """Render a ✅/❌ mark, followed by a space and an explained ⚡ if ``dask`` is set."""
        dask_expl = "Also supports this type as chunk in a dask Array"
        return [
            # `" " * dask` adds the separating space only when the ⚡ follows
            nodes.Text(("✅" if support else "❌") + " " * dask),
            *([nodes.abbreviation(text="⚡", explanation=dask_expl)] if dask else []),
        ]

    def _render_array_type(self, array_type: _docs.ArrayType, /) -> list[nodes.Node]:
        """Parse the reST representation of ``array_type`` into inline nodes."""
        nodes_, msgs = self.parse_inline(array_type.rst())
        assert not msgs, msgs
        return nodes_
171+
172+
173+
def one[T](arg: Collection[T]) -> T | None:
174+
"""Return the only item in `arg` or None if `arg` is not of length 1."""
175+
try:
176+
[item] = arg
177+
except ValueError:
178+
return None
179+
return item
180+
181+
182+
def setup(app: Sphinx) -> None:
    """Register the `array-support` directive and the `array_support` config value."""
    directive_name = "array-support"
    app.add_directive(directive_name, ArraySupport)
    # The config value defaults to an empty mapping and is registered with rebuild mode "env".
    default_support: dict = {}
    app.add_config_value("array_support", default_support, "env")

docs/release-notes/3895.docs.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Document array type support for most functions in {mod}`~scanpy.pp` and {mod}`~scanpy.tl` {smaller}`P Angerer`

src/scanpy/_utils/_docs.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
"""Utilities for `array-support` directive (see `/docs/extensions/array_support.py`)."""
2+
3+
from __future__ import annotations
4+
5+
import re
6+
from abc import ABC, abstractmethod
7+
from dataclasses import dataclass
8+
from typing import TYPE_CHECKING, overload
9+
10+
if TYPE_CHECKING:
11+
from collections.abc import Collection, Generator
12+
from typing import Literal
13+
14+
15+
__all__ = ["ArrayType", "DaskArray", "Numpy", "ScipySparse", "parse"]
16+
17+
18+
class ArrayType(ABC):
    """Abstract base of the array type descriptions; subclasses must be hashable."""

    def rst(self, *, short: bool = False) -> str:  # pragma: no cover
        """Return a ``:class:`` role targeting ``str(self)``, prefixed with ``~`` if `short`."""
        prefix = "~" if short else ""
        return f":class:`{prefix}{self}`"

    @abstractmethod
    def __hash__(self) -> int:
        """Return the hash; has to be provided by concrete subclasses."""
24+
25+
26+
@dataclass(unsafe_hash=True, frozen=True)
class Numpy(ArrayType):
    """Array type description that renders as ``numpy.ndarray``."""

    def __str__(self) -> str:  # pragma: no cover
        return "numpy.ndarray"

    def rst(self, *, short: bool = False) -> str:  # pragma: no cover
        """Return the ``:class:`` role for this type, prefixed with ``~`` if `short`."""
        tilde = "~" if short else ""
        return f":class:`{tilde}{self}`"
33+
34+
35+
@dataclass(unsafe_hash=True, frozen=True)
class ScipySparse(ArrayType):
    """Array type description for one scipy sparse format (array and matrix flavor)."""

    # Sparse storage format this instance stands for
    format: Literal["csr", "csc"]

    def __str__(self) -> str:  # pragma: no cover
        fmt = self.format
        return f"scipy.sparse.{fmt}_{{array,matrix}}"

    def rst(self, *, short: bool = False) -> str:  # pragma: no cover
        """Return two ``:class:`` roles (``*_array`` / ``*_matrix``) for this format.

        `short` only affects the ``*_array`` role, the ``*_matrix`` role is always shortened.
        """
        array_role = f":class:`{'~' if short else ''}scipy.sparse.{self.format}_array`"
        matrix_role = f":class:`~scipy.sparse.{self.format}_matrix`"
        return f"{array_role} / {matrix_role}"
47+
48+
49+
# Array types that are not dask arrays themselves, i.e. the types allowed as `DaskArray.chunk`.
type Inner = Numpy | ScipySparse
50+
51+
52+
@dataclass(unsafe_hash=True, frozen=True)
class DaskArray(ArrayType):
    """Array type description for a dask array with a given chunk type."""

    # Description of the array type used for the chunks
    chunk: Inner

    def __str__(self) -> str:  # pragma: no cover
        chunk = self.chunk
        return f"dask.array.Array[{chunk}]"

    def rst(self, *, short: bool = False) -> str:  # pragma: no cover
        """Return the ``:class:`` role of the dask array followed by the bracketed chunk role."""
        prefix = "~" if short else ""
        chunk_rst = self.chunk.rst(short=short)
        return rf":class:`{prefix}dask.array.Array`\ \[{chunk_rst}\]"
61+
62+
63+
@overload
def parse(
    include: Collection[str],
    exclude: Collection[str] = (),
    *,
    inner: Literal[False] = False,
) -> Generator[ArrayType]: ...
@overload
def parse(
    include: Collection[str], exclude: Collection[str] = (), *, inner: Literal[True]
) -> Generator[Inner]: ...
def parse(
    include: Collection[str], exclude: Collection[str] = (), *, inner: bool = False
) -> Generator[ArrayType]:
    """Expand array type shorthands into array type descriptions.

    Each shorthand has the form ``mod`` or ``mod[tag,…]``, e.g. ``np``, ``sp[csr]``, ``da[sp[csc]]``.
    Everything that `exclude` expands to is dropped from the expansion of `include`.
    With ``inner=True``, the ``da`` shorthand is rejected.

    Raises `ValueError` for malformed shorthands.
    """
    if exclude:
        dropped = frozenset(parse(exclude))
        for candidate in parse(include):
            if candidate not in dropped:
                yield candidate
        return

    # A bare `da` uses all non-dask shorthands of this call as its chunk types
    inner_includes = [i for i in include if not i.startswith("da")]
    for t in include:
        match = re.fullmatch(r"([^\[]+)(?:\[(.+)\])?", t)
        if match is None:  # pragma: no cover
            msg = f"invalid {t!r}"
            raise ValueError(msg)
        mod, raw_tags = match.groups("")
        if inner and mod == "da":  # pragma: no cover
            msg = "Can’t nest dask arrays"
            raise ValueError(msg)
        # Split on the commas that are not part of a nested `[…]`
        tags = set(re.split(r",(?![^\[]+\])", raw_tags)) if raw_tags else set()
        yield from _parse_mod(mod, tags, inner_includes=inner_includes)
95+
96+
97+
def _parse_mod(
    mod: str, tags: set[str], *, inner_includes: Collection[str]
) -> Generator[ArrayType]:
    """Yield the array type descriptions for one shorthand, split into `mod` and `tags`.

    `inner_includes` are the shorthands used as chunk types for a ``da`` without tags.
    Raises `ValueError` for an unknown `mod` or for tags `mod` does not accept.
    """
    if mod == "np":
        if tags:  # pragma: no cover
            msg = f"`np` takes no tags {tags!r}"
            raise ValueError(msg)
        yield Numpy()
    elif mod == "sp":
        known_formats = {"csr", "csc"}
        if tags - known_formats:  # pragma: no cover
            msg = f"invalid tags {tags!r}"
            raise ValueError(msg)
        selected = tags & known_formats
        for fmt in ("csr", "csc"):
            # No format tag selects every format, otherwise only the tagged ones
            if not selected or fmt in selected:
                yield ScipySparse(format=fmt)
    elif mod == "da":
        chunk_specs = tags if tags else inner_includes
        for chunk in parse(chunk_specs, inner=True):
            yield DaskArray(chunk=chunk)
    else:  # pragma: no cover
        msg = f"invalid module {mod!r}"
        raise ValueError(msg)

src/scanpy/experimental/pp/_highly_variable_genes.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,8 @@ def highly_variable_genes( # noqa: PLR0913
320320
321321
Expects raw count input.
322322
323+
.. array-support:: experimental.pp.highly_variable_genes
324+
323325
Parameters
324326
----------
325327
{adata}

0 commit comments

Comments
 (0)