Skip to content
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
de9c481
feat: support np.random.Generator
flying-sheep Feb 23, 2026
8ab6661
add decorator
flying-sheep Feb 24, 2026
1ef8780
scrublet
flying-sheep Feb 24, 2026
5308a1a
almost done
flying-sheep Feb 24, 2026
93a8c0b
Merge branch 'main' into pa/rng
flying-sheep Feb 26, 2026
32b3ddc
fix scrublet_simulate_doublets
flying-sheep Feb 26, 2026
c3da2bb
fix _RNGIgraph compat
flying-sheep Feb 26, 2026
2c82b67
Merge branch 'main' into pa/rng
flying-sheep Feb 26, 2026
bd85d95
whoops
flying-sheep Feb 26, 2026
8247cdb
relnote
flying-sheep Feb 26, 2026
47f3ceb
don’t store rng in random_state arg
flying-sheep Feb 26, 2026
1e43b2a
make consistent
flying-sheep Feb 26, 2026
64a0f26
use sub-generators
flying-sheep Feb 27, 2026
baf2c85
docs
flying-sheep Feb 27, 2026
7e2fab5
paga
flying-sheep Feb 27, 2026
8ad699a
test
flying-sheep Feb 27, 2026
a4b2d12
Selman’s findings
flying-sheep Feb 27, 2026
0c30aa4
ingest
flying-sheep Mar 9, 2026
e8a411e
rename
flying-sheep Mar 9, 2026
4077502
spawn docs
flying-sheep Mar 9, 2026
5797129
fix paga
flying-sheep Mar 9, 2026
9bc61f0
docs reuse
flying-sheep Mar 9, 2026
43ad1ee
fix docs
flying-sheep Mar 10, 2026
c98ce73
no spawning without loops/parallel
flying-sheep Mar 12, 2026
93a9d90
test
flying-sheep Mar 12, 2026
11155c3
undo spawn param
flying-sheep Mar 12, 2026
4570320
fix pca
flying-sheep Mar 13, 2026
d4814ee
more bench
flying-sheep Mar 13, 2026
4d82e6b
fix tests
flying-sheep Mar 13, 2026
65fb583
whoops
flying-sheep Mar 13, 2026
d8d0e80
no rng warning
flying-sheep Mar 13, 2026
a491b3a
Update src/scanpy/neighbors/__init__.py
flying-sheep Mar 20, 2026
f736f2d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 20, 2026
1596277
don’t store random_state metadata if it’s ignored
flying-sheep Mar 20, 2026
fbdc47e
comment on RNG spawning
flying-sheep Mar 20, 2026
c745802
fix tests
flying-sheep Mar 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/release-notes/3983.feat.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add support for {class}`numpy.random.Generator` to all functions previously accepting a `random_state` parameter {smaller}`P Angerer`
1 change: 1 addition & 0 deletions hatch.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ overrides.matrix.deps.python = [
{ if = [ "low-vers" ], value = "3.12" },
]
overrides.matrix.deps.extra-dependencies = [
{ if = [ "stable" ], value = "scipy>=1.17" },
{ if = [ "pre" ], value = "anndata @ git+https://github.com/scverse/anndata.git" },
{ if = [ "pre" ], value = "pandas>=3rc0" },
]
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,8 @@ filterwarnings = [
"ignore:The `igraph` implementation of leiden clustering:UserWarning",
# everybody uses this zarr 3 feature, including us, XArray, lots of data out there …
"ignore:Consolidated metadata is currently not part:UserWarning",
# joblib fallback to serial mode in restricted multiprocessing environments
"ignore:.*joblib will operate in serial mode:UserWarning",
]

[tool.coverage]
Expand Down
17 changes: 17 additions & 0 deletions src/scanpy/_docs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""Shared docstrings for general parameters."""

from __future__ import annotations

__all__ = ["doc_rng"]

# Shared numpydoc entry for the `rng` parameter; substituted into docstrings
# via the `{rng}` placeholder.
doc_rng = """\
rng
    Random number generation to control stochasticity.

    If a :type:`SeedLike` value, it’s used to seed a new random number generator;
    If a :class:`numpy.random.Generator`, `rng`’s state will be directly advanced;
    If :data:`None`, a non-reproducible random number generator is used.
    See :func:`numpy.random.default_rng` for more details.

    The default value matches legacy scanpy behavior and will change to `None` in scanpy 2.0.\
"""
156 changes: 123 additions & 33 deletions src/scanpy/_utils/random.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,27 @@
from typing import TYPE_CHECKING

import numpy as np
from sklearn.utils import check_random_state
from sklearn.utils.random import check_random_state

from . import ensure_igraph

if TYPE_CHECKING:
from collections.abc import Generator
from collections.abc import Callable, Generator
from typing import Self

from numpy.random import BitGenerator
from numpy.typing import NDArray


__all__ = [
"RNGLike",
"SeedLike",
"_LegacyRandom",
"_accepts_legacy_random_state",
"_if_legacy_apply_global",
"_legacy_random_state",
"_set_igraph_rng",
"ith_k_tuple",
"legacy_numpy_gen",
"random_k_tuples",
"random_str",
]
Expand All @@ -38,34 +43,38 @@


class _RNGIgraph:
"""Random number generator for igraph so global seed is not changed.
"""Random number generator for igraph so global random state is not changed.

See :func:`igraph.set_random_number_generator` for the requirements.
"""

def __init__(self, random_state: int | np.random.RandomState = 0) -> None:
self._rng = check_random_state(random_state)
def __init__(self, rng: SeedLike | RNGLike | None) -> None:
self._rng = np.random.default_rng(rng)

def getrandbits(self, k: int) -> int:
return self._rng.tomaxint() & ((1 << k) - 1)
if isinstance(self._rng, _LegacyRng):
i = self._rng.state.tomaxint()
else:
lims = np.iinfo(np.uint64)
i = int(self._rng.integers(0, lims.max, dtype=np.uint64, endpoint=True))
return i & ((1 << k) - 1)

def randint(self, a: int, b: int) -> int:
return self._rng.randint(a, b + 1)
def randint(self, a: int, b: int) -> np.int64:
"""Can’t use `endpoint` here as _LegacyRng doesn’t support it."""
return self._rng.integers(a, b + 1)

def __getattr__(self, attr: str):
return getattr(self._rng, "normal" if attr == "gauss" else attr)


@contextmanager
def _set_igraph_rng(rng: SeedLike | RNGLike | None) -> Generator[None]:
    """Make igraph draw its random numbers from `rng` inside the `with` block.

    `rng` is wrapped in a :class:`_RNGIgraph` and installed through
    :func:`igraph.set_random_number_generator`.
    On exit (including on error) the stdlib :mod:`random` module is installed
    as igraph’s random number generator again.
    """
    ensure_igraph()
    import igraph

    ig_rng = _RNGIgraph(rng)
    try:
        igraph.set_random_number_generator(ig_rng)
        yield None
    finally:
        igraph.set_random_number_generator(random)
Expand All @@ -76,42 +85,123 @@ def set_igraph_random_state(
###################################


def legacy_numpy_gen(
    random_state: _LegacyRandom | None = None,
) -> np.random.Generator:
    """Return a random generator that behaves like the legacy one.

    Thin backward-compatible entry point:
    all work is done by :meth:`_LegacyRng.wrap_global`,
    which seeds/overwrites the global `np.random` state from `random_state`
    (if it is not `None`) and returns a generator wrapping it.
    """
    return _LegacyRng.wrap_global(random_state)
class _LegacyRng(np.random.Generator):
"""A `Generator` that wraps a legacy `RandomState` instance.

To behave like a `RandomState`, it’s not enough to just use a MT19937 `bit_generator`
(as in `Generator(RandomState(seed).bit_generator)`),
so instead this hack uses the exact same random numbers as `RandomState(seed)`.
"""

arg: _LegacyRandom
state: np.random.RandomState

class _FakeRandomGen(np.random.Generator):
_state: np.random.RandomState
def __init__(
self, arg: _LegacyRandom, state: np.random.RandomState | None = None
) -> None:
self.arg = arg
self.state = check_random_state(arg) if state is None else state

def __init__(self, random_state: np.random.RandomState) -> None:
self._state = random_state
@property
def bit_generator(self) -> BitGenerator:
msg = "A _LegacyRng instance has no `bit_generator` attribute."
raise AttributeError(msg)

@classmethod
def wrap_global(
cls,
arg: _LegacyRandom = None,
state: np.random.RandomState | None = None,
) -> Self:
"""Create a generator that wraps the global `RandomState` backing the legacy `np.random` functions."""
if arg is not None:
if isinstance(arg, np.random.RandomState):
np.random.set_state(arg.get_state(legacy=False))
return _LegacyRng(arg, state)
np.random.seed(arg)
return _LegacyRng(arg, np.random.RandomState(np.random.get_bit_generator()))

def spawn(self, n_children: int) -> list[Self]:
"""Return `self` `n_children` times.

In a real generator, the spawned children are independent,
but for backwards compatibility we return the same instance so that its internal state is advanced by each child.
"""
return [self] * n_children

@classmethod
def _delegate(cls) -> None:
names = dict(integers="randint")
for name, meth in np.random.Generator.__dict__.items():
if name.startswith("_") or not callable(meth):
if name.startswith("_") or not callable(meth) or name in cls.__dict__:
continue

def mk_wrapper(name: str, meth):
# Old pytest versions try to run the doctests
@wraps(meth, assigned=set(WRAPPER_ASSIGNMENTS) - {"__doc__"})
def wrapper(self: _FakeRandomGen, *args, **kwargs):
return getattr(self._state, name)(*args, **kwargs)
def wrapper(self: _LegacyRng, *args, **kwargs):
return getattr(self.state, name)(*args, **kwargs)

return wrapper

setattr(cls, name, mk_wrapper(name, meth))
setattr(cls, names.get(name, name), mk_wrapper(name, meth))


_LegacyRng._delegate()


def _if_legacy_apply_global(rng: np.random.Generator, /) -> np.random.Generator:
    """Route a `_LegacyRng` through the global legacy RNG; pass anything else through.

    For a `_LegacyRng`, the result of `_LegacyRng.wrap_global(rng.arg, rng.state)`
    is returned. Any other generator is returned unchanged.

    This is used where our code used to call `np.random.seed()`.
    """
    if isinstance(rng, _LegacyRng):
        return _LegacyRng.wrap_global(rng.arg, rng.state)
    return rng


def _legacy_random_state(
    rng: SeedLike | RNGLike | None, /, *, always_state: bool = False
) -> _LegacyRandom:
    """Turn `rng` into a value usable as a legacy `random_state` argument.

    For a `_LegacyRng`, its original `arg` is returned,
    or its wrapped `RandomState` if `always_state` is true.
    Anything else is passed to `np.random.default_rng`; one child bit generator
    is spawned from the result and returned wrapped in a new `RandomState`.
    """
    if not isinstance(rng, _LegacyRng):
        children = np.random.default_rng(rng).bit_generator.spawn(1)
        (child_bitgen,) = children
        return np.random.RandomState(child_bitgen)
    if always_state:
        return rng.state
    return rng.arg


def _accepts_legacy_random_state[**P, R](
random_state_default: _LegacyRandom, /
) -> Callable[[Callable[P, R]], Callable[P, R]]:
"""Make a function accept `random_state: _LegacyRandom` and pass it as `rng`.

If the decorated function is called with a `random_state` argument,
it’ll be wrapped in a `_LegacyRng`.
Passing both `rng` and `random_state` at the same time is an error.
If neither is given, `random_state_default` is used.
"""

_FakeRandomGen._delegate()
def decorator(func: Callable[P, R]) -> Callable[P, R]:
@wraps(func)
def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
match "random_state" in kwargs, "rng" in kwargs:
case True, True:
msg = "Specify at most one of `rng` and `random_state`."
raise TypeError(msg)
case True, False:
kwargs["rng"] = _LegacyRng(kwargs.pop("random_state"))
case False, False:
kwargs["rng"] = _LegacyRng(random_state_default)
return func(*args, **kwargs)

return wrapper

return decorator


###################
Expand Down
14 changes: 9 additions & 5 deletions src/scanpy/datasets/_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,18 @@

from .. import _utils
from .._compat import deprecated
from .._docs import doc_rng
from .._settings import settings
from .._utils import _doc_params
from .._utils._doctests import doctest_internet, doctest_needs
from .._utils.random import _accepts_legacy_random_state, _legacy_random_state
from ..readwrite import read, read_h5ad, read_visium
from ._utils import check_datasetdir_exists

if TYPE_CHECKING:
from typing import Literal

from .._utils.random import _LegacyRandom
from .._utils.random import RNGLike, SeedLike

type VisiumSampleID = Literal[
"V1_Breast_Cancer_Block_A_Section_1",
Expand Down Expand Up @@ -54,13 +57,15 @@
HERE = Path(__file__).parent


@_doc_params(rng=doc_rng)
@_accepts_legacy_random_state(0)
def blobs(
*,
n_variables: int = 11,
n_centers: int = 5,
cluster_std: float = 1.0,
n_observations: int = 640,
random_state: _LegacyRandom = 0,
rng: SeedLike | RNGLike | None = None,
) -> AnnData:
"""Gaussian Blobs.

Expand All @@ -75,8 +80,7 @@ def blobs(
n_observations
Number of observations. By default, this is the same observation number
as in :func:`scanpy.datasets.krumsiek11`.
random_state
Determines random number generation for dataset creation.
{rng}

Returns
-------
Expand All @@ -98,7 +102,7 @@ def blobs(
n_features=n_variables,
centers=n_centers,
cluster_std=cluster_std,
random_state=random_state,
random_state=_legacy_random_state(rng),
)
return AnnData(x, obs=dict(blobs=y.astype(str)))

Expand Down
4 changes: 2 additions & 2 deletions src/scanpy/experimental/_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@
doc_pca_chunk = """\
n_comps
Number of principal components to compute in the PCA step.
random_state
Random seed for setting the initial states for the optimization in the PCA step.
rng
Random number generator for setting the initial states for the optimization in the PCA step.
kwargs_pca
Dictionary of further keyword arguments passed on to `scanpy.pp.pca()`.
"""
Expand Down
7 changes: 5 additions & 2 deletions src/scanpy/experimental/pp/_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from ... import logging as logg
from ..._compat import CSBase, warn
from ..._utils import _doc_params, _empty, check_nonnegative_integers, view_to_actual
from ..._utils.random import _accepts_legacy_random_state
from ...experimental._docs import (
doc_adata,
doc_check_values,
Expand All @@ -27,6 +28,7 @@
from typing import Any

from ..._utils import Empty
from ..._utils.random import RNGLike, SeedLike


def _pearson_residuals(
Expand Down Expand Up @@ -160,13 +162,14 @@ def normalize_pearson_residuals(
check_values=doc_check_values,
inplace=doc_inplace,
)
@_accepts_legacy_random_state(0)
def normalize_pearson_residuals_pca(
adata: AnnData,
*,
theta: float = 100,
clip: float | None = None,
n_comps: int | None = 50,
random_state: float = 0,
rng: SeedLike | RNGLike | None = None,
kwargs_pca: Mapping[str, Any] = MappingProxyType({}),
mask_var: np.ndarray | str | None | Empty = _empty,
use_highly_variable: bool | None = None,
Expand Down Expand Up @@ -233,7 +236,7 @@ def normalize_pearson_residuals_pca(
normalize_pearson_residuals(
adata_pca, theta=theta, clip=clip, check_values=check_values
)
pca(adata_pca, n_comps=n_comps, random_state=random_state, **kwargs_pca)
pca(adata_pca, n_comps=n_comps, rng=rng, **kwargs_pca)
n_comps = adata_pca.obsm["X_pca"].shape[1] # might be None

if inplace:
Expand Down
Loading
Loading