Skip to content
Draft
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
8143e07
WIP
flying-sheep Oct 9, 2024
2bfdd7a
make all test modules import
flying-sheep Oct 10, 2024
2d13618
some misc fixes
flying-sheep Oct 10, 2024
b8f91c0
Merge branch 'main' into x-layers-unification
flying-sheep Oct 25, 2024
f12f4d7
small fixes
flying-sheep Oct 25, 2024
95e8f1d
fix write_anndata
flying-sheep Oct 25, 2024
a6555f8
some more fixes
flying-sheep Oct 25, 2024
56252a8
Merge branch 'main' into x-layers-unification
flying-sheep Jul 15, 2025
77024ba
make setting X to None equivalent to deleting it
flying-sheep Jul 15, 2025
208b8bb
handle X from layers
flying-sheep Jul 15, 2025
fca1a87
simplify copy
flying-sheep Jul 15, 2025
5ac31c4
delegate transpose to layers
flying-sheep Jul 15, 2025
1d0151b
Merge branch 'main' into x-layers-unification
ilan-gold Aug 22, 2025
3756974
fix: most immutable setting situations (#2095)
ilan-gold Aug 29, 2025
28436ec
make them falsy
flying-sheep Aug 29, 2025
08d2fcb
Merge branch 'main' into x-layers-unification
ilan-gold Feb 12, 2026
77d218d
fix: dont accidentally open backed mode
ilan-gold Feb 12, 2026
5c118e0
fix: remove expectant test
ilan-gold Feb 12, 2026
9342e91
fix: collection dtype checking
ilan-gold Feb 12, 2026
8513bde
fix: layers test
ilan-gold Feb 12, 2026
c9b096a
fix: copy changes
ilan-gold Feb 12, 2026
a41f725
fix: removal
ilan-gold Feb 12, 2026
c8d9683
fix: warn
ilan-gold Feb 12, 2026
9330978
maybe don't warn on deleting?
ilan-gold Feb 12, 2026
b2651e0
fix: del test
ilan-gold Feb 12, 2026
24f1684
fix: del yes!
ilan-gold Feb 12, 2026
0ee2a12
fix: correct condition
ilan-gold Feb 12, 2026
30d4474
Merge branch 'main' into x-layers-unification
ilan-gold Feb 12, 2026
5e7b8ad
Merge branch 'x-layers-unification' of github.com:scverse/anndata int…
ilan-gold Feb 12, 2026
1cdd856
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 12, 2026
579bede
Merge branch 'main' into x-layers-unification
ilan-gold Feb 25, 2026
0d187b4
fix: sizeof
ilan-gold Feb 26, 2026
4ea7847
fix: update view setting
ilan-gold Feb 26, 2026
4f97aaa
fix: copying X in setting None
ilan-gold Feb 26, 2026
ad93198
fix: virtualenv
ilan-gold Feb 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/concatenation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Let's start off with an example:
uns: 'bulk_labels_colors', 'louvain', 'louvain_colors', 'neighbors', 'pca', 'rank_genes_groups'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: None
obsp: 'distances', 'connectivities'

If we split this object up by clusters of observations, then stack those subsets, we'll obtain the same values – just ordered differently.
Expand All @@ -42,6 +43,7 @@ If we split this object up by clusters of observations, then stack those subsets
var: 'n_counts', 'means', 'dispersions', 'dispersions_norm', 'highly_variable'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: None

Note that we concatenated along the observations by default, and that most elements aligned to the observations were concatenated as well.
A notable exception is :attr:`~anndata.AnnData.obsp`, which can be re-enabled with the `pairwise` keyword argument.
Expand Down Expand Up @@ -168,6 +170,7 @@ First, our example case:
uns: 'pca'
obsm: 'X_pca'
varm: 'PCs'
layers: None

Now we will split this object by the categorical `"blobs"` and recombine it to illustrate different merge strategies.

Expand All @@ -184,6 +187,7 @@ Now we will split this object by the categorical `"blobs"` and recombine it to i
uns: 'pca'
obsm: 'X_pca', 'qc'
varm: 'PCs', '0_qc'
layers: None

`adatas` is now a list of datasets with disjoint sets of observations and a common set of variables.
Each object has had QC metrics computed, with observation-wise metrics stored under `"qc"` in `.obsm`, and variable-wise metrics stored with a unique key for each subset.
Expand All @@ -193,16 +197,19 @@ Taking a look at how this affects concatenation:
AnnData object with n_obs × n_vars = 640 × 30
obs: 'blobs'
obsm: 'X_pca', 'qc'
layers: None
>>> ad.concat(adatas, merge="same")
AnnData object with n_obs × n_vars = 640 × 30
obs: 'blobs'
obsm: 'X_pca', 'qc'
varm: 'PCs'
layers: None
>>> ad.concat(adatas, merge="unique")
AnnData object with n_obs × n_vars = 640 × 30
obs: 'blobs'
obsm: 'X_pca', 'qc'
varm: 'PCs', '0_qc', '1_qc', '2_qc', '3_qc', '4_qc'
layers: None

Note that comparisons are made after indices are aligned.
That is, if the objects only share a subset of indices on the alternative axis, it's only required that values for those indices match when using a strategy like `"same"`.
Expand Down
100 changes: 60 additions & 40 deletions src/anndata/_core/aligned_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from collections.abc import MutableMapping, Sequence
from copy import copy
from dataclasses import dataclass
from types import NoneType
from typing import TYPE_CHECKING, Generic, TypeVar

import numpy as np
Expand Down Expand Up @@ -38,12 +39,13 @@
# TODO: pd.DataFrame only allowed in AxisArrays?
Value = pd.DataFrame | CSMatrix | CSArray | np.ndarray

K = TypeVar("K", str, str | None)
P = TypeVar("P", bound="AlignedMappingBase")
"""Parent mapping an AlignedView is based on."""
I = TypeVar("I", OneDIdx, TwoDIdx)


class AlignedMappingBase(MutableMapping[str, Value], ABC):
class AlignedMappingBase(MutableMapping[K, Value], ABC, Generic[K]):
"""\
An abstract base class for Mappings containing array-like values aligned
to either one or both AnnData axes.
Expand All @@ -61,13 +63,13 @@ class AlignedMappingBase(MutableMapping[str, Value], ABC):
_parent: AnnData | Raw
"""The parent object that this mapping is aligned to."""

def __repr__(self):
return f"{type(self).__name__} with keys: {', '.join(self.keys())}"
def __repr__(self) -> str:
return f"{type(self).__name__} with keys: {', '.join(map(repr, self.keys()))}"

def _ipython_key_completions_(self) -> list[str]:
def _ipython_key_completions_(self) -> list[K]:
return list(self.keys())

def _validate_value(self, val: Value, key: str) -> Value:
def _validate_value(self, val: Value, key: K) -> Value:
"""Raises an error if value is invalid"""
if isinstance(val, AwkArray):
warn_once(
Expand Down Expand Up @@ -120,13 +122,14 @@ def is_view(self) -> bool: ...
def parent(self) -> AnnData | Raw:
return self._parent

def copy(self) -> dict[str, Value]:
def copy(self) -> dict[K, Value]:
# Shallow copy for awkward array since their buffers are immutable
return {
k: copy(v) if isinstance(v, AwkArray) else v.copy() for k, v in self.items()
k: copy(v) if isinstance(v, AwkArray | NoneType) else v.copy()
for k, v in self.items()
}

def _view(self, parent: AnnData, subset_idx: I) -> AlignedView[Self, I]:
def _view(self, parent: AnnData, subset_idx: I) -> AlignedView[K, Self, I]:
"""Returns a subset copy-on-write view of the object."""
return self._view_class(self, parent, subset_idx)

Expand All @@ -135,7 +138,7 @@ def as_dict(self) -> dict:
return dict(self)


class AlignedView(AlignedMappingBase, Generic[P, I]):
class AlignedView(AlignedMappingBase[K], Generic[K, P, I]):
is_view: ClassVar[Literal[True]] = True

# override docstring
Expand All @@ -159,13 +162,15 @@ def __init__(self, parent_mapping: P, parent_view: AnnData, subset_idx: I):
# LayersBase has no _axis, the rest does
self._axis = parent_mapping._axis # type: ignore

def __getitem__(self, key: str) -> Value:
def __getitem__(self, key: K) -> Value:
if self.parent_mapping[key] is None:
return None
return as_view(
_subset(self.parent_mapping[key], self.subset_idx),
ElementRef(self.parent, self.attrname, (key,)),
)

def __setitem__(self, key: str, value: Value) -> None:
def __setitem__(self, key: K, value: Value) -> None:
value = self._validate_value(value, key) # Validate before mutating
warnings.warn(
f"Setting element `.{self.attrname}['{key}']` of view, "
Expand All @@ -174,9 +179,12 @@ def __setitem__(self, key: str, value: Value) -> None:
stacklevel=2,
)
with view_update(self.parent, self.attrname, ()) as new_mapping:
new_mapping[key] = value
if value is None:
del new_mapping[key]
else:
new_mapping[key] = value

def __delitem__(self, key: str) -> None:
def __delitem__(self, key: K) -> None:
if key not in self:
msg = f"{key!r} not found in view of {self.attrname}"
raise KeyError(msg) # Make sure it exists before bothering with a copy
Expand All @@ -189,49 +197,58 @@ def __delitem__(self, key: str) -> None:
with view_update(self.parent, self.attrname, ()) as new_mapping:
del new_mapping[key]

def __contains__(self, key: str) -> bool:
def __contains__(self, key: K) -> bool:
return key in self.parent_mapping

def __iter__(self) -> Iterator[str]:
def __iter__(self) -> Iterator[K]:
return iter(self.parent_mapping)

def __len__(self) -> int:
return len(self.parent_mapping)


class AlignedActual(AlignedMappingBase):
class AlignedActual(AlignedMappingBase[K], Generic[K]):
is_view: ClassVar[Literal[False]] = False

_data: MutableMapping[str, Value]
_data: MutableMapping[K, Value]
"""Underlying mapping to the data"""

def __init__(self, parent: AnnData | Raw, *, store: MutableMapping[str, Value]):
def __init__(self, parent: AnnData | Raw, *, store: MutableMapping[K, Value]):
self._parent = parent
self._data = store
for k, v in self._data.items():
if v is None:
continue
self._data[k] = self._validate_value(v, k)

def __getitem__(self, key: str) -> Value:
def __getitem__(self, key: K) -> Value:
return self._data[key]

def __setitem__(self, key: str, value: Value):
value = self._validate_value(value, key)
self._data[key] = value
def __setitem__(self, key: K, value: Value):
if value is not None:
value = self._validate_value(value, key)
if key is None and value is None:
del self[key]
else:
self._data[key] = value

def __contains__(self, key: str) -> bool:
def __contains__(self, key: K) -> bool:
return key in self._data

def __delitem__(self, key: str):
del self._data[key]
def __delitem__(self, key: K):
if key is None:
self._data.pop(key, None)
else:
del self._data[key]

def __iter__(self) -> Iterator[str]:
def __iter__(self) -> Iterator[K]:
return iter(self._data)

def __len__(self) -> int:
return len(self._data)


class AxisArraysBase(AlignedMappingBase):
class AxisArraysBase(AlignedMappingBase[str]):
"""\
Mapping of key→array-like,
where array-like is aligned to an axis of parent AnnData.
Expand Down Expand Up @@ -288,7 +305,7 @@ def dim_names(self) -> pd.Index:
return (self.parent.obs_names, self.parent.var_names)[self._axis]


class AxisArrays(AlignedActual, AxisArraysBase):
class AxisArrays(AlignedActual[str], AxisArraysBase):
def __init__(
self,
parent: AnnData | Raw,
Expand All @@ -302,15 +319,15 @@ def __init__(
super().__init__(parent, store=store)


class AxisArraysView(AlignedView[AxisArraysBase, OneDIdx], AxisArraysBase):
class AxisArraysView(AlignedView[str, AxisArraysBase, OneDIdx], AxisArraysBase):
pass


AxisArraysBase._view_class = AxisArraysView
AxisArraysBase._actual_class = AxisArrays


class LayersBase(AlignedMappingBase):
class LayersBase(AlignedMappingBase[str | None]):
"""\
Mapping of key: array-like, where array-like is aligned to both axes of the
parent anndata.
Expand All @@ -321,19 +338,19 @@ class LayersBase(AlignedMappingBase):
axes: ClassVar[tuple[Literal[0], Literal[1]]] = (0, 1)


class Layers(AlignedActual, LayersBase):
class Layers(AlignedActual[str | None], LayersBase):
pass


class LayersView(AlignedView[LayersBase, TwoDIdx], LayersBase):
class LayersView(AlignedView[str | None, LayersBase, TwoDIdx], LayersBase):
pass


LayersBase._view_class = LayersView
LayersBase._actual_class = Layers


class PairwiseArraysBase(AlignedMappingBase):
class PairwiseArraysBase(AlignedMappingBase[str]):
"""\
Mapping of key: array-like, where both axes of array-like are aligned to
one axis of the parent anndata.
Expand All @@ -359,7 +376,7 @@ def dim(self) -> str:
return self._dimnames[self._axis]


class PairwiseArrays(AlignedActual, PairwiseArraysBase):
class PairwiseArrays(AlignedActual[str], PairwiseArraysBase):
def __init__(
self,
parent: AnnData,
Expand All @@ -373,7 +390,9 @@ def __init__(
super().__init__(parent, store=store)


class PairwiseArraysView(AlignedView[PairwiseArraysBase, OneDIdx], PairwiseArraysBase):
class PairwiseArraysView(
AlignedView[str, PairwiseArraysBase, OneDIdx], PairwiseArraysBase
):
pass


Expand All @@ -394,7 +413,7 @@ class PairwiseArraysView(AlignedView[PairwiseArraysBase, OneDIdx], PairwiseArray


@dataclass
class AlignedMappingProperty(property, Generic[T]):
class AlignedMappingProperty(property, Generic[K, T]):
"""A :class:`property` that creates an ephemeral AlignedMapping.

The actual data is stored as `f'_{self.name}'` in the parent object.
Expand All @@ -407,7 +426,7 @@ class AlignedMappingProperty(property, Generic[T]):
axis: Literal[0, 1] | None = None
"""Axis of the parent to align to."""

def construct(self, obj: AnnData, *, store: MutableMapping[str, Value]) -> T:
def construct(self, obj: AnnData, *, store: MutableMapping[K, Value]) -> T:
if self.axis is None:
return self.cls(obj, store=store)
return self.cls(obj, axis=self.axis, store=store)
Expand All @@ -434,13 +453,14 @@ def __get__(self, obj: None | AnnData, objtype: type | None = None) -> T:
return parent._view(obj, tuple(idxs[ax] for ax in parent.axes))

def __set__(
self, obj: AnnData, value: Mapping[str, Value] | Iterable[tuple[str, Value]]
self, obj: AnnData, value: Mapping[K, Value] | Iterable[tuple[K, Value]] | None
) -> None:
value = convert_to_dict(value)
_ = self.construct(obj, store=value) # Validate
if obj.is_view:
obj._init_as_actual(obj.copy())
setattr(obj, f"_{self.name}", value)

def __delete__(self, obj) -> None:
setattr(obj, self.name, dict())
def __delete__(self, obj: AnnData) -> None:
new = {None: x} if (x := getattr(obj, self.name).get(None)) is not None else {}
setattr(obj, self.name, new)
Loading
Loading