Skip to content

Commit ef47eb8

Browse files
authored
chore: switch disallow_forward_slash_in_h5ad default to True (#2436)
1 parent e5f2c7c commit ef47eb8

16 files changed

Lines changed: 188 additions & 107 deletions

docs/release-notes/0.12.3.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,7 @@
1010
- Deprecate `__version__` and use standard {func}`~importlib.metadata.version` API {user}`flying-sheep` ({pr}`1318`)
1111
- Allow writing of views of {class}`dask.array.Array` {user}`ilan-gold` ({pr}`2084`)
1212
- Enable writing of views of {class}`~anndata.AnnData` in backed mode {user}`ilan-gold` ({pr}`2092`)
13-
- Reallow writing of keys in `h5ad` files with forward slashes instead of erroring. Now a warning will be raised that the behavior will be disallowed in the future. To enable the new behavior, use {attr}`anndata.settings.disallow_forward_slash_in_h5ad`. {user}`ilan-gold` ({pr}`2097`)
13+
- Reallow writing of keys in `h5ad` files with forward slashes instead of erroring.
14+
Now a warning will be raised that the behavior will be disallowed in the future.
15+
To enable the new behavior, use {attr}`anndata.settings.disallow_forward_slash_in_h5ad`. {user}`ilan-gold` ({pr}`2097`)
1416
- Respect off-axis merge options in {func}`anndata.experimental.concat_on_disk` {user}`ilan-gold` ({pr}`2122`)

docs/release-notes/2436.chore.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Flip default for {attr}`anndata.settings.disallow_forward_slash_in_h5ad` to `True`.
2+
Document writing to `k="/"` in {func}`~anndata.io.write_elem` and check `k` more strictly {user}`flying-sheep`

src/anndata/_io/h5ad.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from .specs import read_elem, write_elem
2929
from .specs.registry import IOSpec, write_spec
3030
from .utils import (
31+
_check_has_no_slash_key,
3132
_read_legacy_raw,
3233
idx_chunks_along_axis,
3334
no_write_dataset_2d,
@@ -86,6 +87,8 @@ def write_h5ad(
8687
f.attrs.setdefault("encoding-type", "anndata")
8788
f.attrs.setdefault("encoding-version", "0.1.0")
8889
for k, elem in iter_outer(adata):
90+
_check_has_no_slash_key(k, elem)
91+
8992
if k == "raw":
9093
_write_raw(
9194
f, adata.raw, as_dense=as_dense, dataset_kwargs=dataset_kwargs
@@ -139,9 +142,10 @@ def _write_raw(
139142
if "raw/X" in as_dense and isinstance(
140143
raw.X, CSMatrix | BaseCompressedSparseDataset
141144
):
142-
write_sparse_as_dense(f, "raw/X", raw.X, dataset_kwargs=dataset_kwargs)
143-
write_elem(f, "raw/var", raw.var, dataset_kwargs=dataset_kwargs)
144-
write_elem(f, "raw/varm", dict(raw.varm), dataset_kwargs=dataset_kwargs)
145+
g = f.require_group("raw")
146+
write_sparse_as_dense(g, "X", raw.X, dataset_kwargs=dataset_kwargs)
147+
write_elem(g, "var", raw.var, dataset_kwargs=dataset_kwargs)
148+
write_elem(g, "varm", dict(raw.varm), dataset_kwargs=dataset_kwargs)
145149
elif raw is not None:
146150
write_elem(f, "raw", raw, dataset_kwargs=dataset_kwargs)
147151

src/anndata/_io/specs/methods.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,11 @@
2525
from anndata._core.index import _normalize_indices
2626
from anndata._core.merge import intersect_keys
2727
from anndata._core.sparse_dataset import _CSCDataset, _CSRDataset, sparse_dataset
28-
from anndata._io.utils import check_key, zero_dim_array_as_scalar
28+
from anndata._io.utils import (
29+
_check_has_no_slash_key,
30+
check_key,
31+
zero_dim_array_as_scalar,
32+
)
2933
from anndata._types import StorageType
3034
from anndata._warnings import OldFormatWarning
3135
from anndata.compat import (
@@ -340,19 +344,19 @@ def write_anndata(
340344
):
341345
g = f.require_group(k)
342346
for sub_key, elem in iter_outer(adata):
343-
if not (sub_key == "X" and elem is None):
344-
if sub_key == "layers":
345-
if None in elem:
346-
_writer.write_elem(
347-
g, "X", elem[None], dataset_kwargs=dataset_kwargs
348-
)
349-
elem = {k: v for k, v in elem.items() if k is not None}
350-
_writer.write_elem(
351-
g,
352-
sub_key,
353-
dict(elem) if isinstance(elem, MutableMapping) else elem,
354-
dataset_kwargs=dataset_kwargs,
355-
)
347+
if sub_key == "X" and elem is None:
348+
continue
349+
_check_has_no_slash_key(sub_key, elem)
350+
if sub_key == "layers":
351+
if None in elem:
352+
_writer.write_elem(g, "X", elem[None], dataset_kwargs=dataset_kwargs)
353+
elem = {k: v for k, v in elem.items() if k is not None}
354+
_writer.write_elem(
355+
g,
356+
sub_key,
357+
dict(elem) if isinstance(elem, MutableMapping) else elem,
358+
dataset_kwargs=dataset_kwargs,
359+
)
356360

357361

358362
@_REGISTRY.register_read(H5Group, IOSpec("anndata", "0.1.0"))

src/anndata/_io/specs/registry.py

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -364,42 +364,47 @@ def write_elem(
364364

365365
from anndata._io.zarr import is_group_consolidated
366366

367-
# we allow stores to have a prefix like /uns which are then written to with keys like /uns/foo
368-
is_zarr_group = isinstance(store, ZarrGroup)
369-
if "/" in k.rsplit(store.name, maxsplit=1)[-1][1:]:
370-
if is_zarr_group or settings.disallow_forward_slash_in_h5ad:
371-
msg = f"Forward slashes are not allowed in keys in {type(store)}"
372-
raise ValueError(msg)
373-
else:
374-
msg = "Forward slashes will be disallowed in h5 stores in the next minor release"
375-
warn(msg, FutureWarning)
376-
377367
if isinstance(store, h5py.File):
378368
store = store["/"]
379-
380-
dest_type = type(store)
381-
382-
# Normalize k to absolute path
383-
if isinstance(store, h5py.Group) and not PurePosixPath(k).is_absolute():
384-
k = str(PurePosixPath(store.name) / k)
385-
is_consolidated = is_group_consolidated(store) if is_zarr_group else False
386-
if is_consolidated:
369+
elif is_group_consolidated(store, strict=False):
387370
msg = "Cannot overwrite/edit a store with consolidated metadata"
388371
raise ValueError(msg)
372+
389373
if k == "/":
374+
if store.name != "/":
375+
msg = f"'/' is not in the subpath of {store.name!r}"
376+
raise ValueError(msg)
377+
390378
if isinstance(store, ZarrGroup):
391379
from zarr.core.sync import sync
392380

393381
sync(store.store.clear())
394382
else:
395383
store.clear()
396-
elif k in store:
397-
del store[k]
384+
else:
385+
# we allow stores to have a prefix like /uns which are then written to with keys like /uns/foo
386+
if k.startswith("/"):
387+
k = str(PurePosixPath(k).relative_to(store.name, walk_up=False))
388+
389+
# Apart from this code, we also ban keys containing slashes in `write_adata`/`write_h5ad`
390+
# for AnnData elements other than `obs`, `var`, and `uns`.
391+
if "/" in k:
392+
if (
393+
isinstance(store, ZarrGroup)
394+
or settings.disallow_forward_slash_in_h5ad
395+
):
396+
msg = f"Forward slashes are not allowed in keys in {type(store)}"
397+
raise ValueError(msg)
398+
msg = "Forward slashes will be written differently in a future anndata version"
399+
warn(msg, FutureWarning)
400+
401+
if k in store:
402+
del store[k]
398403

399404
# Normalize array-API (e.g., JAX/CuPy) even if not AnnData
400405
elem = normalize_nested(elem)
401406

402-
write_func = self.find_write_func(dest_type, elem, modifiers)
407+
write_func = self.find_write_func(type(store), elem, modifiers)
403408

404409
if self.callback is None:
405410
return write_func(store, k, elem, dataset_kwargs=dataset_kwargs)
@@ -525,8 +530,9 @@ def write_elem(
525530
store
526531
The group to write to.
527532
k
528-
The key to write to in the group. Note that absolute paths will be written
529-
from the root.
533+
The key to write into the group.
534+
If the group is the root, set `k` to `"/"` to write directly into it.
535+
Passing an absolute path referring to a direct child of the group is also allowed.
530536
elem
531537
The element to write. Typically an in-memory object, e.g. an AnnData, pandas
532538
dataframe, scipy sparse matrix, etc.

src/anndata/_io/utils.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations
22

3-
from collections.abc import Callable
3+
from collections.abc import Callable, Mapping
44
from functools import WRAPPER_ASSIGNMENTS, cache, wraps
55
from itertools import pairwise
66
from typing import TYPE_CHECKING, Literal, cast
@@ -12,7 +12,7 @@
1212
from ..utils import warn
1313

1414
if TYPE_CHECKING:
15-
from collections.abc import Callable, Mapping
15+
from collections.abc import Callable
1616
from typing import Any, Literal
1717

1818
from pandas.core.dtypes.dtypes import BaseMaskedDtype
@@ -257,6 +257,7 @@ def report_write_key_on_error(func):
257257

258258
@wraps(func)
259259
def func_wrapper(*args, **kwargs):
260+
__tracebackhide__ = True
260261
from anndata._io.specs import Writer
261262

262263
# Figure out signature (method vs function) by going through args
@@ -278,6 +279,16 @@ def func_wrapper(*args, **kwargs):
278279
return func_wrapper
279280

280281

282+
def _check_has_no_slash_key(attr: str, elem: object) -> None:
283+
"""Only attempt to write slash keys where people rely on it for backwards compatibility."""
284+
if attr in {"obs", "var", "uns", "raw"}:
285+
return # separate check for `settings.disallow_forward_slash_in_h5ad` is done in `write_elem`
286+
assert isinstance(elem, Mapping)
287+
if any("/" in k for k in elem if k not in {"/", None}):
288+
msg = f"Forward slashes are not allowed in keys in {attr}"
289+
raise ValueError(msg)
290+
291+
281292
# -------------------------------------------------------------------------------
282293
# Common h5ad/zarr stuff
283294
# -------------------------------------------------------------------------------

src/anndata/_io/zarr.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
from zarr.core.common import AccessModeLiteral
2727
from zarr.storage import StoreLike
2828

29+
from .._types import _GroupStorageType
30+
2931

3032
@contextmanager
3133
def zarrs_context():
@@ -175,8 +177,10 @@ def open_write_group(
175177
return zarr.open_group(store, mode=mode, **kwargs)
176178

177179

178-
def is_group_consolidated(group: zarr.Group) -> bool:
180+
def is_group_consolidated(group: _GroupStorageType, *, strict: bool = True) -> bool:
179181
if not isinstance(group, zarr.Group):
180-
msg = f"Expected zarr.Group, got {type(group)}"
181-
raise TypeError(msg)
182+
if strict:
183+
msg = f"Expected zarr.Group, got {type(group)}"
184+
raise TypeError(msg)
185+
return False
182186
return group.metadata.consolidated_metadata is not None

src/anndata/_settings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,7 @@ def validate_sparse_settings(val: Any, settings: SettingsManager) -> None:
491491

492492
settings.register(
493493
"disallow_forward_slash_in_h5ad",
494-
default_value=False,
494+
default_value=True,
495495
description="Whether or not to disallow the `/` character in keys for h5ad files",
496496
validate=validate_bool,
497497
get_from_env=check_and_get_bool,

src/anndata/_settings.pyi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class _AnnDataSettingsManager(SettingsManager):
4444
zarr_write_format: Literal[2, 3] = 3
4545
use_sparse_array_on_read: bool = False
4646
min_rows_for_chunked_h5_copy: int = 1000
47-
disallow_forward_slash_in_h5ad: bool = False
47+
disallow_forward_slash_in_h5ad: bool = True
4848
write_csr_csc_indices_with_min_possible_dtype: bool = False
4949
auto_shard_zarr_v3: bool = True
5050

src/anndata/_types.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,10 +156,10 @@ def __call__(
156156
class ReadCallback[S: StorageType, RWAble: typing.RWAble](Protocol):
157157
def __call__(
158158
self,
159-
/,
160159
read_func: Read[S, RWAble],
161160
elem_name: str,
162161
elem: StorageType,
162+
/,
163163
*,
164164
iospec: IOSpec,
165165
) -> RWAble:
@@ -188,11 +188,11 @@ def __call__(
188188
class WriteCallback[RWAble: typing.RWAble](Protocol):
189189
def __call__(
190190
self,
191-
/,
192191
write_func: Write[RWAble],
193192
store: StorageType,
194193
elem_name: str,
195194
elem: RWAble,
195+
/,
196196
*,
197197
iospec: IOSpec,
198198
dataset_kwargs: Mapping[str, Any],

0 commit comments

Comments
 (0)