Skip to content

Commit 96b07c4

Browse files
committed
Backport PR #1914: (fix): add convert_strings_to_categoricals to methods
1 parent decd534 commit 96b07c4

12 files changed

Lines changed: 96 additions & 45 deletions

File tree

docs/api.md

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,28 @@ You might have more success by assembling the {class}`AnnData` object yourself f
6969
## Writing
7070

7171
Writing a complete {class}`AnnData` object to disk in anndata’s native formats `.h5ad` and `zarr`.
72+
(These functions are also exported as {func}`io.write_h5ad` and {func}`io.write_zarr`.)
7273

7374
```{eval-rst}
7475
.. autosummary::
7576
:toctree: generated/
7677
77-
AnnData.write
78+
AnnData.write_h5ad
7879
AnnData.write_zarr
80+
81+
82+
..
83+
.. autosummary::
84+
:toctree: generated/
85+
86+
io.write_h5ad
87+
io.write_zarr
88+
89+
.. toctree::
90+
:hidden:
91+
92+
generated/anndata.io.write_h5ad
93+
generated/anndata.io.write_zarr
7994
```
8095

8196
Writing individual portions ({attr}`~AnnData.obs`, {attr}`~AnnData.varm` etc.) of the {class}`AnnData` object.

docs/conf.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,9 @@ def setup(app: Sphinx):
129129
"anndata._types.WriteCallback": "anndata.experimental.WriteCallback",
130130
"anndata._types.Read": "anndata.experimental.Read",
131131
"anndata._types.Write": "anndata.experimental.Write",
132+
# Buffer is not yet exported, so the buffer class registry is the closest thing
133+
"zarr.core.buffer.core.Buffer": "zarr.registry.Registry",
134+
"zarr.storage._common.StorePath": "zarr.storage.StorePath",
132135
"anndata.compat.DaskArray": "dask.array.Array",
133136
"anndata.compat.CupyArray": "cupy.ndarray",
134137
"anndata.compat.CupySparseMatrix": "cupyx.scipy.sparse.spmatrix",

docs/release-notes/1914.bugfix.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add the `convert_strings_to_categoricals` parameter to {meth}`~anndata.AnnData.write_h5ad` and {meth}`~anndata.AnnData.write_zarr` as well, as originally intended {user}`flying-sheep`

src/anndata/_core/anndata.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@
5454
from os import PathLike
5555
from typing import Any, Literal
5656

57+
from zarr.storage import StoreLike
58+
5759
from ..compat import Index1D
5860
from ..typing import ArrayDataStructureType
5961
from .aligned_mapping import AxisArraysView, LayersView, PairwiseArraysView
@@ -205,7 +207,7 @@ def __init__(
205207
raw: Mapping[str, Any] | None = None,
206208
dtype: np.dtype | type | str | None = None,
207209
shape: tuple[int, int] | None = None,
208-
filename: PathLike | None = None,
210+
filename: PathLike[str] | str | None = None,
209211
filemode: Literal["r", "r+"] | None = None,
210212
asview: bool = False,
211213
*,
@@ -947,7 +949,7 @@ def filename(self) -> Path | None:
947949
return self.file.filename
948950

949951
@filename.setter
950-
def filename(self, filename: PathLike | None):
952+
def filename(self, filename: PathLike[str] | str | None):
951953
# convert early for later comparison
952954
filename = None if filename is None else Path(filename)
953955
# change from backing-mode back to full loading into memory
@@ -1423,7 +1425,7 @@ def to_memory(self, copy=False) -> AnnData:
14231425

14241426
return AnnData(**new)
14251427

1426-
def copy(self, filename: PathLike | None = None) -> AnnData:
1428+
def copy(self, filename: PathLike[str] | str | None = None) -> AnnData:
14271429
"""Full copy, optionally on disk."""
14281430
if not self.isbacked:
14291431
if self.is_view and self._has_X():
@@ -1787,10 +1789,12 @@ def _check_dimensions(self, key=None):
17871789

17881790
def write_h5ad(
17891791
self,
1790-
filename: PathLike | None = None,
1792+
filename: PathLike[str] | str | None = None,
17911793
compression: Literal["gzip", "lzf"] | None = None,
17921794
compression_opts: int | Any = None,
17931795
as_dense: Sequence[str] = (),
1796+
*,
1797+
convert_strings_to_categoricals: bool = True,
17941798
):
17951799
"""\
17961800
Write `.h5ad`-formatted hdf5 file.
@@ -1811,6 +1815,8 @@ def write_h5ad(
18111815
----------
18121816
filename
18131817
Filename of data file. Defaults to backing file.
1818+
convert_strings_to_categoricals
1819+
Convert string columns to categorical.
18141820
compression
18151821
For [`lzf`, `gzip`], see the h5py :ref:`dataset_compression`.
18161822
@@ -1865,6 +1871,7 @@ def write_h5ad(
18651871
write_h5ad(
18661872
Path(filename),
18671873
self,
1874+
convert_strings_to_categoricals=convert_strings_to_categoricals,
18681875
compression=compression,
18691876
compression_opts=compression_opts,
18701877
as_dense=as_dense,
@@ -1875,7 +1882,9 @@ def write_h5ad(
18751882

18761883
write = write_h5ad # a shortcut and backwards compat
18771884

1878-
def write_csvs(self, dirname: PathLike, skip_data: bool = True, sep: str = ","):
1885+
def write_csvs(
1886+
self, dirname: PathLike[str] | str, skip_data: bool = True, sep: str = ","
1887+
):
18791888
"""\
18801889
Write annotation to `.csv` files.
18811890
@@ -1895,7 +1904,7 @@ def write_csvs(self, dirname: PathLike, skip_data: bool = True, sep: str = ","):
18951904

18961905
write_csvs(dirname, self, skip_data=skip_data, sep=sep)
18971906

1898-
def write_loom(self, filename: PathLike, write_obsm_varm: bool = False):
1907+
def write_loom(self, filename: PathLike[str] | str, write_obsm_varm: bool = False):
18991908
"""\
19001909
Write `.loom`-formatted hdf5 file.
19011910
@@ -1910,8 +1919,10 @@ def write_loom(self, filename: PathLike, write_obsm_varm: bool = False):
19101919

19111920
def write_zarr(
19121921
self,
1913-
store: MutableMapping | PathLike,
1922+
store: StoreLike,
19141923
chunks: bool | int | tuple[int, ...] | None = None,
1924+
*,
1925+
convert_strings_to_categoricals: bool = True,
19151926
):
19161927
"""\
19171928
Write a hierarchical Zarr array store.
@@ -1922,10 +1933,17 @@ def write_zarr(
19221933
The filename, a :class:`~typing.MutableMapping`, or a Zarr storage class.
19231934
chunks
19241935
Chunk shape.
1936+
convert_strings_to_categoricals
1937+
Convert string columns to categorical.
19251938
"""
19261939
from ..io import write_zarr
19271940

1928-
write_zarr(store, self, chunks=chunks)
1941+
write_zarr(
1942+
store,
1943+
self,
1944+
chunks=chunks,
1945+
convert_strings_to_categoricals=convert_strings_to_categoricals,
1946+
)
19291947

19301948
def chunked_X(self, chunk_size: int | None = None):
19311949
"""\
@@ -2064,10 +2082,10 @@ def _infer_shape_for_axis(
20642082
return elem.shape[0]
20652083
for elem, id in zip([layers, xxxm, xxxp], ["layers", "xxxm", "xxxp"]):
20662084
if elem is not None:
2067-
elem = cast(Mapping, elem)
2085+
elem = cast("Mapping", elem)
20682086
for sub_elem in elem.values():
20692087
if hasattr(sub_elem, "shape"):
2070-
size = cast(int, sub_elem.shape[axis if id == "layers" else 0])
2088+
size = cast("int", sub_elem.shape[axis if id == "layers" else 0])
20712089
return size
20722090
return None
20732091

src/anndata/_core/file_backing.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class AnnDataFileManager:
2525
def __init__(
2626
self,
2727
adata: anndata.AnnData,
28-
filename: PathLike | None = None,
28+
filename: PathLike[str] | str | None = None,
2929
filemode: Literal["r", "r+"] | None = None,
3030
):
3131
self._adata_ref = weakref.ref(adata)
@@ -80,12 +80,12 @@ def filename(self) -> Path:
8080
return self._filename
8181

8282
@filename.setter
83-
def filename(self, filename: PathLike | None):
83+
def filename(self, filename: PathLike[str] | str | None):
8484
self._filename = None if filename is None else Path(filename)
8585

8686
def open(
8787
self,
88-
filename: PathLike | None = None,
88+
filename: PathLike[str] | str | None = None,
8989
filemode: Literal["r", "r+"] | None = None,
9090
):
9191
if filename is not None:

src/anndata/_io/h5ad.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535

3636
if TYPE_CHECKING:
3737
from collections.abc import Callable, Collection, Mapping, Sequence
38+
from os import PathLike
3839
from typing import Any, Literal
3940

4041
from .._core.file_backing import AnnDataFileManager
@@ -44,14 +45,15 @@
4445

4546

4647
def write_h5ad(
47-
filepath: Path | str,
48+
filepath: PathLike[str] | str,
4849
adata: AnnData,
4950
*,
5051
as_dense: Sequence[str] = (),
5152
convert_strings_to_categoricals: bool = True,
5253
dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
5354
**kwargs,
5455
) -> None:
56+
"""See :meth:`~anndata.AnnData.write_h5ad`."""
5557
if isinstance(as_dense, str):
5658
as_dense = [as_dense]
5759
if "raw.X" in as_dense:
@@ -140,7 +142,9 @@ def write_sparse_as_dense(
140142
del f[key]
141143

142144

143-
def read_h5ad_backed(filename: str | Path, mode: Literal["r", "r+"]) -> AnnData:
145+
def read_h5ad_backed(
146+
filename: str | PathLike[str], mode: Literal["r", "r+"]
147+
) -> AnnData:
144148
d = dict(filename=filename, filemode=mode)
145149

146150
f = h5py.File(filename, mode)
@@ -169,7 +173,7 @@ def read_h5ad_backed(filename: str | Path, mode: Literal["r", "r+"]) -> AnnData:
169173

170174

171175
def read_h5ad(
172-
filename: str | Path,
176+
filename: PathLike[str] | str,
173177
backed: Literal["r", "r+"] | bool | None = None,
174178
*,
175179
as_sparse: Sequence[str] = (),

src/anndata/_io/read.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424

2525
def read_csv(
26-
filename: PathLike | Iterator[str],
26+
filename: PathLike[str] | str | Iterator[str],
2727
delimiter: str | None = ",",
2828
first_column_names: bool | None = None,
2929
dtype: str = "float32",
@@ -49,7 +49,9 @@ def read_csv(
4949
return read_text(filename, delimiter, first_column_names, dtype)
5050

5151

52-
def read_excel(filename: PathLike, sheet: str | int, dtype: str = "float32") -> AnnData:
52+
def read_excel(
53+
filename: PathLike[str] | str, sheet: str | int, dtype: str = "float32"
54+
) -> AnnData:
5355
"""\
5456
Read `.xlsx` (Excel) file.
5557
@@ -73,7 +75,7 @@ def read_excel(filename: PathLike, sheet: str | int, dtype: str = "float32") ->
7375
return AnnData(X, row, col)
7476

7577

76-
def read_umi_tools(filename: PathLike, dtype=None) -> AnnData:
78+
def read_umi_tools(filename: PathLike[str] | str, dtype=None) -> AnnData:
7779
"""\
7880
Read a gzipped condensed count matrix from umi_tools.
7981
@@ -96,7 +98,7 @@ def read_umi_tools(filename: PathLike, dtype=None) -> AnnData:
9698
return AnnData(X=X, obs=obs, var=var)
9799

98100

99-
def read_hdf(filename: PathLike, key: str) -> AnnData:
101+
def read_hdf(filename: PathLike[str] | str, key: str) -> AnnData:
100102
"""\
101103
Read `.h5` (hdf5) file.
102104
@@ -152,7 +154,7 @@ def _fmt_loom_axis_attrs(
152154

153155
@_deprecate_positional_args(version="0.9")
154156
def read_loom(
155-
filename: PathLike,
157+
filename: PathLike[str] | str,
156158
*,
157159
sparse: bool = True,
158160
cleanup: bool = False,
@@ -295,7 +297,7 @@ def read_loom(
295297
return adata
296298

297299

298-
def read_mtx(filename: PathLike, dtype: str = "float32") -> AnnData:
300+
def read_mtx(filename: PathLike[str] | str, dtype: str = "float32") -> AnnData:
299301
"""\
300302
Read `.mtx` file.
301303
@@ -317,7 +319,7 @@ def read_mtx(filename: PathLike, dtype: str = "float32") -> AnnData:
317319

318320

319321
def read_text(
320-
filename: PathLike | Iterator[str],
322+
filename: PathLike[str] | str | Iterator[str],
321323
delimiter: str | None = None,
322324
first_column_names: bool | None = None,
323325
dtype: str = "float32",

src/anndata/_io/specs/methods.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ def read_indices(group):
192192

193193

194194
def read_partial(
195-
pth: PathLike,
195+
pth: PathLike[str] | str,
196196
*,
197197
obs_idx=slice(None),
198198
var_idx=slice(None),

src/anndata/_io/write.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323

2424
def write_csvs(
25-
dirname: PathLike, adata: AnnData, skip_data: bool = True, sep: str = ","
25+
dirname: PathLike[str] | str, adata: AnnData, skip_data: bool = True, sep: str = ","
2626
):
2727
"""See :meth:`~anndata.AnnData.write_csvs`."""
2828
dirname = Path(dirname)
@@ -75,7 +75,10 @@ def write_csvs(
7575
)
7676

7777

78-
def write_loom(filename: PathLike, adata: AnnData, write_obsm_varm: bool = False):
78+
def write_loom(
79+
filename: PathLike[str] | str, adata: AnnData, write_obsm_varm: bool = False
80+
) -> None:
81+
"""See :meth:`~anndata.AnnData.write_loom`."""
7982
filename = Path(filename)
8083
row_attrs = {k: np.array(v) for k, v in adata.var.to_dict("list").items()}
8184
row_names = adata.var_names

src/anndata/_io/zarr.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
if TYPE_CHECKING:
2121
from collections.abc import MutableMapping
22+
from os import PathLike
2223

2324
T = TypeVar("T")
2425

@@ -31,6 +32,7 @@ def write_zarr(
3132
convert_strings_to_categoricals: bool = True,
3233
**ds_kwargs,
3334
) -> None:
35+
"""See :meth:`~anndata.AnnData.write_zarr`."""
3436
if isinstance(store, Path):
3537
store = str(store)
3638
if convert_strings_to_categoricals:
@@ -50,7 +52,7 @@ def callback(func, s, k, elem, dataset_kwargs, iospec):
5052
write_dispatched(f, "/", adata, callback=callback, dataset_kwargs=ds_kwargs)
5153

5254

53-
def read_zarr(store: str | Path | MutableMapping | zarr.Group) -> AnnData:
55+
def read_zarr(store: PathLike[str] | str | MutableMapping | zarr.Group) -> AnnData:
5456
"""\
5557
Read from a hierarchical Zarr array store.
5658

0 commit comments

Comments
 (0)