Skip to content

Commit 762094b

Browse files
authored
(fix): add convert_strings_to_categoricals to methods (#1914)
1 parent 45b796f commit 762094b

14 files changed

Lines changed: 109 additions & 48 deletions

File tree

docs/api.md

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,28 @@ You might have more success by assembling the {class}`AnnData` object yourself f
6969
## Writing
7070

7171
Writing a complete {class}`AnnData` object to disk in anndata’s native formats `.h5ad` and `zarr`.
72+
(These functions are also exported as {func}`io.write_h5ad` and {func}`io.write_zarr`.)
7273

7374
```{eval-rst}
7475
.. autosummary::
7576
:toctree: generated/
7677
77-
AnnData.write
78+
AnnData.write_h5ad
7879
AnnData.write_zarr
80+
81+
82+
..
83+
.. autosummary::
84+
:toctree: generated/
85+
86+
io.write_h5ad
87+
io.write_zarr
88+
89+
.. toctree::
90+
:hidden:
91+
92+
generated/anndata.io.write_h5ad
93+
generated/anndata.io.write_zarr
7994
```
8095

8196
Writing individual portions ({attr}`~AnnData.obs`, {attr}`~AnnData.varm` etc.) of the {class}`AnnData` object.

docs/conf.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,9 @@ def setup(app: Sphinx):
132132
"anndata._types.Write": "anndata.experimental.Write",
133133
"zarr.core.array.Array": "zarr.Array",
134134
"zarr.core.group.Group": "zarr.Group",
135+
# zarr does not yet publicly export Buffer, so the buffer class registry is the closest available cross-reference target
136+
"zarr.core.buffer.core.Buffer": "zarr.registry.Registry",
137+
"zarr.storage._common.StorePath": "zarr.storage.StorePath",
135138
"anndata.compat.DaskArray": "dask.array.Array",
136139
"anndata.compat.CupyArray": "cupy.ndarray",
137140
"anndata.compat.CupySparseMatrix": "cupyx.scipy.sparse.spmatrix",

docs/release-notes/1914.bugfix.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Also add the `convert_strings_to_categoricals` parameter to {meth}`~anndata.AnnData.write_h5ad` and {meth}`~anndata.AnnData.write_zarr`, as originally intended. {user}`flying-sheep`

src/anndata/_core/anndata.py

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@
5353
from os import PathLike
5454
from typing import Any, Literal
5555

56+
from zarr.storage import StoreLike
57+
5658
from ..compat import Index1D
5759
from ..typing import XDataType
5860
from .aligned_mapping import AxisArraysView, LayersView, PairwiseArraysView
@@ -216,7 +218,7 @@ def __init__(
216218
raw: Mapping[str, Any] | None = None,
217219
dtype: np.dtype | type | str | None = None,
218220
shape: tuple[int, int] | None = None,
219-
filename: PathLike | None = None,
221+
filename: PathLike[str] | str | None = None,
220222
filemode: Literal["r", "r+"] | None = None,
221223
asview: bool = False,
222224
obsp: np.ndarray | Mapping[str, Sequence[Any]] | None = None,
@@ -960,7 +962,7 @@ def filename(self) -> Path | None:
960962
return self.file.filename
961963

962964
@filename.setter
963-
def filename(self, filename: PathLike | None):
965+
def filename(self, filename: PathLike[str] | str | None):
964966
# convert early for later comparison
965967
filename = None if filename is None else Path(filename)
966968
# change from backing-mode back to full loading into memory
@@ -1439,7 +1441,7 @@ def to_memory(self, *, copy: bool = False) -> AnnData:
14391441

14401442
return AnnData(**new)
14411443

1442-
def copy(self, filename: PathLike | None = None) -> AnnData:
1444+
def copy(self, filename: PathLike[str] | str | None = None) -> AnnData:
14431445
"""Full copy, optionally on disk."""
14441446
if not self.isbacked:
14451447
if self.is_view and self._has_X():
@@ -1800,9 +1802,12 @@ def _check_dimensions(self, key=None):
18001802
)
18011803
raise ValueError(msg)
18021804

1805+
@old_positionals("compression", "compression_opts", "as_dense")
18031806
def write_h5ad(
18041807
self,
1805-
filename: PathLike | None = None,
1808+
filename: PathLike[str] | str | None = None,
1809+
*,
1810+
convert_strings_to_categoricals: bool = True,
18061811
compression: Literal["gzip", "lzf"] | None = None,
18071812
compression_opts: int | Any = None,
18081813
as_dense: Sequence[str] = (),
@@ -1826,6 +1831,8 @@ def write_h5ad(
18261831
----------
18271832
filename
18281833
Filename of data file. Defaults to backing file.
1834+
convert_strings_to_categoricals
1835+
Convert string columns to categorical.
18291836
compression
18301837
For [`lzf`, `gzip`], see the h5py :ref:`dataset_compression`.
18311838
@@ -1880,6 +1887,7 @@ def write_h5ad(
18801887
write_h5ad(
18811888
Path(filename),
18821889
self,
1890+
convert_strings_to_categoricals=convert_strings_to_categoricals,
18831891
compression=compression,
18841892
compression_opts=compression_opts,
18851893
as_dense=as_dense,
@@ -1891,7 +1899,9 @@ def write_h5ad(
18911899
write = write_h5ad # a shortcut and backwards compat
18921900

18931901
@old_positionals("skip_data", "sep")
1894-
def write_csvs(self, dirname: PathLike, *, skip_data: bool = True, sep: str = ","):
1902+
def write_csvs(
1903+
self, dirname: PathLike[str] | str, *, skip_data: bool = True, sep: str = ","
1904+
):
18951905
"""\
18961906
Write annotation to `.csv` files.
18971907
@@ -1912,7 +1922,9 @@ def write_csvs(self, dirname: PathLike, *, skip_data: bool = True, sep: str = ",
19121922
write_csvs(dirname, self, skip_data=skip_data, sep=sep)
19131923

19141924
@old_positionals("write_obsm_varm")
1915-
def write_loom(self, filename: PathLike, *, write_obsm_varm: bool = False):
1925+
def write_loom(
1926+
self, filename: PathLike[str] | str, *, write_obsm_varm: bool = False
1927+
):
19161928
"""\
19171929
Write `.loom`-formatted hdf5 file.
19181930
@@ -1925,10 +1937,13 @@ def write_loom(self, filename: PathLike, *, write_obsm_varm: bool = False):
19251937

19261938
write_loom(filename, self, write_obsm_varm=write_obsm_varm)
19271939

1940+
@old_positionals("chunks")
19281941
def write_zarr(
19291942
self,
1930-
store: MutableMapping | PathLike,
1943+
store: StoreLike,
1944+
*,
19311945
chunks: tuple[int, ...] | None = None,
1946+
convert_strings_to_categoricals: bool = True,
19321947
):
19331948
"""\
19341949
Write a hierarchical Zarr array store.
@@ -1939,6 +1954,8 @@ def write_zarr(
19391954
The filename, a :class:`~typing.MutableMapping`, or a Zarr storage class.
19401955
chunks
19411956
Chunk shape.
1957+
convert_strings_to_categoricals
1958+
Convert string columns to categorical.
19421959
"""
19431960
from ..io import write_zarr
19441961

@@ -1949,7 +1966,12 @@ def write_zarr(
19491966
"Please pass `write_zarr(adata)` instead."
19501967
)
19511968
raise ValueError(msg)
1952-
write_zarr(store, self, chunks=chunks)
1969+
write_zarr(
1970+
store,
1971+
self,
1972+
chunks=chunks,
1973+
convert_strings_to_categoricals=convert_strings_to_categoricals,
1974+
)
19531975

19541976
def chunked_X(self, chunk_size: int | None = None):
19551977
"""\
@@ -2090,10 +2112,10 @@ def _infer_shape_for_axis(
20902112
return elem.shape[0]
20912113
for elem, id in zip([layers, xxxm, xxxp], ["layers", "xxxm", "xxxp"]):
20922114
if elem is not None:
2093-
elem = cast(Mapping, elem)
2115+
elem = cast("Mapping", elem)
20942116
for sub_elem in elem.values():
20952117
if hasattr(sub_elem, "shape"):
2096-
size = cast(int, sub_elem.shape[axis if id == "layers" else 0])
2118+
size = cast("int", sub_elem.shape[axis if id == "layers" else 0])
20972119
return size
20982120
return None
20992121

src/anndata/_core/file_backing.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class AnnDataFileManager:
2626
def __init__(
2727
self,
2828
adata: anndata.AnnData,
29-
filename: PathLike | None = None,
29+
filename: PathLike[str] | str | None = None,
3030
filemode: Literal["r", "r+"] | None = None,
3131
):
3232
self._adata_ref = weakref.ref(adata)
@@ -81,12 +81,12 @@ def filename(self) -> Path:
8181
return self._filename
8282

8383
@filename.setter
84-
def filename(self, filename: PathLike | None):
84+
def filename(self, filename: PathLike[str] | str | None):
8585
self._filename = None if filename is None else Path(filename)
8686

8787
def open(
8888
self,
89-
filename: PathLike | None = None,
89+
filename: PathLike[str] | str | None = None,
9090
filemode: Literal["r", "r+"] | None = None,
9191
):
9292
if filename is not None:

src/anndata/_io/h5ad.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636

3737
if TYPE_CHECKING:
3838
from collections.abc import Callable, Collection, Mapping, Sequence
39+
from os import PathLike
3940
from typing import Any, Literal
4041

4142
from .._core.file_backing import AnnDataFileManager
@@ -44,14 +45,15 @@
4445

4546

4647
def write_h5ad(
47-
filepath: Path | str,
48+
filepath: PathLike[str] | str,
4849
adata: AnnData,
4950
*,
5051
as_dense: Sequence[str] = (),
5152
convert_strings_to_categoricals: bool = True,
5253
dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
5354
**kwargs,
5455
) -> None:
56+
"""See :meth:`~anndata.AnnData.write_h5ad`."""
5557
if isinstance(as_dense, str):
5658
as_dense = [as_dense]
5759
if "raw.X" in as_dense:
@@ -140,7 +142,9 @@ def write_sparse_as_dense(
140142
del f[key]
141143

142144

143-
def read_h5ad_backed(filename: str | Path, mode: Literal["r", "r+"]) -> AnnData:
145+
def read_h5ad_backed(
146+
filename: str | PathLike[str], mode: Literal["r", "r+"]
147+
) -> AnnData:
144148
d = dict(filename=filename, filemode=mode)
145149

146150
f = h5py.File(filename, mode)
@@ -169,7 +173,7 @@ def read_h5ad_backed(filename: str | Path, mode: Literal["r", "r+"]) -> AnnData:
169173

170174

171175
def read_h5ad(
172-
filename: str | Path,
176+
filename: PathLike[str] | str,
173177
backed: Literal["r", "r+"] | bool | None = None,
174178
*,
175179
as_sparse: Sequence[str] = (),

src/anndata/_io/read.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424

2525
def read_csv(
26-
filename: PathLike | Iterator[str],
26+
filename: PathLike[str] | str | Iterator[str],
2727
delimiter: str | None = ",",
2828
first_column_names: bool | None = None,
2929
dtype: str = "float32",
@@ -49,7 +49,9 @@ def read_csv(
4949
return read_text(filename, delimiter, first_column_names, dtype)
5050

5151

52-
def read_excel(filename: PathLike, sheet: str | int, dtype: str = "float32") -> AnnData:
52+
def read_excel(
53+
filename: PathLike[str] | str, sheet: str | int, dtype: str = "float32"
54+
) -> AnnData:
5355
"""\
5456
Read `.xlsx` (Excel) file.
5557
@@ -73,7 +75,7 @@ def read_excel(filename: PathLike, sheet: str | int, dtype: str = "float32") ->
7375
return AnnData(X, row, col)
7476

7577

76-
def read_umi_tools(filename: PathLike, dtype=None) -> AnnData:
78+
def read_umi_tools(filename: PathLike[str] | str, dtype=None) -> AnnData:
7779
"""\
7880
Read a gzipped condensed count matrix from umi_tools.
7981
@@ -96,7 +98,7 @@ def read_umi_tools(filename: PathLike, dtype=None) -> AnnData:
9698
return AnnData(X=X, obs=obs, var=var)
9799

98100

99-
def read_hdf(filename: PathLike, key: str) -> AnnData:
101+
def read_hdf(filename: PathLike[str] | str, key: str) -> AnnData:
100102
"""\
101103
Read `.h5` (hdf5) file.
102104
@@ -152,7 +154,7 @@ def _fmt_loom_axis_attrs(
152154

153155
@_deprecate_positional_args(version="0.9")
154156
def read_loom(
155-
filename: PathLike,
157+
filename: PathLike[str] | str,
156158
*,
157159
sparse: bool = True,
158160
cleanup: bool = False,
@@ -295,7 +297,7 @@ def read_loom(
295297
return adata
296298

297299

298-
def read_mtx(filename: PathLike, dtype: str = "float32") -> AnnData:
300+
def read_mtx(filename: PathLike[str] | str, dtype: str = "float32") -> AnnData:
299301
"""\
300302
Read `.mtx` file.
301303
@@ -317,7 +319,7 @@ def read_mtx(filename: PathLike, dtype: str = "float32") -> AnnData:
317319

318320

319321
def read_text(
320-
filename: PathLike | Iterator[str],
322+
filename: PathLike[str] | str | Iterator[str],
321323
delimiter: str | None = None,
322324
first_column_names: bool | None = None,
323325
dtype: str = "float32",

src/anndata/_io/specs/methods.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ def read_indices(group):
193193

194194

195195
def read_partial(
196-
pth: PathLike,
196+
pth: PathLike[str] | str,
197197
*,
198198
obs_idx=slice(None),
199199
var_idx=slice(None),

src/anndata/_io/write.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,11 @@
2424

2525
@old_positionals("skip_data", "sep")
2626
def write_csvs(
27-
dirname: PathLike, adata: AnnData, *, skip_data: bool = True, sep: str = ","
27+
dirname: PathLike[str] | str,
28+
adata: AnnData,
29+
*,
30+
skip_data: bool = True,
31+
sep: str = ",",
2832
):
2933
"""See :meth:`~anndata.AnnData.write_csvs`."""
3034
dirname = Path(dirname)
@@ -78,7 +82,10 @@ def write_csvs(
7882

7983

8084
@old_positionals("write_obsm_varm")
81-
def write_loom(filename: PathLike, adata: AnnData, *, write_obsm_varm: bool = False):
85+
def write_loom(
86+
filename: PathLike[str] | str, adata: AnnData, *, write_obsm_varm: bool = False
87+
) -> None:
88+
"""See :meth:`~anndata.AnnData.write_loom`."""
8289
filename = Path(filename)
8390
row_attrs = {k: np.array(v) for k, v in adata.var.to_dict("list").items()}
8491
row_names = adata.var_names

src/anndata/_io/zarr.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
if TYPE_CHECKING:
2121
from collections.abc import MutableMapping
22+
from os import PathLike
2223

2324
from zarr.core.common import AccessModeLiteral
2425
from zarr.storage import StoreLike
@@ -34,6 +35,7 @@ def write_zarr(
3435
convert_strings_to_categoricals: bool = True,
3536
**ds_kwargs,
3637
) -> None:
38+
"""See :meth:`~anndata.AnnData.write_zarr`."""
3739
if isinstance(store, Path):
3840
store = str(store)
3941
if convert_strings_to_categoricals:
@@ -61,7 +63,7 @@ def callback(func, s, k: str, elem, dataset_kwargs, iospec):
6163
zarr.consolidate_metadata(f.store)
6264

6365

64-
def read_zarr(store: str | Path | MutableMapping | zarr.Group) -> AnnData:
66+
def read_zarr(store: PathLike[str] | str | MutableMapping | zarr.Group) -> AnnData:
6567
"""\
6668
Read from a hierarchical Zarr array store.
6769

0 commit comments

Comments
 (0)