Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,28 @@ You might have more success by assembling the {class}`AnnData` object yourself f
## Writing

Writing a complete {class}`AnnData` object to disk in anndata’s native formats `.h5ad` and `zarr`.
(These functions are also exported as {func}`io.write_h5ad` and {func}`io.write_zarr`.)

```{eval-rst}
.. autosummary::
:toctree: generated/

AnnData.write
AnnData.write_h5ad
AnnData.write_zarr


..
.. autosummary::
:toctree: generated/

io.write_h5ad
io.write_zarr

.. toctree::
:hidden:

generated/anndata.io.write_h5ad
generated/anndata.io.write_zarr
```

Writing individual portions ({attr}`~AnnData.obs`, {attr}`~AnnData.varm` etc.) of the {class}`AnnData` object.
Expand Down
3 changes: 3 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,9 @@ def setup(app: Sphinx):
"anndata._types.Write": "anndata.experimental.Write",
"zarr.core.array.Array": "zarr.Array",
"zarr.core.group.Group": "zarr.Group",
# Buffer is not yet exported, so the buffer class registry is the closest thing
"zarr.core.buffer.core.Buffer": "zarr.registry.Registry",
"zarr.storage._common.StorePath": "zarr.storage.StorePath",
"anndata.compat.DaskArray": "dask.array.Array",
"anndata.compat.CupyArray": "cupy.ndarray",
"anndata.compat.CupySparseMatrix": "cupyx.scipy.sparse.spmatrix",
Expand Down
1 change: 1 addition & 0 deletions docs/release-notes/1914.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add the `convert_strings_to_categoricals` parameter to {meth}`~anndata.AnnData.write_h5ad` and {meth}`~anndata.AnnData.write_zarr` as well, as was originally intended {user}`flying-sheep`
42 changes: 32 additions & 10 deletions src/anndata/_core/anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
from os import PathLike
from typing import Any, Literal

from zarr.storage import StoreLike

from ..compat import Index1D
from ..typing import XDataType
from .aligned_mapping import AxisArraysView, LayersView, PairwiseArraysView
Expand Down Expand Up @@ -216,7 +218,7 @@ def __init__(
raw: Mapping[str, Any] | None = None,
dtype: np.dtype | type | str | None = None,
shape: tuple[int, int] | None = None,
filename: PathLike | None = None,
filename: PathLike[str] | str | None = None,
filemode: Literal["r", "r+"] | None = None,
asview: bool = False,
obsp: np.ndarray | Mapping[str, Sequence[Any]] | None = None,
Expand Down Expand Up @@ -960,7 +962,7 @@ def filename(self) -> Path | None:
return self.file.filename

@filename.setter
def filename(self, filename: PathLike | None):
def filename(self, filename: PathLike[str] | str | None):
# convert early for later comparison
filename = None if filename is None else Path(filename)
# change from backing-mode back to full loading into memory
Expand Down Expand Up @@ -1439,7 +1441,7 @@ def to_memory(self, *, copy: bool = False) -> AnnData:

return AnnData(**new)

def copy(self, filename: PathLike | None = None) -> AnnData:
def copy(self, filename: PathLike[str] | str | None = None) -> AnnData:
"""Full copy, optionally on disk."""
if not self.isbacked:
if self.is_view and self._has_X():
Expand Down Expand Up @@ -1800,9 +1802,12 @@ def _check_dimensions(self, key=None):
)
raise ValueError(msg)

@old_positionals("compression", "compression_opts", "as_dense")
def write_h5ad(
self,
filename: PathLike | None = None,
filename: PathLike[str] | str | None = None,
*,
convert_strings_to_categoricals: bool = True,
compression: Literal["gzip", "lzf"] | None = None,
compression_opts: int | Any = None,
as_dense: Sequence[str] = (),
Expand All @@ -1826,6 +1831,8 @@ def write_h5ad(
----------
filename
Filename of data file. Defaults to backing file.
convert_strings_to_categoricals
Convert string columns to categorical.
compression
For [`lzf`, `gzip`], see the h5py :ref:`dataset_compression`.

Expand Down Expand Up @@ -1880,6 +1887,7 @@ def write_h5ad(
write_h5ad(
Path(filename),
self,
convert_strings_to_categoricals=convert_strings_to_categoricals,
compression=compression,
compression_opts=compression_opts,
as_dense=as_dense,
Expand All @@ -1891,7 +1899,9 @@ def write_h5ad(
write = write_h5ad # a shortcut and backwards compat

@old_positionals("skip_data", "sep")
def write_csvs(self, dirname: PathLike, *, skip_data: bool = True, sep: str = ","):
def write_csvs(
self, dirname: PathLike[str] | str, *, skip_data: bool = True, sep: str = ","
):
"""\
Write annotation to `.csv` files.

Expand All @@ -1912,7 +1922,9 @@ def write_csvs(self, dirname: PathLike, *, skip_data: bool = True, sep: str = ",
write_csvs(dirname, self, skip_data=skip_data, sep=sep)

@old_positionals("write_obsm_varm")
def write_loom(self, filename: PathLike, *, write_obsm_varm: bool = False):
def write_loom(
self, filename: PathLike[str] | str, *, write_obsm_varm: bool = False
):
"""\
Write `.loom`-formatted hdf5 file.

Expand All @@ -1925,10 +1937,13 @@ def write_loom(self, filename: PathLike, *, write_obsm_varm: bool = False):

write_loom(filename, self, write_obsm_varm=write_obsm_varm)

@old_positionals("chunks")
def write_zarr(
self,
store: MutableMapping | PathLike,
store: StoreLike,
*,
chunks: tuple[int, ...] | None = None,
convert_strings_to_categoricals: bool = True,
):
"""\
Write a hierarchical Zarr array store.
Expand All @@ -1939,6 +1954,8 @@ def write_zarr(
The filename, a :class:`~typing.MutableMapping`, or a Zarr storage class.
chunks
Chunk shape.
convert_strings_to_categoricals
Convert string columns to categorical.
"""
from ..io import write_zarr

Expand All @@ -1949,7 +1966,12 @@ def write_zarr(
"Please pass `write_zarr(adata)` instead."
)
raise ValueError(msg)
write_zarr(store, self, chunks=chunks)
write_zarr(
store,
self,
chunks=chunks,
convert_strings_to_categoricals=convert_strings_to_categoricals,
)

def chunked_X(self, chunk_size: int | None = None):
"""\
Expand Down Expand Up @@ -2090,10 +2112,10 @@ def _infer_shape_for_axis(
return elem.shape[0]
for elem, id in zip([layers, xxxm, xxxp], ["layers", "xxxm", "xxxp"]):
if elem is not None:
elem = cast(Mapping, elem)
elem = cast("Mapping", elem)
for sub_elem in elem.values():
if hasattr(sub_elem, "shape"):
size = cast(int, sub_elem.shape[axis if id == "layers" else 0])
size = cast("int", sub_elem.shape[axis if id == "layers" else 0])
return size
return None

Expand Down
6 changes: 3 additions & 3 deletions src/anndata/_core/file_backing.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class AnnDataFileManager:
def __init__(
self,
adata: anndata.AnnData,
filename: PathLike | None = None,
filename: PathLike[str] | str | None = None,
filemode: Literal["r", "r+"] | None = None,
):
self._adata_ref = weakref.ref(adata)
Expand Down Expand Up @@ -81,12 +81,12 @@ def filename(self) -> Path:
return self._filename

@filename.setter
def filename(self, filename: PathLike | None):
def filename(self, filename: PathLike[str] | str | None):
self._filename = None if filename is None else Path(filename)

def open(
self,
filename: PathLike | None = None,
filename: PathLike[str] | str | None = None,
filemode: Literal["r", "r+"] | None = None,
):
if filename is not None:
Expand Down
10 changes: 7 additions & 3 deletions src/anndata/_io/h5ad.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

if TYPE_CHECKING:
from collections.abc import Callable, Collection, Mapping, Sequence
from os import PathLike
from typing import Any, Literal

from .._core.file_backing import AnnDataFileManager
Expand All @@ -44,14 +45,15 @@


def write_h5ad(
filepath: Path | str,
filepath: PathLike[str] | str,
adata: AnnData,
*,
as_dense: Sequence[str] = (),
convert_strings_to_categoricals: bool = True,
dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
**kwargs,
) -> None:
"""See :meth:`~anndata.AnnData.write_h5ad`."""
if isinstance(as_dense, str):
as_dense = [as_dense]
if "raw.X" in as_dense:
Expand Down Expand Up @@ -140,7 +142,9 @@ def write_sparse_as_dense(
del f[key]


def read_h5ad_backed(filename: str | Path, mode: Literal["r", "r+"]) -> AnnData:
def read_h5ad_backed(
filename: str | PathLike[str], mode: Literal["r", "r+"]
) -> AnnData:
d = dict(filename=filename, filemode=mode)

f = h5py.File(filename, mode)
Expand Down Expand Up @@ -169,7 +173,7 @@ def read_h5ad_backed(filename: str | Path, mode: Literal["r", "r+"]) -> AnnData:


def read_h5ad(
filename: str | Path,
filename: PathLike[str] | str,
backed: Literal["r", "r+"] | bool | None = None,
*,
as_sparse: Sequence[str] = (),
Expand Down
16 changes: 9 additions & 7 deletions src/anndata/_io/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@


def read_csv(
filename: PathLike | Iterator[str],
filename: PathLike[str] | str | Iterator[str],
delimiter: str | None = ",",
first_column_names: bool | None = None,
dtype: str = "float32",
Expand All @@ -49,7 +49,9 @@ def read_csv(
return read_text(filename, delimiter, first_column_names, dtype)


def read_excel(filename: PathLike, sheet: str | int, dtype: str = "float32") -> AnnData:
def read_excel(
filename: PathLike[str] | str, sheet: str | int, dtype: str = "float32"
) -> AnnData:
"""\
Read `.xlsx` (Excel) file.

Expand All @@ -73,7 +75,7 @@ def read_excel(filename: PathLike, sheet: str | int, dtype: str = "float32") ->
return AnnData(X, row, col)


def read_umi_tools(filename: PathLike, dtype=None) -> AnnData:
def read_umi_tools(filename: PathLike[str] | str, dtype=None) -> AnnData:
"""\
Read a gzipped condensed count matrix from umi_tools.

Expand All @@ -96,7 +98,7 @@ def read_umi_tools(filename: PathLike, dtype=None) -> AnnData:
return AnnData(X=X, obs=obs, var=var)


def read_hdf(filename: PathLike, key: str) -> AnnData:
def read_hdf(filename: PathLike[str] | str, key: str) -> AnnData:
"""\
Read `.h5` (hdf5) file.

Expand Down Expand Up @@ -152,7 +154,7 @@ def _fmt_loom_axis_attrs(

@_deprecate_positional_args(version="0.9")
def read_loom(
filename: PathLike,
filename: PathLike[str] | str,
*,
sparse: bool = True,
cleanup: bool = False,
Expand Down Expand Up @@ -295,7 +297,7 @@ def read_loom(
return adata


def read_mtx(filename: PathLike, dtype: str = "float32") -> AnnData:
def read_mtx(filename: PathLike[str] | str, dtype: str = "float32") -> AnnData:
"""\
Read `.mtx` file.

Expand All @@ -317,7 +319,7 @@ def read_mtx(filename: PathLike, dtype: str = "float32") -> AnnData:


def read_text(
filename: PathLike | Iterator[str],
filename: PathLike[str] | str | Iterator[str],
delimiter: str | None = None,
first_column_names: bool | None = None,
dtype: str = "float32",
Expand Down
2 changes: 1 addition & 1 deletion src/anndata/_io/specs/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def read_indices(group):


def read_partial(
pth: PathLike,
pth: PathLike[str] | str,
*,
obs_idx=slice(None),
var_idx=slice(None),
Expand Down
11 changes: 9 additions & 2 deletions src/anndata/_io/write.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@

@old_positionals("skip_data", "sep")
def write_csvs(
dirname: PathLike, adata: AnnData, *, skip_data: bool = True, sep: str = ","
dirname: PathLike[str] | str,
adata: AnnData,
*,
skip_data: bool = True,
sep: str = ",",
):
"""See :meth:`~anndata.AnnData.write_csvs`."""
dirname = Path(dirname)
Expand Down Expand Up @@ -78,7 +82,10 @@ def write_csvs(


@old_positionals("write_obsm_varm")
def write_loom(filename: PathLike, adata: AnnData, *, write_obsm_varm: bool = False):
def write_loom(
filename: PathLike[str] | str, adata: AnnData, *, write_obsm_varm: bool = False
) -> None:
"""See :meth:`~anndata.AnnData.write_loom`."""
filename = Path(filename)
row_attrs = {k: np.array(v) for k, v in adata.var.to_dict("list").items()}
row_names = adata.var_names
Expand Down
4 changes: 3 additions & 1 deletion src/anndata/_io/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

if TYPE_CHECKING:
from collections.abc import MutableMapping
from os import PathLike

from zarr.core.common import AccessModeLiteral
from zarr.storage import StoreLike
Expand All @@ -34,6 +35,7 @@ def write_zarr(
convert_strings_to_categoricals: bool = True,
**ds_kwargs,
) -> None:
"""See :meth:`~anndata.AnnData.write_zarr`."""
if isinstance(store, Path):
store = str(store)
if convert_strings_to_categoricals:
Expand Down Expand Up @@ -61,7 +63,7 @@ def callback(func, s, k: str, elem, dataset_kwargs, iospec):
zarr.consolidate_metadata(f.store)


def read_zarr(store: str | Path | MutableMapping | zarr.Group) -> AnnData:
def read_zarr(store: PathLike[str] | str | MutableMapping | zarr.Group) -> AnnData:
"""\
Read from a hierarchical Zarr array store.

Expand Down
Loading