Skip to content

Additional testing for AsyncArray, Array #3049

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits on May 15, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/3049.misc.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added tests for ``AsyncArray``, ``Array`` and removed duplicate argument parsing.
5 changes: 0 additions & 5 deletions src/zarr/api/asynchronous.py
Original file line number Diff line number Diff line change
@@ -1019,11 +1019,6 @@ async def create(
warnings.warn("object_codec is not yet implemented", RuntimeWarning, stacklevel=2)
if read_only is not None:
warnings.warn("read_only is not yet implemented", RuntimeWarning, stacklevel=2)
if dimension_separator is not None and zarr_format == 3:
raise ValueError(
"dimension_separator is not supported for zarr format 3, use chunk_key_encoding instead"
)

if order is not None:
_warn_order_kwarg()
if write_empty_chunks is not None:
23 changes: 7 additions & 16 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
@@ -140,7 +140,8 @@
if isinstance(data, ArrayMetadata):
return data
elif isinstance(data, dict):
if data["zarr_format"] == 3:
zarr_format = data.get("zarr_format")
if zarr_format == 3:

Check warning on line 144 in src/zarr/core/array.py

Codecov / codecov/patch

src/zarr/core/array.py#L143-L144

Added lines #L143 - L144 were not covered by tests
meta_out = ArrayV3Metadata.from_dict(data)
if len(meta_out.storage_transformers) > 0:
msg = (
@@ -149,9 +150,11 @@
)
raise ValueError(msg)
return meta_out
elif data["zarr_format"] == 2:
elif zarr_format == 2:

Check warning on line 153 in src/zarr/core/array.py

Codecov / codecov/patch

src/zarr/core/array.py#L153

Added line #L153 was not covered by tests
return ArrayV2Metadata.from_dict(data)
raise TypeError
else:
raise ValueError(f"Invalid zarr_format: {zarr_format}. Expected 2 or 3")

Check warning on line 156 in src/zarr/core/array.py

Codecov / codecov/patch

src/zarr/core/array.py#L156

Added line #L156 was not covered by tests
raise TypeError # pragma: no cover


def create_codec_pipeline(metadata: ArrayMetadata) -> CodecPipeline:
@@ -160,8 +163,7 @@
elif isinstance(metadata, ArrayV2Metadata):
v2_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor)
return get_pipeline_class().from_codecs([v2_codec])
else:
raise TypeError
raise TypeError # pragma: no cover


async def get_array_metadata(
@@ -268,17 +270,6 @@
store_path: StorePath,
config: ArrayConfigLike | None = None,
) -> None:
if isinstance(metadata, dict):
zarr_format = metadata["zarr_format"]
# TODO: remove this when we extensively type the dict representation of metadata
_metadata = cast(dict[str, JSON], metadata)
if zarr_format == 2:
metadata = ArrayV2Metadata.from_dict(_metadata)
elif zarr_format == 3:
metadata = ArrayV3Metadata.from_dict(_metadata)
else:
raise ValueError(f"Invalid zarr_format: {zarr_format}. Expected 2 or 3")

metadata_parsed = parse_array_metadata(metadata)
config_parsed = parse_array_config(config)

65 changes: 46 additions & 19 deletions tests/test_api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import re
from typing import TYPE_CHECKING

import zarr.codecs
@@ -72,13 +73,19 @@ def test_create(memory_store: Store) -> None:

# TODO: parametrize over everything this function takes
@pytest.mark.parametrize("store", ["memory"], indirect=True)
def test_create_array(store: Store) -> None:
def test_create_array(store: Store, zarr_format: ZarrFormat) -> None:
attrs: dict[str, JSON] = {"foo": 100} # explicit type annotation to avoid mypy error
shape = (10, 10)
path = "foo"
data_val = 1
array_w = create_array(
store, name=path, shape=shape, attributes=attrs, chunks=shape, dtype="uint8"
store,
name=path,
shape=shape,
attributes=attrs,
chunks=shape,
dtype="uint8",
zarr_format=zarr_format,
)
array_w[:] = data_val
assert array_w.shape == shape
@@ -87,18 +94,27 @@ def test_create_array(store: Store) -> None:


@pytest.mark.parametrize("write_empty_chunks", [True, False])
def test_write_empty_chunks_warns(write_empty_chunks: bool) -> None:
def test_write_empty_chunks_warns(write_empty_chunks: bool, zarr_format: ZarrFormat) -> None:
"""
Test that using the `write_empty_chunks` kwarg on array access will raise a warning.
"""
match = "The `write_empty_chunks` keyword argument .*"
with pytest.warns(RuntimeWarning, match=match):
_ = zarr.array(
data=np.arange(10), shape=(10,), dtype="uint8", write_empty_chunks=write_empty_chunks
data=np.arange(10),
shape=(10,),
dtype="uint8",
write_empty_chunks=write_empty_chunks,
zarr_format=zarr_format,
)

with pytest.warns(RuntimeWarning, match=match):
_ = zarr.create(shape=(10,), dtype="uint8", write_empty_chunks=write_empty_chunks)
_ = zarr.create(
shape=(10,),
dtype="uint8",
write_empty_chunks=write_empty_chunks,
zarr_format=zarr_format,
)


@pytest.mark.parametrize("path", ["foo", "/", "/foo", "///foo/bar"])
@@ -115,18 +131,18 @@ def test_open_normalized_path(
assert node.path == normalize_path(path)


async def test_open_array(memory_store: MemoryStore) -> None:
async def test_open_array(memory_store: MemoryStore, zarr_format: ZarrFormat) -> None:
store = memory_store

# open array, create if doesn't exist
z = open(store=store, shape=100)
z = open(store=store, shape=100, zarr_format=zarr_format)
assert isinstance(z, Array)
assert z.shape == (100,)

# open array, overwrite
# store._store_dict = {}
store = MemoryStore()
z = open(store=store, shape=200)
z = open(store=store, shape=200, zarr_format=zarr_format)
assert isinstance(z, Array)
assert z.shape == (200,)

@@ -140,7 +156,16 @@ async def test_open_array(memory_store: MemoryStore) -> None:

# path not found
with pytest.raises(FileNotFoundError):
open(store="doesnotexist", mode="r")
open(store="doesnotexist", mode="r", zarr_format=zarr_format)


@pytest.mark.parametrize("store", ["memory", "local", "zip"], indirect=True)
def test_v2_and_v3_exist_at_same_path(store: Store) -> None:
    """
    Create an array with ``zarr_format=3`` and then with ``zarr_format=2`` in the same store,
    and check that ``zarr.open`` emits a ``UserWarning`` saying both metadata objects exist.
    """
    # v3 is written first, v2 second; both target the same store.
    zarr.create_array(store, shape=(10,), dtype="uint8", zarr_format=3)
    zarr.create_array(store, shape=(10,), dtype="uint8", zarr_format=2)

    # The message contains regex metacharacters, so it must be escaped before matching.
    expected_warning = re.escape(
        f"Both zarr.json (Zarr format 3) and .zarray (Zarr format 2) metadata objects exist at {store}. Zarr v3 will be used."
    )
    with pytest.warns(UserWarning, match=expected_warning):
        zarr.open(store=store, mode="r")


@pytest.mark.parametrize("store", ["memory"], indirect=True)
@@ -163,9 +188,9 @@ async def test_open_group(memory_store: MemoryStore) -> None:
assert "foo" in g

# open group, overwrite
# g = open_group(store=store)
# assert isinstance(g, Group)
# assert "foo" not in g
g = open_group(store=store, mode="w")
assert isinstance(g, Group)
assert "foo" not in g

# open group, read-only
store_cls = type(store)
@@ -308,7 +333,6 @@ def test_open_with_mode_w_minus(tmp_path: pathlib.Path) -> None:
zarr.open(store=tmp_path, mode="w-")


@pytest.mark.parametrize("zarr_format", [2, 3])
def test_array_order(zarr_format: ZarrFormat) -> None:
arr = zarr.ones(shape=(2, 2), order=None, zarr_format=zarr_format)
expected = zarr.config.get("array.order")
@@ -324,7 +348,6 @@ def test_array_order(zarr_format: ZarrFormat) -> None:


@pytest.mark.parametrize("order", ["C", "F"])
@pytest.mark.parametrize("zarr_format", [2, 3])
def test_array_order_warns(order: MemoryOrder | None, zarr_format: ZarrFormat) -> None:
with pytest.warns(RuntimeWarning, match="The `order` keyword argument .*"):
arr = zarr.ones(shape=(2, 2), order=order, zarr_format=zarr_format)
@@ -1095,13 +1118,16 @@ def test_open_falls_back_to_open_group() -> None:
assert group.attrs == {"key": "value"}


async def test_open_falls_back_to_open_group_async() -> None:
async def test_open_falls_back_to_open_group_async(zarr_format: ZarrFormat) -> None:
# https://github.com/zarr-developers/zarr-python/issues/2309
store = MemoryStore()
await zarr.api.asynchronous.open_group(store, attributes={"key": "value"})
await zarr.api.asynchronous.open_group(
store, attributes={"key": "value"}, zarr_format=zarr_format
)

group = await zarr.api.asynchronous.open(store=store)
assert isinstance(group, zarr.core.group.AsyncGroup)
assert group.metadata.zarr_format == zarr_format
assert group.attrs == {"key": "value"}


@@ -1137,13 +1163,14 @@ async def test_metadata_validation_error() -> None:
["local", "memory", "zip"],
indirect=True,
)
def test_open_array_with_mode_r_plus(store: Store) -> None:
def test_open_array_with_mode_r_plus(store: Store, zarr_format: ZarrFormat) -> None:
# 'r+' means read/write (must exist)
with pytest.raises(FileNotFoundError):
zarr.open_array(store=store, mode="r+")
zarr.ones(store=store, shape=(3, 3))
zarr.open_array(store=store, mode="r+", zarr_format=zarr_format)
zarr.ones(store=store, shape=(3, 3), zarr_format=zarr_format)
z2 = zarr.open_array(store=store, mode="r+")
assert isinstance(z2, Array)
assert z2.metadata.zarr_format == zarr_format
result = z2[:]
assert isinstance(result, NDArrayLike)
assert (result == 1).all()
178 changes: 159 additions & 19 deletions tests/test_array.py
Original file line number Diff line number Diff line change
@@ -41,6 +41,7 @@
from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar, default_buffer_prototype
from zarr.core.buffer.cpu import NDBuffer
from zarr.core.chunk_grids import _auto_partition
from zarr.core.chunk_key_encodings import ChunkKeyEncodingParams
from zarr.core.common import JSON, MemoryOrder, ZarrFormat
from zarr.core.group import AsyncGroup
from zarr.core.indexing import BasicIndexer, ceildiv
@@ -51,7 +52,7 @@

if TYPE_CHECKING:
from zarr.core.array_spec import ArrayConfigLike
from zarr.core.metadata.v2 import ArrayV2Metadata
from zarr.core.metadata.v2 import ArrayV2Metadata


@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
@@ -227,10 +228,13 @@ def test_array_v3_fill_value(store: MemoryStore, fill_value: int, dtype_str: str
assert arr.fill_value.dtype == arr.dtype


def test_create_positional_args_deprecated() -> None:
store = MemoryStore()
with pytest.warns(FutureWarning, match="Pass"):
zarr.Array.create(store, (2, 2), dtype="f8")
async def test_create_deprecated() -> None:
    """
    Check that calling ``AsyncArray.create`` and ``Array.create`` with ``shape`` passed
    positionally emits both a ``DeprecationWarning`` and a ``FutureWarning``.
    """
    # The expected message contains parentheses, hence the escaping.
    future_match = re.escape("Pass shape=(2, 2) as keyword args")

    # Async entry point.
    with pytest.warns(DeprecationWarning), pytest.warns(FutureWarning, match=future_match):
        await zarr.AsyncArray.create(MemoryStore(), (2, 2), dtype="f8")  # type: ignore[call-overload]

    # Sync entry point.
    with pytest.warns(DeprecationWarning), pytest.warns(FutureWarning, match=future_match):
        zarr.Array.create(MemoryStore(), (2, 2), dtype="f8")


def test_selection_positional_args_deprecated() -> None:
@@ -321,24 +325,47 @@ def test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) ->


@pytest.mark.parametrize("store", ["memory"], indirect=True)
def test_storage_transformers(store: MemoryStore) -> None:
@pytest.mark.parametrize("zarr_format", [2, 3, "invalid"])
def test_storage_transformers(store: MemoryStore, zarr_format: ZarrFormat | str) -> None:
"""
Test that providing an actual storage transformer produces a warning and otherwise passes through
"""
metadata_dict: dict[str, JSON] = {
"zarr_format": 3,
"node_type": "array",
"shape": (10,),
"chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
"data_type": "uint8",
"chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}},
"codecs": (BytesCodec().to_dict(),),
"fill_value": 0,
"storage_transformers": ({"test": "should_raise"}),
}
match = "Arrays with storage transformers are not supported in zarr-python at this time."
with pytest.raises(ValueError, match=match):
metadata_dict: dict[str, JSON]
if zarr_format == 3:
metadata_dict = {
"zarr_format": 3,
"node_type": "array",
"shape": (10,),
"chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
"data_type": "uint8",
"chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}},
"codecs": (BytesCodec().to_dict(),),
"fill_value": 0,
"storage_transformers": ({"test": "should_raise"}),
}
else:
metadata_dict = {
"zarr_format": zarr_format,
"shape": (10,),
"chunks": (1,),
"dtype": "uint8",
"dimension_separator": ".",
"codecs": (BytesCodec().to_dict(),),
"fill_value": 0,
"order": "C",
"storage_transformers": ({"test": "should_raise"}),
}
if zarr_format == 3:
match = "Arrays with storage transformers are not supported in zarr-python at this time."
with pytest.raises(ValueError, match=match):
Array.from_dict(StorePath(store), data=metadata_dict)
elif zarr_format == 2:
# no warning
Array.from_dict(StorePath(store), data=metadata_dict)
else:
match = f"Invalid zarr_format: {zarr_format}. Expected 2 or 3"
with pytest.raises(ValueError, match=match):
Array.from_dict(StorePath(store), data=metadata_dict)


@pytest.mark.parametrize("test_cls", [Array, AsyncArray[Any]])
@@ -1106,6 +1133,111 @@ async def test_v3_chunk_encoding(
assert arr.filters == filters_expected
assert arr.compressors == compressors_expected

@staticmethod
@pytest.mark.parametrize("name", ["v2", "default", "invalid"])
@pytest.mark.parametrize("separator", [".", "/"])
async def test_chunk_key_encoding(
    name: str, separator: Literal[".", "/"], zarr_format: ZarrFormat, store: MemoryStore
) -> None:
    """
    Exercise ``create_array`` with each combination of chunk key encoding name and separator.

    An ``"invalid"`` name, or the ``"default"`` name with Zarr format 2, is expected to raise
    ``ValueError``. Every other combination must succeed, and for v2 metadata the stored
    ``dimension_separator`` must equal the requested separator.
    """
    chunk_key_encoding = ChunkKeyEncodingParams(name=name, separator=separator)  # type: ignore[typeddict-item]

    # Work out which error, if any, this parameter combination should trigger.
    expected_error: str | None
    if zarr_format == 2 and name == "default":
        expected_error = "Invalid chunk key encoding. For Zarr format 2 arrays, the `name` field of the chunk key encoding must be 'v2'."
    elif name == "invalid":
        expected_error = "Unknown chunk key encoding."
    else:
        expected_error = None

    if expected_error is not None:
        with pytest.raises(ValueError, match=re.escape(expected_error)):
            await create_array(
                store=store,
                dtype="uint8",
                shape=(10,),
                chunks=(1,),
                zarr_format=zarr_format,
                chunk_key_encoding=chunk_key_encoding,
            )
        return

    created = await create_array(
        store=store,
        dtype="uint8",
        shape=(10,),
        chunks=(1,),
        zarr_format=zarr_format,
        chunk_key_encoding=chunk_key_encoding,
    )
    # Only v2 metadata exposes ``dimension_separator``.
    if isinstance(created.metadata, ArrayV2Metadata):
        assert created.metadata.dimension_separator == separator

@staticmethod
@pytest.mark.parametrize(
    ("kwargs", "error_msg"),
    [
        ({"serializer": "bytes"}, "Zarr format 2 arrays do not support `serializer`."),
        ({"dimension_names": ["test"]}, "Zarr format 2 arrays do not support dimension names."),
    ],
)
async def test_create_array_invalid_v2_arguments(
    kwargs: dict[str, Any], error_msg: str, store: MemoryStore
) -> None:
    """
    Check that ``zarr.api.asynchronous.create_array`` called with ``zarr_format=2`` and the
    given extra keyword arguments raises ``ValueError`` with the expected message.
    """
    # Messages contain backticks and dots; escape them so they are matched literally.
    pattern = re.escape(error_msg)
    with pytest.raises(ValueError, match=pattern):
        await zarr.api.asynchronous.create_array(
            store=store, dtype="uint8", shape=(10,), chunks=(1,), zarr_format=2, **kwargs
        )

@staticmethod
@pytest.mark.parametrize(
    ("kwargs", "error_msg"),
    [
        (
            {"dimension_names": ["test"]},
            "dimension_names cannot be used for arrays with zarr_format 2.",
        ),
        (
            {"chunk_key_encoding": {"name": "default", "separator": "/"}},
            "chunk_key_encoding cannot be used for arrays with zarr_format 2. Use dimension_separator instead.",
        ),
        (
            {"codecs": "bytes"},
            "codecs cannot be used for arrays with zarr_format 2. Use filters and compressor instead.",
        ),
    ],
)
async def test_create_invalid_v2_arguments(
    kwargs: dict[str, Any], error_msg: str, store: MemoryStore
) -> None:
    """
    Check that ``zarr.api.asynchronous.create`` called with ``zarr_format=2`` and the given
    extra keyword arguments raises ``ValueError`` with the expected message.
    """
    # Escape the message so punctuation is matched literally rather than as regex syntax.
    pattern = re.escape(error_msg)
    with pytest.raises(ValueError, match=pattern):
        await zarr.api.asynchronous.create(
            store=store, dtype="uint8", shape=(10,), chunks=(1,), zarr_format=2, **kwargs
        )

@staticmethod
@pytest.mark.parametrize(
    ("kwargs", "error_msg"),
    [
        (
            {"chunk_shape": (1,), "chunks": (2,)},
            "Only one of chunk_shape or chunks can be provided.",
        ),
        (
            {"dimension_separator": "/"},
            "dimension_separator cannot be used for arrays with zarr_format 3. Use chunk_key_encoding instead.",
        ),
        (
            {"filters": []},
            "filters cannot be used for arrays with zarr_format 3. Use array-to-array codecs instead",
        ),
        (
            {"compressor": "blosc"},
            "compressor cannot be used for arrays with zarr_format 3. Use bytes-to-bytes codecs instead",
        ),
    ],
)
async def test_invalid_v3_arguments(
    kwargs: dict[str, Any], error_msg: str, store: MemoryStore
) -> None:
    """
    Check that ``zarr.create`` called with ``zarr_format=3`` and the given extra keyword
    arguments raises ``ValueError`` with the expected message.

    ``chunks=(1,)`` is supplied as a default unless the parameter set already provides
    its own ``chunks`` value.
    """
    # Build a fresh dict instead of calling ``kwargs.setdefault``: pytest hands the
    # parametrized dict to the test without copying it, so mutating it in place would
    # alter the parameter set seen by any later invocation.
    call_kwargs: dict[str, Any] = {"chunks": (1,), **kwargs}
    with pytest.raises(ValueError, match=re.escape(error_msg)):
        zarr.create(store=store, dtype="uint8", shape=(10,), zarr_format=3, **call_kwargs)

@staticmethod
@pytest.mark.parametrize("dtype", ["uint8", "float32", "str"])
@pytest.mark.parametrize(
@@ -1585,3 +1717,11 @@ async def test_sharding_coordinate_selection() -> None:
result = arr[1, [0, 1]] # type: ignore[index]
assert isinstance(result, NDArrayLike)
assert (result == np.array([[12, 13, 14, 15], [16, 17, 18, 19]])).all()


@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
def test_array_repr(store: Store) -> None:
    """
    Check that ``str()`` of a freshly created array has the form
    ``<Array {store} shape={shape} dtype={dtype}>``.
    """
    expected_shape = (2, 3, 4)
    expected_dtype = "uint8"
    created = zarr.create_array(store, shape=expected_shape, dtype=expected_dtype)
    rendered = str(created)
    assert rendered == f"<Array {store} shape={expected_shape} dtype={expected_dtype}>"