Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support zarr write_empty_chunks for zarr-python 3 and up #10177

Merged
merged 6 commits into from
Mar 30, 2025
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1084,13 +1084,19 @@ def _open_existing_array(self, *, name) -> ZarrArray:
# - Existing variables already have their attrs included in the consolidated metadata file.
# - The size of dimensions can not be expanded, that would require a call using `append_dim`
# which is mutually exclusive with `region`
empty: dict[str, bool] | dict[str, dict[str, bool]]
if _zarr_v3():
empty = dict(config={"write_empty_chunks": self._write_empty})
else:
empty = dict(write_empty_chunks=self._write_empty)

zarr_array = zarr.open(
store=(
self.zarr_group.store if _zarr_v3() else self.zarr_group.chunk_store
),
# TODO: see if zarr should normalize these strings.
path="/".join([self.zarr_group.name.rstrip("/"), name]).lstrip("/"),
write_empty_chunks=self._write_empty,
**empty,
)
else:
zarr_array = self.zarr_group[name]
Expand All @@ -1115,6 +1121,14 @@ def _create_new_array(
else:
encoding["write_empty_chunks"] = self._write_empty

if _zarr_v3():
# zarr v3 deprecated origin and write_empty_chunks
# instead preferring to pass them via the config argument
encoding["config"] = {}
for c in ("write_empty_chunks", "order"):
if c in encoding:
encoding["config"][c] = encoding.pop(c)

zarr_array = self.zarr_group.create(
name,
shape=shape,
Expand Down
83 changes: 66 additions & 17 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from collections.abc import Generator, Iterator, Mapping
from contextlib import ExitStack
from io import BytesIO
from os import listdir
from pathlib import Path
from typing import TYPE_CHECKING, Any, Final, Literal, cast
from unittest.mock import patch
Expand Down Expand Up @@ -3698,31 +3697,56 @@ def roundtrip_dir(

@pytest.mark.parametrize("consolidated", [True, False, None])
@pytest.mark.parametrize("write_empty", [True, False, None])
@pytest.mark.skipif(
has_zarr_v3, reason="zarr-python 3.x removed write_empty_chunks"
)
def test_write_empty(
self, consolidated: bool | None, write_empty: bool | None
self,
consolidated: bool | None,
write_empty: bool | None,
) -> None:
if write_empty is False:
expected = ["0.1.0", "1.1.0"]
def assert_expected_files(expected: list[str], store: str) -> None:
"""Convenience for comparing with actual files written"""
ls = []
test_root = os.path.join(store, "test")
for root, _, files in os.walk(test_root):
ls.extend(
[
os.path.join(root, f).removeprefix(test_root).lstrip("/")
for f in files
]
)

assert set(expected) == set(
[
file.lstrip("c/")
for file in ls
if (file not in (".zattrs", ".zarray", "zarr.json"))
]
)

# The zarr format is set by the `default_zarr_format`
# pytest fixture that acts on a superclass
zarr_format_3 = has_zarr_v3 and zarr.config.config["default_zarr_format"] == 3
if (write_empty is False) or (write_empty is None and has_zarr_v3):
expected = ["0.1.0"]
else:
expected = [
"0.0.0",
"0.0.1",
"0.1.0",
"0.1.1",
"1.0.0",
"1.0.1",
"1.1.0",
"1.1.1",
]

data = np.array([np.nan, np.nan, 1.0, np.nan]).reshape((1, 2, 2))
if zarr_format_3:
data = np.array([0.0, 0, 1.0, 0]).reshape((1, 2, 2))
# transform to the path style of zarr 3
# e.g. 0/0/1
expected = [e.replace(".", "/") for e in expected]

ds = xr.Dataset(
data_vars={
"test": (
("Z", "Y", "X"),
np.array([np.nan, np.nan, 1.0, np.nan]).reshape((1, 2, 2)),
data,
)
}
)
Expand All @@ -3741,17 +3765,42 @@ def test_write_empty(
write_empty_chunks=write_empty,
)

# check expected files after a write
assert_expected_files(expected, store)

with self.roundtrip_dir(
ds,
store,
{"mode": "a", "append_dim": "Z", "write_empty_chunks": write_empty},
save_kwargs={
"mode": "a",
"append_dim": "Z",
"write_empty_chunks": write_empty,
},
) as a_ds:
expected_ds = xr.concat([ds, ds], dim="Z")

assert_identical(a_ds, expected_ds)

ls = listdir(os.path.join(store, "test"))
assert set(expected) == set([file for file in ls if file[0] != "."])
assert_identical(a_ds, expected_ds.compute())
# add the new files we expect to be created by the append
# that was performed by the roundtrip_dir
if (write_empty is False) or (write_empty is None and has_zarr_v3):
expected.append("1.1.0")
else:
if not has_zarr_v3:
# TODO: remove zarr3 if once zarr issue is fixed
# https://github.com/zarr-developers/zarr-python/issues/2931
expected.extend(
[
"1.1.0",
"1.0.0",
"1.0.1",
"1.1.1",
]
)
else:
expected.append("1.1.0")
if zarr_format_3:
expected = [e.replace(".", "/") for e in expected]
assert_expected_files(expected, store)

def test_avoid_excess_metadata_calls(self) -> None:
"""Test that chunk requests do not trigger redundant metadata requests.
Expand Down
Loading