diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 5b7e9d7af47..d59cd8fb174 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -1084,13 +1084,19 @@ def _open_existing_array(self, *, name) -> ZarrArray:
             # - Existing variables already have their attrs included in the consolidated metadata file.
             # - The size of dimensions can not be expanded, that would require a call using `append_dim`
             #   which is mutually exclusive with `region`
+            empty: dict[str, bool] | dict[str, dict[str, bool]]
+            if _zarr_v3():
+                empty = dict(config={"write_empty_chunks": self._write_empty})
+            else:
+                empty = dict(write_empty_chunks=self._write_empty)
+
             zarr_array = zarr.open(
                 store=(
                     self.zarr_group.store if _zarr_v3() else self.zarr_group.chunk_store
                 ),
                 # TODO: see if zarr should normalize these strings.
                 path="/".join([self.zarr_group.name.rstrip("/"), name]).lstrip("/"),
-                write_empty_chunks=self._write_empty,
+                **empty,
             )
         else:
             zarr_array = self.zarr_group[name]
@@ -1115,6 +1121,14 @@ def _create_new_array(
         else:
             encoding["write_empty_chunks"] = self._write_empty
 
+        if _zarr_v3():
+            # zarr v3 deprecated origin and write_empty_chunks
+            # instead preferring to pass them via the config argument
+            encoding["config"] = {}
+            for c in ("write_empty_chunks", "order"):
+                if c in encoding:
+                    encoding["config"][c] = encoding.pop(c)
+
         zarr_array = self.zarr_group.create(
             name,
             shape=shape,
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index ab7b8298618..ec9f2fe8aef 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -17,7 +17,6 @@
 from collections.abc import Generator, Iterator, Mapping
 from contextlib import ExitStack
 from io import BytesIO
-from os import listdir
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Final, Literal, cast
 from unittest.mock import patch
@@ -3698,34 +3697,54 @@ def roundtrip_dir(
 
     @pytest.mark.parametrize("consolidated", [True, False, None])
     @pytest.mark.parametrize("write_empty", [True, False, None])
-    @pytest.mark.skipif(
-        has_zarr_v3, reason="zarr-python 3.x removed write_empty_chunks"
-    )
     def test_write_empty(
-        self, consolidated: bool | None, write_empty: bool | None
+        self,
+        consolidated: bool | None,
+        write_empty: bool | None,
     ) -> None:
-        if write_empty is False:
-            expected = ["0.1.0", "1.1.0"]
+        def assert_expected_files(expected: list[str], store: str) -> None:
+            """Convenience for comparing with actual files written"""
+            ls = []
+            test_root = os.path.join(store, "test")
+            for root, _, files in os.walk(test_root):
+                ls.extend(
+                    [
+                        os.path.join(root, f).removeprefix(test_root).lstrip("/")
+                        for f in files
+                    ]
+                )
+
+            assert set(expected) == set(
+                [
+                    file.lstrip("c/")
+                    for file in ls
+                    if (file not in (".zattrs", ".zarray", "zarr.json"))
+                ]
+            )
+
+        # The zarr format is set by the `default_zarr_format`
+        # pytest fixture that acts on a superclass
+        zarr_format_3 = has_zarr_v3 and zarr.config.config["default_zarr_format"] == 3
+        if (write_empty is False) or (write_empty is None and has_zarr_v3):
+            expected = ["0.1.0"]
         else:
             expected = [
                 "0.0.0",
                 "0.0.1",
                 "0.1.0",
                 "0.1.1",
-                "1.0.0",
-                "1.0.1",
-                "1.1.0",
-                "1.1.1",
             ]
 
-        ds = xr.Dataset(
-            data_vars={
-                "test": (
-                    ("Z", "Y", "X"),
-                    np.array([np.nan, np.nan, 1.0, np.nan]).reshape((1, 2, 2)),
-                )
-            }
-        )
+        if zarr_format_3:
+            data = np.array([0.0, 0, 1.0, 0]).reshape((1, 2, 2))
+            # transform to the path style of zarr 3
+            # e.g. 0/0/1
+            expected = [e.replace(".", "/") for e in expected]
+        else:
+            # use nan for default fill_value behaviour
+            data = np.array([np.nan, np.nan, 1.0, np.nan]).reshape((1, 2, 2))
+
+        ds = xr.Dataset(data_vars={"test": (("Z", "Y", "X"), data)})
 
         if has_dask:
             ds["test"] = ds["test"].chunk(1)
@@ -3741,17 +3760,42 @@ def test_write_empty(
                 write_empty_chunks=write_empty,
             )
 
+            # check expected files after a write
+            assert_expected_files(expected, store)
+
             with self.roundtrip_dir(
                 ds,
                 store,
-                {"mode": "a", "append_dim": "Z", "write_empty_chunks": write_empty},
+                save_kwargs={
+                    "mode": "a",
+                    "append_dim": "Z",
+                    "write_empty_chunks": write_empty,
+                },
             ) as a_ds:
                 expected_ds = xr.concat([ds, ds], dim="Z")
 
-                assert_identical(a_ds, expected_ds)
-
-        ls = listdir(os.path.join(store, "test"))
-        assert set(expected) == set([file for file in ls if file[0] != "."])
+                assert_identical(a_ds, expected_ds.compute())
+                # add the new files we expect to be created by the append
+                # that was performed by the roundtrip_dir
+                if (write_empty is False) or (write_empty is None and has_zarr_v3):
+                    expected.append("1.1.0")
+                else:
+                    if not has_zarr_v3:
+                        # TODO: remove zarr3 if once zarr issue is fixed
+                        # https://github.com/zarr-developers/zarr-python/issues/2931
+                        expected.extend(
+                            [
+                                "1.1.0",
+                                "1.0.0",
+                                "1.0.1",
+                                "1.1.1",
+                            ]
+                        )
+                    else:
+                        expected.append("1.1.0")
+                if zarr_format_3:
+                    expected = [e.replace(".", "/") for e in expected]
+                assert_expected_files(expected, store)
 
     def test_avoid_excess_metadata_calls(self) -> None:
         """Test that chunk requests do not trigger redundant metadata requests.