From ed6257041097d18e6e20d8fdde37fb05dcf3a9be Mon Sep 17 00:00:00 2001 From: Jacob Prince-Bieker Date: Mon, 13 Jan 2025 17:16:05 +0000 Subject: [PATCH 1/5] Add `shards` to `valid_encodings` to enable sharded Zarr writing --- xarray/backends/zarr.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 383c385e1d5..402763ced54 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -448,6 +448,7 @@ def extract_zarr_variable_encoding( safe_to_drop = {"source", "original_shape", "preferred_chunks"} valid_encodings = { "chunks", + "shards", "compressor", # TODO: delete when min zarr >=3 "compressors", "filters", From b9d3b60806097e6bcc17a8ff38aa315ff22b4ffa Mon Sep 17 00:00:00 2001 From: Jacob Prince-Bieker Date: Fri, 17 Jan 2025 08:52:36 +0000 Subject: [PATCH 2/5] Add test for shards --- xarray/tests/test_backends.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index cfca5e69048..d504b5bf273 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2490,6 +2490,24 @@ def test_chunk_encoding(self) -> None: with self.roundtrip(data) as actual: pass + def test_shard_encoding(self) -> None: + # These datasets have no dask chunks. All chunking/sharding specified in + # encoding + data = create_test_data() + chunks = (1, 1) + shards = (5, 5) + data["var2"].encoding.update({"chunks": chunks}) + data["var2"].encoding.update({"shards": shards}) + + with self.roundtrip(data) as actual: + assert shards == actual["var2"].encoding["shards"] + + # expect an error with shards not divisible by chunks + data["var2"].encoding.update({"chunks": (2, 2)}) + with pytest.raises(TypeError): + with self.roundtrip(data) as actual: + pass + @requires_dask @pytest.mark.skipif( ON_WINDOWS, From 365a55800939c8636dd8864c66dfd4d7f0f6054a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 17 Jan 2025 08:52:56 +0000 Subject: [PATCH 3/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_backends.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index d504b5bf273..1970b29ac4f 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2507,7 +2507,7 @@ def test_shard_encoding(self) -> None: with pytest.raises(TypeError): with self.roundtrip(data) as actual: pass - + @requires_dask @pytest.mark.skipif( ON_WINDOWS, From 877aad63f2052b8ec51bfa4fd04b758a23a93b66 Mon Sep 17 00:00:00 2001 From: Jacob Bieker Date: Fri, 17 Jan 2025 09:39:51 +0000 Subject: [PATCH 4/5] Limit shard test to only Zarr V3 --- xarray/tests/test_backends.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 1970b29ac4f..5ac2c451b9e 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2493,20 +2493,20 @@ def test_chunk_encoding(self) -> None: def test_shard_encoding(self) -> None: # These datasets have no dask chunks. All chunking/sharding specified in # encoding - data = create_test_data() - chunks = (1, 1) - shards = (5, 5) - data["var2"].encoding.update({"chunks": chunks}) - data["var2"].encoding.update({"shards": shards}) - - with self.roundtrip(data) as actual: - assert shards == actual["var2"].encoding["shards"] - - # expect an error with shards not divisible by chunks - data["var2"].encoding.update({"chunks": (2, 2)}) - with pytest.raises(TypeError): + if has_zarr_v3 and zarr.config.config["default_zarr_format"] == 3: + data = create_test_data() + chunks = (1, 1) + shards = (5, 5) + data["var2"].encoding.update({"chunks": chunks}) + data["var2"].encoding.update({"shards": shards}) with self.roundtrip(data) as actual: - pass + assert shards == actual["var2"].encoding["shards"] + + # expect an error with shards not divisible by chunks + data["var2"].encoding.update({"chunks": (2, 2)}) + with pytest.raises(TypeError): + with self.roundtrip(data) as actual: + pass @requires_dask @pytest.mark.skipif( From 4b8b7a343158bae1cc8d98a9ecc389346405e36b Mon Sep 17 00:00:00 2001 From: Jacob Bieker Date: Fri, 17 Jan 2025 09:44:26 +0000 Subject: [PATCH 5/5] Update test and add shards to encoding when loading --- xarray/backends/zarr.py | 1 + xarray/tests/test_backends.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 402763ced54..381073b1c14 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -830,6 +830,7 @@ def open_store_variable(self, name): { "compressors": zarr_array.compressors, "filters": zarr_array.filters, + "shards": zarr_array.shards, } ) if self.zarr_group.metadata.zarr_format == 3: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 5ac2c451b9e..3ecca6f211e 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2504,7 +2504,7 @@ def test_shard_encoding(self) -> None: # expect an error with shards not divisible by chunks data["var2"].encoding.update({"chunks": (2, 2)}) - with pytest.raises(TypeError): + with pytest.raises(ValueError): with self.roundtrip(data) as actual: pass