Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
a417371
remove funny-looking line and refactor to ensure reading consolidated…
TomNicholas Mar 13, 2025
8756919
parametrize over whether or not we write consolidated metadata
TomNicholas Mar 13, 2025
b85d70d
fix consolidated metadata
TomNicholas Mar 13, 2025
f1cc331
ian changes
ianhi Mar 13, 2025
296ed03
open_datatree_specific_group consolidated true works
ianhi Mar 13, 2025
4da72ae
test: add consolidated parametrize to zarr datatree test
ianhi Mar 13, 2025
5f7c6b9
fix: group finding behavior consolidated
ianhi Mar 13, 2025
5dc7df7
Merge remote-tracking branch 'ianhi/aladinor/ian/updates' into dtree_…
TomNicholas Mar 17, 2025
9823d64
remove more debugging print statements
TomNicholas Mar 17, 2025
980ebb4
Merge branch 'dtree-zarrv3' into dtree-zarrv3-2
TomNicholas Mar 17, 2025
30f5bba
revert changes to test fixture
TomNicholas Mar 18, 2025
4d1fdb5
formatting
TomNicholas Mar 18, 2025
ecef578
add decorator to parametrize over zarr formats
TomNicholas Mar 18, 2025
c2a1f5f
ensure both versions of zarr-python and both versions of zarr-python …
TomNicholas Mar 18, 2025
cde6b65
change datatree fixture to not produce values that would be fill_valu…
TomNicholas Mar 18, 2025
09fad6e
refactor test to make expected behaviour clearer
TomNicholas Mar 18, 2025
77575b5
fix wrongly expected behaviour - should not expect inherited variable…
TomNicholas Mar 19, 2025
0a9f874
make arrays no longer scalars to dodge https://github.com/pydata/xarr…
TomNicholas Mar 19, 2025
565938b
Merge branch 'dtree-zarrv3-2' of https://github.com/TomNicholas/xarra…
TomNicholas Mar 19, 2025
daf0f42
fix bad merge
TomNicholas Mar 19, 2025
84bde40
parametrize almost every test over zarr_format
TomNicholas Mar 19, 2025
04d937c
parametrize encoding test over zarr_formats
TomNicholas Mar 19, 2025
765c5f0
use xfail in encoding test
TomNicholas Mar 19, 2025
7eee31c
updated expected behaviour of zarr on-disk in light of https://github…
TomNicholas Mar 19, 2025
0969422
fully revert change to simple_datatree test fixture by considered zar…
TomNicholas Mar 19, 2025
cacf419
parametrize unaligned_zarr test fixture over zarr_format
TomNicholas Mar 19, 2025
1a60ebe
move parametrize_over_zarr_format decorator to apply to entire test c…
TomNicholas Mar 19, 2025
d98abe3
for now explicitly consolidate metadata in test fixture
TomNicholas Mar 19, 2025
2dcefe4
correct bug in writing of consolidated metadata
TomNicholas Mar 19, 2025
a88e503
delete commented-out lines
TomNicholas Mar 19, 2025
22ac9b4
merges from main
TomNicholas Mar 19, 2025
69dc976
Revert "merges from main"
TomNicholas Mar 19, 2025
6e3e2aa
fix encodings test for zarr_format=3
TomNicholas Mar 19, 2025
6ce9578
tidy up
TomNicholas Mar 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 51 additions & 43 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ def open_store(
use_zarr_fill_value_as_mask=None,
write_empty: bool | None = None,
cache_members: bool = True,
):
) -> ZarrStore:
(
zarr_group,
consolidate_on_close,
Expand All @@ -666,21 +666,19 @@ def open_store(
use_zarr_fill_value_as_mask=use_zarr_fill_value_as_mask,
zarr_format=zarr_format,
)

from zarr import Group

group_members: dict[str, Group]
if _zarr_v3():
group_members = {
f"{group}/{path}" if group != "/" else f"/{path}": store
for path, store in dict(zarr_group.members(max_depth=None)).items()
if isinstance(store, Group)
}
group_members[group] = zarr_group
else:
group_paths = list(_iter_zarr_groups(zarr_group, parent=group))
group_members = {path: zarr_group.get(path) for path in group_paths}
group_members: dict[str, Group] = {}
group_paths = list(_iter_zarr_groups(zarr_group, parent=group))
for path in group_paths:
if path == group:
group_members[path] = zarr_group
else:
rel_path = path.removeprefix(f"{group}/")
group_members[path] = zarr_group[rel_path.removeprefix("/")]

return {
out = {
group: cls(
group_store,
mode,
Expand All @@ -695,6 +693,7 @@ def open_store(
)
for group, group_store in group_members.items()
}
return out

@classmethod
def open_group(
Expand Down Expand Up @@ -1047,8 +1046,6 @@ def store(
if self._consolidate_on_close:
kwargs = {}
if _zarr_v3():
# https://github.com/zarr-developers/zarr-python/pull/2113#issuecomment-2386718323
kwargs["path"] = self.zarr_group.name.lstrip("/")
Copy link
Collaborator Author

@TomNicholas TomNicholas Mar 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI @TomAugspurger @jhamman and I are 95% sure that this line was unnecessary and a bug. It was causing consolidated metadata to not be written at the store root each time, and now that we changed it, it fixed various buggy behaviour we were seeing.

relevant to zarr-developers/zarr-python#2113 (comment)

kwargs["zarr_format"] = self.zarr_group.metadata.zarr_format
zarr.consolidate_metadata(self.zarr_group.store, **kwargs)

Expand Down Expand Up @@ -1697,7 +1694,6 @@ def open_groups_as_dict(
)

groups_dict = {}

for path_group, store in stores.items():
store_entrypoint = StoreBackendEntrypoint()

Expand Down Expand Up @@ -1778,40 +1774,52 @@ def _get_open_params(
else:
missing_exc = zarr.errors.GroupNotFoundError

if consolidated is None:
try:
zarr_group = zarr.open_consolidated(store, **open_kwargs)
except (ValueError, KeyError):
# ValueError in zarr-python 3.x, KeyError in 2.x.
if consolidated in [None, True]:
# open the root of the store, in case there is metadata consolidated there
group = open_kwargs.pop("path")

if consolidated:
# TODO: an option to pass the metadata_key keyword
zarr_root_group = zarr.open_consolidated(store, **open_kwargs)
elif consolidated is None:
# same but with more error handling in case no consolidated metadata found
try:
zarr_group = zarr.open_group(store, **open_kwargs)
emit_user_level_warning(
"Failed to open Zarr store with consolidated metadata, "
"but successfully read with non-consolidated metadata. "
"This is typically much slower for opening a dataset. "
"To silence this warning, consider:\n"
"1. Consolidating metadata in this existing store with "
"zarr.consolidate_metadata().\n"
"2. Explicitly setting consolidated=False, to avoid trying "
"to read consolidate metadata, or\n"
"3. Explicitly setting consolidated=True, to raise an "
"error in this case instead of falling back to try "
"reading non-consolidated metadata.",
RuntimeWarning,
)
except missing_exc as err:
raise FileNotFoundError(
f"No such file or directory: '{store}'"
) from err
elif consolidated:
# TODO: an option to pass the metadata_key keyword
zarr_group = zarr.open_consolidated(store, **open_kwargs)
zarr_root_group = zarr.open_consolidated(store, **open_kwargs)
except (ValueError, KeyError):
# ValueError in zarr-python 3.x, KeyError in 2.x.
try:
zarr_root_group = zarr.open_group(store, **open_kwargs)
emit_user_level_warning(
"Failed to open Zarr store with consolidated metadata, "
"but successfully read with non-consolidated metadata. "
"This is typically much slower for opening a dataset. "
"To silence this warning, consider:\n"
"1. Consolidating metadata in this existing store with "
"zarr.consolidate_metadata().\n"
"2. Explicitly setting consolidated=False, to avoid trying "
"to read consolidate metadata, or\n"
"3. Explicitly setting consolidated=True, to raise an "
"error in this case instead of falling back to try "
"reading non-consolidated metadata.",
RuntimeWarning,
)
except missing_exc as err:
raise FileNotFoundError(
f"No such file or directory: '{store}'"
) from err

# but the user should still receive a DataTree whose root is the group they asked for
if group and group != "/":
zarr_group = zarr_root_group[group.removeprefix("/")]
else:
zarr_group = zarr_root_group
else:
if _zarr_v3():
# we have determined that we don't want to use consolidated metadata
# so we set that to False to avoid trying to read it
open_kwargs["use_consolidated"] = False
zarr_group = zarr.open_group(store, **open_kwargs)

close_store_on_close = zarr_group.store is not store

# we use this to determine how to handle fill_value
Expand Down
15 changes: 15 additions & 0 deletions xarray/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,21 @@ def _importorskip(

has_array_api_strict, requires_array_api_strict = _importorskip("array_api_strict")

# Reusable pytest marker that parametrizes a test (or test class) over the
# ``zarr_format`` argument, running it once with the value 2 and once with the
# value 3. The ``zarr_format=3`` case is skipped when ``has_zarr_v3`` is falsy,
# for the reason given in the skipif ``reason`` string below.
# Explicit ``id``s keep the generated test names readable in pytest output.
parametrize_zarr_format = pytest.mark.parametrize(
"zarr_format",
[
pytest.param(2, id="zarr_format=2"),
pytest.param(
3,
marks=pytest.mark.skipif(
not has_zarr_v3,
reason="zarr-python v2 cannot understand the zarr v3 format",
),
id="zarr_format=3",
),
],
)


def _importorskip_h5netcdf_ros3(has_h5netcdf: bool):
if not has_h5netcdf:
Expand Down
3 changes: 2 additions & 1 deletion xarray/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ def create_test_datatree():
"""

def _create_test_datatree(modify=lambda ds: ds):
set1_data = modify(xr.Dataset({"a": 1, "b": 2}))
# note: No arrays are fully zeroes to avoid confusing behaviour with zarr-python's default fill_value
set1_data = modify(xr.Dataset({"a": 0, "b": 1}))
set2_data = modify(xr.Dataset({"a": ("x", [2, 3]), "b": ("x", [0.1, 0.2])}))
root_data = modify(xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])}))

Expand Down
Loading