Skip to content

Commit cf54bf5

Browse files
committed
Backport PR #2445: fix: dont override dataset kwargs in loop for sparse
1 parent eaf2cd6 commit cf54bf5

3 files changed

Lines changed: 24 additions & 2 deletions

File tree

docs/release-notes/2445.fix.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Make sure `indices`, `data`, and `indptr` each have the `array.target_shard_size_bytes` setting (as configured via `zarr.config.set`) applied when writing sparse matrices, instead of one sub-array's sharding settings overriding those of the others. {user}`ilan-gold`

src/anndata/_io/specs/methods.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -838,9 +838,9 @@ def write_sparse_compressed(
838838
else:
839839
with zarr_v3_sharding(
840840
dataset_kwargs, format=f.metadata.zarr_format
841-
) as dataset_kwargs:
841+
) as dataset_kwargs_local:
842842
arr = g.create_array(
843-
attr_name, shape=attr.shape, dtype=dtype, **dataset_kwargs
843+
attr_name, shape=attr.shape, dtype=dtype, **dataset_kwargs_local
844844
)
845845
# see https://github.com/zarr-developers/zarr-python/discussions/2712
846846
arr[...] = attr[...]

tests/test_io_elementwise.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -969,6 +969,27 @@ def test_write_auto_sharded_default_warns(tmp_path: Path):
969969
adata.write_zarr(path)
970970

971971

972+
@pytest.mark.zarr_io
973+
@pytest.mark.skipif(
974+
Version(version("zarr")) < Version("3.1.4"),
975+
reason="autosharding with chosen size was not available",
976+
)
977+
def test_write_auto_sharded_size_sparse():
978+
path = "memory://check_shards.zarr"
979+
z = zarr.open(path)
980+
mat = sparse.random(
981+
1000, 1000, density=0.5, format="csr", random_state=np.random.default_rng(42)
982+
)
983+
ad.io.write_elem(z, "two_shards_per_sub_element", mat)
984+
# i.e., there are at most two shards since one shard will contain two chunks,
985+
# and the other the last elements, since the target size is 1GB uncompressed.
986+
for sub_element in ["indices", "data", "indptr"]:
987+
assert (
988+
z["two_shards_per_sub_element"][sub_element].shape[0]
989+
/ z["two_shards_per_sub_element"][sub_element].shards[0]
990+
) < 2, sub_element
991+
992+
972993
@pytest.mark.zarr_io
973994
@pytest.mark.skipif(is_zarr_v2(), reason="auto sharding is allowed only for zarr v3.")
974995
def test_write_auto_sharded_does_not_override(tmp_path: Path):

0 commit comments

Comments
 (0)