Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,9 @@ cu11 = [ "cupy-cuda11x" ]
lazy = [ "xarray>=2025.06.1", "aiohttp", "requests", "anndata[dask]" ]
# https://github.com/dask/dask/issues/11290
# https://github.com/dask/dask/issues/11752
dask = [ "dask[array]>=2023.5.1,!=2024.8.*,!=2024.9.*,<2025.2.0" ]
dask = [
"dask[array]>=2023.5.1,!=2024.8.*,!=2024.9.*,!=2025.2.*,!=2025.3.*,!=2025.4.*,!=2025.5.*,!=2025.6.*,!=2025.7.*,!=2025.8.*",
]

[tool.hatch.version]
source = "vcs"
Expand Down
3 changes: 3 additions & 0 deletions src/anndata/_core/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,9 @@ def _apply_to_dask_array(self, el: DaskArray, *, axis, fill_value=None):
sub_el = _subset(el, make_slice(indexer, axis, len(shape)))

if any(indexer == -1):
# TODO: Remove this condition once https://github.com/dask/dask/pull/12078 is released
if isinstance(sub_el._meta, CSArray | CSMatrix) and np.isscalar(fill_value):
fill_value = np.array([[fill_value]])
sub_el[make_slice(indexer == -1, axis, len(shape))] = fill_value

return sub_el
Expand Down
22 changes: 12 additions & 10 deletions tests/test_concatenate.py
Original file line number Diff line number Diff line change
Expand Up @@ -957,17 +957,20 @@ def test_nan_merge(axis_name, join_type, array_type):
alt_axis, alt_axis_name = merge._resolve_axis(1 - axis)
mapping_attr = f"{alt_axis_name}m"
adata_shape = (20, 10)

arr = array_type(
sparse.random(adata_shape[alt_axis], 10, density=0.1, format="csr")
)
arr_nan = arr.copy()
# TODO: Revert to https://github.com/scverse/anndata/blob/71fdf821919fc5ff3c864dc74c4432c370573984/tests/test_concatenate.py#L961-L970 after https://github.com/scipy/scipy/pull/23626.
# The need for this handling arose as a result of
# https://github.com/dask/dask/pull/11755/files#diff-65211e64fa680da306e9612b92c60f557365507d46486325f0e7e04359bce64fR456-R459
sparse_arr = sparse.random(adata_shape[alt_axis], 10, density=0.1, format="csr")
sparse_arr_nan = sparse_arr.copy()
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=sparse.SparseEfficiencyWarning)
for _ in range(10):
arr_nan[np.random.choice(arr.shape[0]), np.random.choice(arr.shape[1])] = (
np.nan
)
sparse_arr_nan[
np.random.choice(sparse_arr.shape[0]),
np.random.choice(sparse_arr.shape[1]),
] = np.nan
arr = array_type(sparse_arr)
arr_nan = array_type(sparse_arr_nan)

_data = {"X": sparse.csr_matrix(adata_shape), mapping_attr: {"arr": arr_nan}}
orig1 = AnnData(**_data)
Expand Down Expand Up @@ -1811,7 +1814,7 @@ def test_concat_dask_sparse_matches_memory(join_type, merge_strategy):
X = sparse.random(50, 20, density=0.5, format="csr")
X_dask = da.from_array(X, chunks=(5, 20))
var_names_1 = [f"gene_{i}" for i in range(20)]
var_names_2 = [f"gene_{i}{'_foo' if (i % 2) else ''}" for i in range(20, 40)]
var_names_2 = [f"gene_{i}{'_foo' if (i % 2) else ''}" for i in range(20)]

ad1 = AnnData(X=X, var=pd.DataFrame(index=var_names_1))
ad2 = AnnData(X=X, var=pd.DataFrame(index=var_names_2))
Expand All @@ -1821,7 +1824,6 @@ def test_concat_dask_sparse_matches_memory(join_type, merge_strategy):

res_in_memory = concat([ad1, ad2], join=join_type, merge=merge_strategy)
res_dask = concat([ad1_dask, ad2_dask], join=join_type, merge=merge_strategy)

assert_equal(res_in_memory, res_dask)


Expand Down
4 changes: 3 additions & 1 deletion tests/test_dask_view_mem.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,10 @@ def _alloc_cache():
# if we put a 2 factor on 2**19
# the results seem to agree better with the experimental results
# For example from dask.random we allocate 1mb
# As of dask 2025.09.*, this needs a bit more than the previous 1.5 MB.
# TODO: Why?
@pytest.mark.usefixtures("_alloc_cache")
@pytest.mark.limit_memory("1.5 MB")
@pytest.mark.limit_memory("1.7 MB")
def test_size_of_view(mapping_name, give_chunks):
import dask.array as da

Expand Down
14 changes: 8 additions & 6 deletions tests/test_views.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from contextlib import ExitStack
from contextlib import ExitStack, nullcontext
from copy import deepcopy
from operator import mul
from typing import TYPE_CHECKING
Expand Down Expand Up @@ -189,14 +189,16 @@ def test_modify_view_component(matrix_type, mapping_name, request):
with pytest.warns(ad.ImplicitModificationWarning, match=rf".*\.{mapping_name}.*"):
m[0, 0] = 100
assert not subset.is_view
assert getattr(subset, mapping_name)["m"][0, 0] == 100
# TODO: Remove `raises` after https://github.com/scipy/scipy/pull/23626.
with (
pytest.raises(ValueError, match=r"shape mismatch")
if "sparse_dask" in request.node.callspec.id
else nullcontext()
):
assert getattr(subset, mapping_name)["m"][0, 0] == 100

assert init_hash == hash_func(adata)

if "sparse_array_dask_array" in request.node.callspec.id:
msg = "sparse arrays in dask are generally expected to fail but in this case they do not"
pytest.fail(msg)


@pytest.mark.parametrize("attr", ["obsm", "varm"])
def test_set_obsm_key(adata, attr):
Expand Down
Loading