Skip to content

Add order parameter to to_dense #94

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions src/fast_array_utils/conv/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,25 +21,28 @@


@overload
def to_dense(x: CpuArray | DiskArray | types.sparray | types.spmatrix | types.CSDataset, /, *, to_cpu_memory: bool = False) -> NDArray[Any]: ...
def to_dense(
x: CpuArray | DiskArray | types.sparray | types.spmatrix | types.CSDataset, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False
) -> NDArray[Any]: ...


@overload
def to_dense(x: types.DaskArray, /, *, to_cpu_memory: Literal[False] = False) -> types.DaskArray: ...
def to_dense(x: types.DaskArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[False] = False) -> types.DaskArray: ...
@overload
def to_dense(x: types.DaskArray, /, *, to_cpu_memory: Literal[True]) -> NDArray[Any]: ...
def to_dense(x: types.DaskArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[True]) -> NDArray[Any]: ...


@overload
def to_dense(x: GpuArray | types.CupySpMatrix, /, *, to_cpu_memory: Literal[False] = False) -> types.CupyArray: ...
def to_dense(x: GpuArray | types.CupySpMatrix, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[False] = False) -> types.CupyArray: ...
@overload
def to_dense(x: GpuArray | types.CupySpMatrix, /, *, to_cpu_memory: Literal[True]) -> NDArray[Any]: ...
def to_dense(x: GpuArray | types.CupySpMatrix, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[True]) -> NDArray[Any]: ...


def to_dense(
x: CpuArray | GpuArray | DiskArray | types.CSDataset | types.DaskArray | types.sparray | types.spmatrix | types.CupySpMatrix,
/,
*,
order: Literal["K", "A", "C", "F"] = "K",
to_cpu_memory: bool = False,
) -> NDArray[Any] | types.DaskArray | types.CupyArray:
r"""Convert x to a dense array.
Expand All @@ -52,6 +55,16 @@ def to_dense(
----------
x
Input object to be converted.
order
The order of the output array: ``C`` (row-major) or ``F`` (column-major). ``K`` and ``A`` derive the order from ``x``.
Comment on lines +58 to +59
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would add more info here, i.e., what you say in the PR comment.

Copy link
Member Author

@flying-sheep flying-sheep May 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


The default matches numpy, and therefore diverges from the ``scipy.sparse`` matrices’
:meth:`~scipy.sparse.csr_array.toarray`\ ’s default behavior
of always returning a ``C``-contiguous array.
Instead, CSC matrices become F-contiguous arrays when ``order="K"`` (the default).

Dask :class:`~dask.array.Array`\ s concatenation behavior will result in ``order``
having no effect on the :func:`dask.compute` / ``to_cpu_memory=True`` result.
to_cpu_memory
Also load data into memory (resulting in a :class:`numpy.ndarray`).

Expand All @@ -60,4 +73,4 @@ def to_dense(
Dense form of ``x``

"""
return to_dense_(x, to_cpu_memory=to_cpu_memory)
return to_dense_(x, order=order, to_cpu_memory=to_cpu_memory)
40 changes: 29 additions & 11 deletions src/fast_array_utils/conv/_to_dense.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: MPL-2.0
from __future__ import annotations

import warnings
from functools import partial, singledispatch
from typing import TYPE_CHECKING, cast

Expand All @@ -11,7 +12,7 @@


if TYPE_CHECKING:
from typing import Any
from typing import Any, Literal

from numpy.typing import NDArray

Expand All @@ -22,40 +23,57 @@
x: CpuArray | GpuArray | DiskArray | types.DaskArray | types.sparray | types.spmatrix | types.CupySpMatrix,
/,
*,
order: Literal["K", "A", "C", "F"] = "K",
to_cpu_memory: bool = False,
) -> NDArray[Any] | types.CupyArray | types.DaskArray:
del to_cpu_memory # it already is
return np.asarray(x)
return np.asarray(x, order=order)


@to_dense_.register(types.spmatrix | types.sparray) # type: ignore[call-overload,misc]
def _to_dense_cs(x: types.spmatrix | types.sparray, /, *, to_cpu_memory: bool = False) -> NDArray[Any]:
def _to_dense_cs(x: types.spmatrix | types.sparray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any]:
from . import scipy

del to_cpu_memory # it already is
return scipy.to_dense(x)
return scipy.to_dense(x, order=sparse_order(x, order=order))


@to_dense_.register(types.DaskArray)
def _to_dense_dask(x: types.DaskArray, /, *, to_cpu_memory: bool = False) -> NDArray[Any] | types.DaskArray:
def _to_dense_dask(x: types.DaskArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any] | types.DaskArray:
from . import to_dense

x = x.map_blocks(partial(to_dense, to_cpu_memory=to_cpu_memory))
if order == "F":
msg = f"{order=!r} will probably be ignored: Dask can not be made to emit F-contiguous arrays reliably."
warnings.warn(msg, RuntimeWarning, stacklevel=4)
x = x.map_blocks(partial(to_dense, order=order, to_cpu_memory=to_cpu_memory))

Check warning on line 48 in src/fast_array_utils/conv/_to_dense.py

View check run for this annotation

Codecov / codecov/patch

src/fast_array_utils/conv/_to_dense.py#L45-L48

Added lines #L45 - L48 were not covered by tests
return x.compute() if to_cpu_memory else x # type: ignore[return-value]


@to_dense_.register(types.CSDataset)
def _to_dense_ooc(x: types.CSDataset, /, *, to_cpu_memory: bool = False) -> NDArray[Any]:
def _to_dense_ooc(x: types.CSDataset, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any]:
from . import to_dense

if not to_cpu_memory:
msg = "to_cpu_memory must be True if x is an CS{R,C}Dataset"
raise ValueError(msg)
# TODO(flying-sheep): why is to_memory of type Any? # noqa: TD003
return to_dense(cast("types.CSBase", x.to_memory()))
return to_dense(cast("types.CSBase", x.to_memory()), order=sparse_order(x, order=order))


@to_dense_.register(types.CupyArray | types.CupySpMatrix) # type: ignore[call-overload,misc]
def _to_dense_cupy(x: GpuArray, /, *, to_cpu_memory: bool = False) -> NDArray[Any] | types.CupyArray:
x = x.toarray() if isinstance(x, types.CupySpMatrix) else x
return x.get() if to_cpu_memory else x
def _to_dense_cupy(x: GpuArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any] | types.CupyArray:
import cupy as cu

Check warning on line 65 in src/fast_array_utils/conv/_to_dense.py

View check run for this annotation

Codecov / codecov/patch

src/fast_array_utils/conv/_to_dense.py#L65

Added line #L65 was not covered by tests

x = x.toarray(sparse_order(x, order=order)) if isinstance(x, types.CupySpMatrix) else cu.asarray(x, order=order)
return x.get(order="A") if to_cpu_memory else x

Check warning on line 68 in src/fast_array_utils/conv/_to_dense.py

View check run for this annotation

Codecov / codecov/patch

src/fast_array_utils/conv/_to_dense.py#L67-L68

Added lines #L67 - L68 were not covered by tests


def sparse_order(x: types.spmatrix | types.sparray | types.CupySpMatrix | types.CSDataset, /, *, order: Literal["K", "A", "C", "F"]) -> Literal["C", "F"]:
    """Resolve ``order`` to a concrete layout for a sparse container.

    Parameters
    ----------
    x
        Sparse container whose ``format`` attribute is inspected.
    order
        Requested layout. ``"C"`` and ``"F"`` are passed through unchanged;
        ``"K"`` and ``"A"`` are derived from ``x.format``.

    Returns
    -------
    ``"F"`` if the order is derived and ``x.format == "csc"``,
    ``"C"`` for any other derived case, otherwise ``order`` itself.
    """
    if TYPE_CHECKING:
        # Only for the type checker: narrows ``x`` so that ``.format`` is known to exist.
        from scipy.sparse._base import _spbase

        assert isinstance(x, _spbase | types.CSDataset)

    # An explicit layout needs no inspection of ``x``.
    if order not in {"K", "A"}:
        return cast("Literal['C', 'F']", order)
    return "F" if x.format == "csc" else "C"
27 changes: 21 additions & 6 deletions src/fast_array_utils/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@


__all__ = [
"COOBase",
"CSArray",
"CSBase",
"CSDataset",
"CSMatrix",
"CupyArray",
"CupyCOOMatrix",
"CupyCSCMatrix",
"CupyCSMatrix",
"CupyCSRMatrix",
Expand All @@ -22,30 +24,41 @@
"H5Group",
"ZarrArray",
"ZarrGroup",
"coo_array",
"coo_matrix",
"csc_array",
"csc_matrix",
"csr_array",
"csr_matrix",
"sparray",
"spmatrix",
]

T_co = TypeVar("T_co", covariant=True)


# scipy sparse
if TYPE_CHECKING:
from scipy.sparse import csc_array, csc_matrix, csr_array, csr_matrix, sparray, spmatrix
from scipy.sparse import coo_array, coo_matrix, csc_array, csc_matrix, csr_array, csr_matrix, sparray, spmatrix
else:
try: # cs?_array isn’t available in older scipy versions
from scipy.sparse import csc_array, csr_array, sparray
from scipy.sparse import coo_array, csc_array, csr_array, sparray
except ImportError: # pragma: no cover
coo_array = type("coo_array", (), {})
csc_array = type("csc_array", (), {})
csr_array = type("csr_array", (), {})
sparray = type("sparray", (), {})
csc_array.__module__ = csr_array.__module__ = sparray.__module__ = "scipy.sparse"
coo_array.__module__ = csc_array.__module__ = csr_array.__module__ = sparray.__module__ = "scipy.sparse"

try: # cs?_matrix is available when scipy is installed
from scipy.sparse import csc_matrix, csr_matrix, spmatrix
from scipy.sparse import coo_matrix, csc_matrix, csr_matrix, spmatrix
except ImportError: # pragma: no cover
coo_matrix = type("coo_matrix", (), {})
csc_matrix = type("csc_matrix", (), {})
csr_matrix = type("csr_matrix", (), {})
spmatrix = type("spmatrix", (), {})
csc_matrix.__module__ = csr_matrix.__module__ = spmatrix.__module__ = "scipy.sparse"
coo_matrix.__module__ = csc_matrix.__module__ = csr_matrix.__module__ = spmatrix.__module__ = "scipy.sparse"
COOBase = coo_matrix | coo_array
CSMatrix = csc_matrix | csr_matrix
CSArray = csc_array | csr_array
CSBase = CSMatrix | CSArray
Expand All @@ -54,16 +67,18 @@

if TYPE_CHECKING or find_spec("cupy"): # cupy always comes with cupyx
from cupy import ndarray as CupyArray
from cupyx.scipy.sparse import coo_matrix as CupyCOOMatrix

Check warning on line 70 in src/fast_array_utils/types.py

View check run for this annotation

Codecov / codecov/patch

src/fast_array_utils/types.py#L70

Added line #L70 was not covered by tests
from cupyx.scipy.sparse import csc_matrix as CupyCSCMatrix
from cupyx.scipy.sparse import csr_matrix as CupyCSRMatrix
from cupyx.scipy.sparse import spmatrix as CupySpMatrix
else: # pragma: no cover
CupyArray = type("ndarray", (), {})
CupyArray.__module__ = "cupy"
CupyCOOMatrix = type("coo_matrix", (), {})
CupyCSCMatrix = type("csc_matrix", (), {})
CupyCSRMatrix = type("csr_matrix", (), {})
CupySpMatrix = type("spmatrix", (), {})
CupyCSCMatrix.__module__ = CupyCSRMatrix.__module__ = CupySpMatrix.__module__ = "cupyx.scipy.sparse"
CupyCOOMatrix.__module__ = CupyCSCMatrix.__module__ = CupyCSRMatrix.__module__ = CupySpMatrix.__module__ = "cupyx.scipy.sparse"
CupyCSMatrix = CupyCSRMatrix | CupyCSCMatrix


Expand Down
8 changes: 4 additions & 4 deletions src/testing/fast_array_utils/_array_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@
import h5py
from numpy.typing import ArrayLike, DTypeLike, NDArray

from fast_array_utils.types import CSBase
from fast_array_utils.typing import CpuArray, DiskArray, GpuArray

InnerArray = CpuArray | GpuArray | DiskArray
Array: TypeAlias = InnerArray | types.DaskArray | types.CSDataset
ExtendedArray = Array | types.COOBase | types.CupyCOOMatrix

Arr = TypeVar("Arr", bound=Array, default=Array)
Arr_co = TypeVar("Arr_co", bound=Array, covariant=True)
Arr = TypeVar("Arr", bound=ExtendedArray, default=Array)
Arr_co = TypeVar("Arr_co", bound=ExtendedArray, covariant=True)

Inner = TypeVar("Inner", bound="ArrayType[InnerArray, None] | None", default=Any)

Expand Down Expand Up @@ -305,7 +305,7 @@ def _to_scipy_sparse(
/,
*,
dtype: DTypeLike | None = None,
cls: type[CSBase] | None = None,
cls: type[types.CSBase] | None = None,
) -> types.CSBase:
"""Convert to a scipy sparse matrix/array."""
if isinstance(x, types.DaskArray):
Expand Down
6 changes: 4 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
if TYPE_CHECKING:
from collections.abc import Callable

from fast_array_utils import types


@pytest.fixture
def dask_viz(request: pytest.FixtureRequest, cache: pytest.Cache) -> Callable[[object], None]:
Expand Down Expand Up @@ -41,5 +43,5 @@ def viz(obj: object) -> None:


@pytest.fixture(scope="session", params=COO_PARAMS)
def coo_matrix_type(request: pytest.FixtureRequest) -> ArrayType:
return cast("ArrayType", request.param)
def coo_matrix_type(request: pytest.FixtureRequest) -> ArrayType[types.COOBase | types.CupyCOOMatrix]:
return cast("ArrayType[types.COOBase | types.CupyCOOMatrix]", request.param)
4 changes: 1 addition & 3 deletions tests/test_test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@
if TYPE_CHECKING:
from typing import Any

from cupyx.scipy.sparse import coo_matrix as CupyCooMatrix
from numpy.typing import DTypeLike, NDArray
from scipy.sparse import coo_array, coo_matrix

from testing.fast_array_utils import Array, ArrayType

Expand Down Expand Up @@ -54,7 +52,7 @@ def test_conv_other(array_type: ArrayType, other_array_type: ArrayType) -> None:
@pytest.mark.array_type(skip=Flags.Dask | Flags.Disk | Flags.Gpu)
def test_conv_extra(
array_type: ArrayType[NDArray[np.number[Any]] | types.CSBase],
coo_matrix_type: ArrayType[coo_matrix | coo_array | CupyCooMatrix],
coo_matrix_type: ArrayType[types.COOBase | types.CupyCOOMatrix],
) -> None:
src_arr = array_type(np.arange(12).reshape(3, 4), dtype=np.float32)
arr = coo_matrix_type(src_arr)
Expand Down
70 changes: 61 additions & 9 deletions tests/test_to_dense.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,46 +13,98 @@


if TYPE_CHECKING:
from typing import TypeAlias
from collections.abc import Iterable
from typing import Literal, TypeAlias

from fast_array_utils.typing import CpuArray, DiskArray, GpuArray
from testing.fast_array_utils import ArrayType

Array: TypeAlias = CpuArray | GpuArray | DiskArray | types.CSDataset | types.DaskArray
ExtendedArray: TypeAlias = Array | types.COOBase | types.CupyCOOMatrix


WARNS_NUMBA = pytest.warns(RuntimeWarning, match="numba is not installed; falling back to slow conversion")


@pytest.mark.parametrize("to_cpu_memory", [True, False], ids=["to_cpu_memory", "not_to_cpu_memory"])
def test_to_dense(array_type: ArrayType[Array], *, to_cpu_memory: bool) -> None:
@pytest.mark.parametrize("order", argvalues=["K", "C", "F"]) # “A” behaves like “K”
def test_to_dense(array_type: ArrayType[Array], *, order: Literal["K", "C", "F"], to_cpu_memory: bool) -> None:
x = array_type([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
if not to_cpu_memory and array_type.cls in {types.CSCDataset, types.CSRDataset}:
with pytest.raises(ValueError, match="to_cpu_memory must be True if x is an CS{R,C}Dataset"):
to_dense(x, to_cpu_memory=to_cpu_memory)
to_dense(x, order=order, to_cpu_memory=to_cpu_memory)
return

with WARNS_NUMBA if issubclass(array_type.cls, types.CSBase) and not find_spec("numba") else nullcontext():
arr = to_dense(x, to_cpu_memory=to_cpu_memory)
with (
pytest.warns(RuntimeWarning, match="Dask can not be made to emit F-contiguous arrays")
if (order == "F" and array_type.cls is types.DaskArray)
else nullcontext(),
WARNS_NUMBA if issubclass(array_type.cls, types.CSBase) and not find_spec("numba") else nullcontext(),
):
arr = to_dense(x, order=order, to_cpu_memory=to_cpu_memory)

assert_expected_cls(x, arr, to_cpu_memory=to_cpu_memory)
assert arr.shape == (2, 3)
# Dask is unreliable: for explicit “F”, we emit a warning (tested above), for “K” we just ignore the result
if not (array_type.cls is types.DaskArray and order in {"F", "K"}):
assert_expected_order(x, arr, order=order)


@pytest.mark.parametrize("to_cpu_memory", [True, False], ids=["to_cpu_memory", "not_to_cpu_memory"])
def test_to_dense_extra(coo_matrix_type: ArrayType[Array], *, to_cpu_memory: bool) -> None:
@pytest.mark.parametrize("order", argvalues=["K", "C", "F"]) # “A” behaves like “K”
def test_to_dense_extra(coo_matrix_type: ArrayType[types.COOBase | types.CupyCOOMatrix], *, order: Literal["K", "C", "F"], to_cpu_memory: bool) -> None:
src_mtx = coo_matrix_type([[1, 2, 3], [4, 5, 6]], dtype=np.float32)

with WARNS_NUMBA if not find_spec("numba") else nullcontext():
arr = to_dense(src_mtx, to_cpu_memory=to_cpu_memory)
arr = to_dense(src_mtx, order=order, to_cpu_memory=to_cpu_memory)

assert_expected_cls(src_mtx, arr, to_cpu_memory=to_cpu_memory)
assert arr.shape == (2, 3)
assert_expected_order(src_mtx, arr, order=order)


def assert_expected_cls(orig: Array, converted: Array, *, to_cpu_memory: bool) -> None:
def assert_expected_cls(orig: ExtendedArray, converted: Array, *, to_cpu_memory: bool) -> None:
match (to_cpu_memory, orig):
case False, types.DaskArray():
assert isinstance(converted, types.DaskArray)
assert_expected_cls(orig._meta, converted._meta, to_cpu_memory=to_cpu_memory) # noqa: SLF001
assert_expected_cls(orig.compute(), converted.compute(), to_cpu_memory=to_cpu_memory)
case False, types.CupyArray() | types.CupySpMatrix():
assert isinstance(converted, types.CupyArray)
case _:
assert isinstance(converted, np.ndarray)


def assert_expected_order(orig: ExtendedArray, converted: Array, *, order: Literal["K", "C", "F"]) -> None:
    """Assert that ``converted`` is contiguous in an acceptable layout.

    For ``order="K"`` every layout yielded by ``get_orders(orig)`` is acceptable;
    otherwise only ``order`` itself is. Dask results are computed first and then
    checked recursively. Any other type of ``converted`` fails the test.
    """
    if isinstance(converted, (types.CupyArray, np.ndarray)):
        candidates = get_orders(orig) if order == "K" else {order}
        orders = {}
        for order_exp in candidates:
            orders[order_exp] = converted.flags[f"{order_exp}_CONTIGUOUS"]  # type: ignore[index]
        # The collected flags double as the assertion message for easier debugging.
        assert any(orders.values()), orders
    elif isinstance(converted, types.DaskArray):
        assert_expected_order(orig, converted.compute(), order=order)
    else:
        pytest.fail(f"Unsupported array type: {type(converted)}")


def get_orders(orig: ExtendedArray) -> Iterable[Literal["C", "F"]]:
    """Yield every memory layout that ``orig`` is compatible with.

    Dense numpy/cupy arrays report what their contiguity flags say, so an array
    can yield both ``"C"`` and ``"F"`` (e.g. when at most one axis is longer than 1).
    Sparse containers are classified by ``format``: ``csr`` yields ``"C"``,
    ``csc`` yields ``"F"`` and ``coo`` yields both.
    Dask arrays are computed and classified by their result,
    Zarr arrays and HDF5 datasets yield ``"C"``.
    Any other type fails the test.
    """
    if isinstance(orig, (np.ndarray, types.CupyArray)):
        flags = orig.flags
        if flags.c_contiguous:
            yield "C"
        if flags.f_contiguous:
            yield "F"
    elif isinstance(orig, types.CSBase | types.COOBase | types.CupyCSMatrix | types.CupyCOOMatrix | types.CSDataset):
        # Each layout is paired with the sparse formats that count as compatible with it.
        for layout, formats in (("C", {"csr", "coo"}), ("F", {"csc", "coo"})):
            if orig.format in formats:
                yield layout
    elif isinstance(orig, types.DaskArray):
        yield from get_orders(orig.compute())
    elif isinstance(orig, (types.ZarrArray, types.H5Dataset)):
        yield "C"
    else:
        pytest.fail(f"Unsupported array type: {type(orig)}")
Loading
Loading