Skip to content

Add order parameter to to_dense #94

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions src/fast_array_utils/conv/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,25 +21,28 @@


@overload
def to_dense(x: CpuArray | DiskArray | types.sparray | types.spmatrix | types.CSDataset, /, *, to_cpu_memory: bool = False) -> NDArray[Any]: ...
def to_dense(
x: CpuArray | DiskArray | types.sparray | types.spmatrix | types.CSDataset, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False
) -> NDArray[Any]: ...


@overload
def to_dense(x: types.DaskArray, /, *, to_cpu_memory: Literal[False] = False) -> types.DaskArray: ...
def to_dense(x: types.DaskArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[False] = False) -> types.DaskArray: ...
@overload
def to_dense(x: types.DaskArray, /, *, to_cpu_memory: Literal[True]) -> NDArray[Any]: ...
def to_dense(x: types.DaskArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[True]) -> NDArray[Any]: ...


@overload
def to_dense(x: GpuArray | types.CupySpMatrix, /, *, to_cpu_memory: Literal[False] = False) -> types.CupyArray: ...
def to_dense(x: GpuArray | types.CupySpMatrix, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[False] = False) -> types.CupyArray: ...
@overload
def to_dense(x: GpuArray | types.CupySpMatrix, /, *, to_cpu_memory: Literal[True]) -> NDArray[Any]: ...
def to_dense(x: GpuArray | types.CupySpMatrix, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[True]) -> NDArray[Any]: ...


def to_dense(
x: CpuArray | GpuArray | DiskArray | types.CSDataset | types.DaskArray | types.sparray | types.spmatrix | types.CupySpMatrix,
/,
*,
order: Literal["K", "A", "C", "F"] = "K",
to_cpu_memory: bool = False,
) -> NDArray[Any] | types.DaskArray | types.CupyArray:
r"""Convert x to a dense array.
Expand All @@ -52,6 +55,8 @@ def to_dense(
----------
x
Input object to be converted.
order
The order of the output array: ``C`` (row-major) or ``F`` (column-major). ``K`` and ``A`` derive the order from ``x``.
to_cpu_memory
Also load data into memory (resulting in a :class:`numpy.ndarray`).

Expand All @@ -60,4 +65,4 @@ def to_dense(
Dense form of ``x``

"""
return to_dense_(x, to_cpu_memory=to_cpu_memory)
return to_dense_(x, order=order, to_cpu_memory=to_cpu_memory)
36 changes: 25 additions & 11 deletions src/fast_array_utils/conv/_to_dense.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


if TYPE_CHECKING:
from typing import Any
from typing import Any, Literal

from numpy.typing import NDArray

Expand All @@ -22,40 +22,54 @@ def to_dense_(
x: CpuArray | GpuArray | DiskArray | types.DaskArray | types.sparray | types.spmatrix | types.CupySpMatrix,
/,
*,
order: Literal["K", "A", "C", "F"] = "K",
to_cpu_memory: bool = False,
) -> NDArray[Any] | types.CupyArray | types.DaskArray:
del to_cpu_memory # it already is
return np.asarray(x)
return np.asarray(x, order=order)


@to_dense_.register(types.spmatrix | types.sparray) # type: ignore[call-overload,misc]
def _to_dense_cs(x: types.spmatrix | types.sparray, /, *, to_cpu_memory: bool = False) -> NDArray[Any]:
def _to_dense_cs(x: types.spmatrix | types.sparray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any]:
from . import scipy

del to_cpu_memory # it already is
return scipy.to_dense(x)
return scipy.to_dense(x, order=sparse_order(x, order=order))


@to_dense_.register(types.DaskArray)
def _to_dense_dask(x: types.DaskArray, /, *, to_cpu_memory: bool = False) -> NDArray[Any] | types.DaskArray:
def _to_dense_dask(x: types.DaskArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any] | types.DaskArray:
from . import to_dense

x = x.map_blocks(partial(to_dense, to_cpu_memory=to_cpu_memory))
x = x.map_blocks(partial(to_dense, order=order, to_cpu_memory=to_cpu_memory))
return x.compute() if to_cpu_memory else x # type: ignore[return-value]


@to_dense_.register(types.CSDataset)
def _to_dense_ooc(x: types.CSDataset, /, *, to_cpu_memory: bool = False) -> NDArray[Any]:
def _to_dense_ooc(x: types.CSDataset, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any]:
from . import to_dense

if not to_cpu_memory:
msg = "to_cpu_memory must be True if x is an CS{R,C}Dataset"
raise ValueError(msg)
# TODO(flying-sheep): why is to_memory of type Any? # noqa: TD003
return to_dense(cast("types.CSBase", x.to_memory()))
return to_dense(cast("types.CSBase", x.to_memory()), order=sparse_order(x, order=order))


@to_dense_.register(types.CupyArray | types.CupySpMatrix) # type: ignore[call-overload,misc]
def _to_dense_cupy(x: GpuArray, /, *, to_cpu_memory: bool = False) -> NDArray[Any] | types.CupyArray:
x = x.toarray() if isinstance(x, types.CupySpMatrix) else x
return x.get() if to_cpu_memory else x
def _to_dense_cupy(x: GpuArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any] | types.CupyArray:
import cupy as cu

x = x.toarray(sparse_order(x, order=order)) if isinstance(x, types.CupySpMatrix) else cu.asarray(x, order=order)
return x.get(order="A") if to_cpu_memory else x


def sparse_order(x: types.spmatrix | types.sparray | types.CupySpMatrix | types.CSDataset, /, *, order: Literal["K", "A", "C", "F"]) -> Literal["C", "F"]:
    """Resolve ``order`` to a concrete memory layout for a sparse input.

    Parameters
    ----------
    x
        Sparse container whose ``format`` attribute is consulted
        when ``order`` does not name a concrete layout.
    order
        Requested layout. ``"C"`` and ``"F"`` are passed through unchanged;
        ``"K"`` and ``"A"`` are derived from ``x.format``.

    Returns
    -------
    ``"F"`` if the layout is derived and ``x.format == "csc"``,
    ``"C"`` for any other derived case, otherwise ``order`` itself.

    """
    if TYPE_CHECKING:
        # Only for static analysis: narrows ``x`` so ``.format`` type-checks.
        from scipy.sparse._base import _spbase

        assert isinstance(x, _spbase | types.CSDataset)

    # An explicit layout needs no inspection of ``x``.
    if order not in {"K", "A"}:
        return cast("Literal['C', 'F']", order)

    # "K"/"A": follow the storage format; only "csc" maps to column-major.
    return "F" if x.format == "csc" else "C"
46 changes: 39 additions & 7 deletions tests/test_to_dense.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@


if TYPE_CHECKING:
from typing import TypeAlias
from typing import Literal, TypeAlias

from fast_array_utils.typing import CpuArray, DiskArray, GpuArray
from testing.fast_array_utils import ArrayType
Expand All @@ -25,34 +25,66 @@


@pytest.mark.parametrize("to_cpu_memory", [True, False], ids=["to_cpu_memory", "not_to_cpu_memory"])
def test_to_dense(array_type: ArrayType[Array], *, to_cpu_memory: bool) -> None:
@pytest.mark.parametrize("order", argvalues=["K", "C", "F"]) # “A” behaves like “K”
def test_to_dense(array_type: ArrayType[Array], *, order: Literal["K", "C", "F"], to_cpu_memory: bool) -> None:
x = array_type([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
if not to_cpu_memory and array_type.cls in {types.CSCDataset, types.CSRDataset}:
with pytest.raises(ValueError, match="to_cpu_memory must be True if x is an CS{R,C}Dataset"):
to_dense(x, to_cpu_memory=to_cpu_memory)
to_dense(x, order=order, to_cpu_memory=to_cpu_memory)
return

with WARNS_NUMBA if issubclass(array_type.cls, types.CSBase) and not find_spec("numba") else nullcontext():
arr = to_dense(x, to_cpu_memory=to_cpu_memory)
arr = to_dense(x, order=order, to_cpu_memory=to_cpu_memory)

assert_expected_cls(x, arr, to_cpu_memory=to_cpu_memory)
assert arr.shape == (2, 3)
assert_expected_order(x, arr, order=order)


@pytest.mark.parametrize("to_cpu_memory", [True, False], ids=["to_cpu_memory", "not_to_cpu_memory"])
def test_to_dense_extra(coo_matrix_type: ArrayType[Array], *, to_cpu_memory: bool) -> None:
@pytest.mark.parametrize("order", argvalues=["K", "C", "F"]) # “A” behaves like “K”
def test_to_dense_extra(coo_matrix_type: ArrayType[Array], *, order: Literal["K", "C", "F"], to_cpu_memory: bool) -> None:
src_mtx = coo_matrix_type([[1, 2, 3], [4, 5, 6]], dtype=np.float32)

with WARNS_NUMBA if not find_spec("numba") else nullcontext():
arr = to_dense(src_mtx, to_cpu_memory=to_cpu_memory)
arr = to_dense(src_mtx, order=order, to_cpu_memory=to_cpu_memory)

assert_expected_cls(src_mtx, arr, to_cpu_memory=to_cpu_memory)
assert arr.shape == (2, 3)
assert_expected_order(src_mtx, arr, order=order)


def assert_expected_cls(orig: Array, converted: Array, *, to_cpu_memory: bool) -> None:
match (to_cpu_memory, orig):
case False, types.DaskArray():
assert isinstance(converted, types.DaskArray)
assert_expected_cls(orig._meta, converted._meta, to_cpu_memory=to_cpu_memory) # noqa: SLF001
assert_expected_cls(orig.compute(), converted.compute(), to_cpu_memory=to_cpu_memory)
case False, types.CupyArray() | types.CupySpMatrix():
assert isinstance(converted, types.CupyArray)
case _:
assert isinstance(converted, np.ndarray)


def assert_expected_order(orig: Array, converted: Array, *, order: Literal["K", "C", "F"]) -> None:
match converted:
case types.CupyArray() | np.ndarray():
order_expected = get_order(orig) if order == "K" else order
assert converted.flags.c_contiguous == (order_expected == "C")
assert converted.flags.f_contiguous == (order_expected == "F")
case types.DaskArray():
assert_expected_order(orig, converted.compute(), order=order)
case _:
pytest.fail(f"Unsupported array type: {type(converted)}")


def get_order(orig: Array) -> Literal["C", "F"]:
match orig:
case np.ndarray() | types.CupyArray():
return "C" if orig.flags.c_contiguous else "F"
case _ if isinstance(orig, types.CSBase | types.CupyCSMatrix | types.CSDataset):
return "C" if orig.format == "csr" else "F"
case types.DaskArray():
return get_order(orig.compute())
case types.ZarrArray() | types.H5Dataset():
return "C"
pytest.fail(f"Unsupported array type: {type(orig)}")
7 changes: 6 additions & 1 deletion typings/cupy/_core/core.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,21 @@ from types import EllipsisType
from typing import Any, Literal, Self, overload

import numpy as np
from cupy.cuda import Stream
from numpy._core.multiarray import flagsobj
from numpy.typing import NDArray

class ndarray:
dtype: np.dtype[Any]
shape: tuple[int, ...]
size: int
ndim: int
flags: flagsobj

# cupy-specific
def get(self) -> NDArray[Any]: ...
def get(
self, stream: Stream | None = None, order: Literal["C", "F", "A"] = "C", out: NDArray[Any] | None = None, blocking: bool = True
) -> NDArray[Any]: ...

# operators
def __array__(self) -> NDArray[Any]: ...
Expand Down
Loading