From d76361d6ef040d35882f490ee39b7d273214a70a Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Thu, 1 Aug 2024 17:58:42 +0200
Subject: [PATCH] Add a benchmark for take

---
Reviewer notes (this region is ignored by `git am`):

* `_create_indexer(n, chunk_n)` builds `np.arange(0, n)`, shuffles only the
  first `n // 10` entries in place (the slice is a view, so `idx` itself is
  modified), and returns the positions as a list of lists, one list per
  `chunk_n`-sized slice. The shuffle uses NumPy's global RNG and is unseeded.
* `test_take` flattens those lists into one indexer, applies it to axis 1 of
  an `(n, n, n)` array with cubic chunks of edge `chunk_n`, and computes the
  sum of the result.
* `test_shuffle` is added commented out; it would pass the nested indexer to
  `x.shuffle(..., axis=1)` instead of flattening it.

 tests/benchmarks/test_array.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/tests/benchmarks/test_array.py b/tests/benchmarks/test_array.py
index 1b54b5d255..adf18b4470 100644
--- a/tests/benchmarks/test_array.py
+++ b/tests/benchmarks/test_array.py
@@ -5,6 +5,7 @@
 import dask.array as da
 import numpy as np
 import pytest
+from dask.core import flatten
 from dask.utils import parse_bytes
 
 from ..utils_test import (
@@ -257,3 +258,29 @@ def test_map_overlap_sample(small_client, new_array):
     x = new_array((10000, 10000), chunks=(50, 50))  # 40_000 19.5 kiB chunks
     y = x.map_overlap(lambda x: x, depth=1)
     y[5000:5010, 5000:5010].compute()
+
+
+def _create_indexer(n, chunk_n):
+    idx = np.arange(0, n)
+    np.random.shuffle(idx[: n // 10])
+
+    indexer = []
+    for i in range(0, n, chunk_n):
+        indexer.append(idx[i : i + chunk_n].tolist())
+    return indexer
+
+
+def test_take(small_client, new_array):
+    n = 2000
+    chunk_n = 250
+    x = new_array((n, n, n), chunks=(chunk_n, chunk_n, chunk_n))
+    indexer = list(flatten(_create_indexer(n, chunk_n)))
+    x[:, indexer, :].sum().compute()
+
+
+# def test_shuffle(small_client, new_array):
+#     n = 2000
+#     chunk_n = 250
+#     x = new_array((n, n, n), chunks=(chunk_n, chunk_n, chunk_n))
+#     indexer = _create_indexer(n, chunk_n)
+#     x.shuffle(indexer, axis=1).sum().compute()