|
8 | 8 | - The SCALLOPS development team |
9 | 9 | """ |
10 | 10 |
|
11 | | -import importlib |
12 | 11 | import itertools |
13 | 12 | from collections.abc import Callable, Sequence |
14 | 13 | from typing import Any, Literal, Union |
15 | 14 |
|
16 | | -import dask |
17 | | -import dask.array as da |
18 | 15 | import numpy as np |
19 | 16 | import xarray as xr |
20 | 17 |
|
@@ -92,73 +89,6 @@ def _get_dims( |
92 | 89 | return _dims |
93 | 90 |
|
94 | 91 |
|
def dask_grouped_quantiles(
    array: xr.DataArray, dims: list[str], q: list[float]
) -> xr.DataArray:
    """Compute quantiles for grouped data using Dask.

    For every combination of coordinate values along ``dims``, the matching
    slice of ``array`` is flattened and its quantiles are computed with
    :func:`dask.array.percentile`. The results are written into a new
    DataArray whose leading dimension is ``"quantile"`` followed by ``dims``.

    The ``tdigest`` percentile backend is used when the optional ``crick``
    package can be imported; otherwise Dask's ``default`` backend is used.

    :param array: The input DataArray containing the data.
    :param dims: List of dimensions that define the groups. The list is first
        passed through ``_get_dims`` (defined elsewhere in this module).
    :param q: List of quantiles to compute, each value should be between 0 and 1.
    :return: A DataArray with dimensions ``["quantile", *dims]`` holding the
        computed quantiles, with a ``quantile`` coordinate equal to ``q``.

    :raises AssertionError:
        If no quantiles are provided in the `q` list.

    .. note::
        Any error raised by ``_get_dims`` (presumably for dimensions missing
        from ``array`` -- confirm against that helper) propagates unchanged.

    :example:

    .. code-block:: python

        import xarray as xr
        import numpy as np
        import dask.array as da

        data = da.random.random((10, 20, 30), chunks=(5, 10, 15))
        array = xr.DataArray(data, dims=["x", "y", "z"])

        # Compute quantiles for dimensions 'x' and 'y'
        quantiles = dask_grouped_quantiles(array, ["x", "y"], [0.25, 0.5, 0.75])
        print(quantiles)
    """
    # Raise explicitly instead of using `assert`: assert statements are
    # removed under `python -O`, which would silently accept an empty `q`.
    # The exception type and message are kept for existing callers.
    if len(q) == 0:
        raise AssertionError("No quantiles provided")
    dims = _get_dims(array, dims)

    coords = {d: array.coords[d] for d in dims}
    coords["quantile"] = q

    # dask.array.percentile takes percentages (0-100), not fractions (0-1).
    percentiles = [_q * 100 for _q in q]

    # Pre-allocate the output: one slot per (quantile, group) combination.
    results = xr.DataArray(
        da.zeros((len(q), *(array.sizes[d] for d in dims))),
        dims=["quantile", *dims],
        coords=coords,
    )

    dim_vals = [array[d].values for d in dims]

    # Use the t-digest backend only when its optional dependency is importable.
    try:
        importlib.import_module("crick")
    except ModuleNotFoundError:
        internal_method = "default"
    else:
        internal_method = "tdigest"

    with dask.config.set(**{"array.slicing.split_large_chunks": True}):
        # Visit every group, i.e. every combination of values along `dims`.
        for dim_val in itertools.product(*dim_vals):
            sel = dict(zip(dims, dim_val))
            values = da.percentile(
                array.sel(sel).data.reshape(-1),
                percentiles,
                internal_method=internal_method,
            )
            results.loc[sel] = values
    return results
160 | | - |
161 | | - |
162 | 92 | def apply_data_array( |
163 | 93 | array: xr.DataArray, |
164 | 94 | dims: list[str], |
|
0 commit comments