Skip to content

Commit 99f03c2

Browse files
dcherian and Illviljan authored
Fix binning by unsorted array (#7762)
* Fix binning by unsorted array

  Closes #7759

* Update xarray/tests/test_groupby.py

  Co-authored-by: Illviljan <[email protected]>

---------

Co-authored-by: Illviljan <[email protected]>
1 parent 68bae08 commit 99f03c2

File tree

3 files changed

+40
-41
lines changed

3 files changed

+40
-41
lines changed

doc/whats-new.rst

+4-20
Original file line numberDiff line numberDiff line change
@@ -16,33 +16,17 @@ What's New
1616
1717
1818
19-
.. _whats-new.2023.05.0:
19+
.. _whats-new.2023.04.1:
2020

21-
v2023.05.0 (unreleased)
21+
v2023.04.1 (unreleased)
2222
-----------------------
2323

24-
New Features
25-
~~~~~~~~~~~~
26-
27-
28-
Breaking changes
29-
~~~~~~~~~~~~~~~~
30-
31-
32-
Deprecations
33-
~~~~~~~~~~~~
34-
24+
This is a patch release to fix a bug with binning (:issue:`7759`)
3525

3626
Bug fixes
3727
~~~~~~~~~
3828

39-
40-
Documentation
41-
~~~~~~~~~~~~~
42-
43-
44-
Internal Changes
45-
~~~~~~~~~~~~~~~~
29+
- Fix binning by unsorted arrays. (:issue:`7759`)
4630

4731

4832
.. _whats-new.2023.04.0:

xarray/core/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ def _factorize_bins(
338338
if (codes == -1).all():
339339
raise ValueError(f"None of the data falls within bins with edges {bins!r}")
340340
full_index = binned.categories
341-
unique_values = binned.unique().dropna()
341+
unique_values = np.sort(binned.unique().dropna())
342342
group_indices = [g for g in _codes_to_groups(codes, len(full_index)) if g]
343343

344344
if len(group_indices) == 0:

xarray/tests/test_groupby.py

+35-20
Original file line numberDiff line numberDiff line change
@@ -1371,36 +1371,51 @@ def test_groupby_multidim_map(self):
13711371
)
13721372
assert_identical(expected, actual)
13731373

1374-
def test_groupby_bins(self):
1375-
array = DataArray(np.arange(4), dims="dim_0")
1374+
@pytest.mark.parametrize("use_flox", [True, False])
1375+
@pytest.mark.parametrize("coords", [np.arange(4), np.arange(4)[::-1], [2, 0, 3, 1]])
1376+
def test_groupby_bins(self, coords: np.typing.ArrayLike, use_flox: bool) -> None:
1377+
array = DataArray(
1378+
np.arange(4), dims="dim_0", coords={"dim_0": coords}, name="a"
1379+
)
13761380
# the first value should not be part of any group ("right" binning)
13771381
array[0] = 99
13781382
# bins follow conventions for pandas.cut
13791383
# http://pandas.pydata.org/pandas-docs/stable/generated/pandas.cut.html
13801384
bins = [0, 1.5, 5]
1381-
bin_coords = pd.cut(array["dim_0"], bins).categories
1382-
expected = DataArray(
1383-
[1, 5], dims="dim_0_bins", coords={"dim_0_bins": bin_coords}
1385+
1386+
df = array.to_dataframe()
1387+
df["dim_0_bins"] = pd.cut(array["dim_0"], bins)
1388+
1389+
expected_df = df.groupby("dim_0_bins").sum()
1390+
# TODO: can't convert df with IntervalIndex to Xarray
1391+
1392+
expected = (
1393+
expected_df.reset_index(drop=True)
1394+
.to_xarray()
1395+
.assign_coords(index=np.array(expected_df.index))
1396+
.rename({"index": "dim_0_bins"})["a"]
13841397
)
1385-
actual = array.groupby_bins("dim_0", bins=bins).sum()
1386-
assert_identical(expected, actual)
13871398

1388-
actual = array.groupby_bins("dim_0", bins=bins, labels=[1.2, 3.5]).sum()
1389-
assert_identical(expected.assign_coords(dim_0_bins=[1.2, 3.5]), actual)
1399+
with xr.set_options(use_flox=use_flox):
1400+
actual = array.groupby_bins("dim_0", bins=bins).sum()
1401+
assert_identical(expected, actual)
13901402

1391-
actual = array.groupby_bins("dim_0", bins=bins).map(lambda x: x.sum())
1392-
assert_identical(expected, actual)
1403+
actual = array.groupby_bins("dim_0", bins=bins, labels=[1.2, 3.5]).sum()
1404+
assert_identical(expected.assign_coords(dim_0_bins=[1.2, 3.5]), actual)
13931405

1394-
# make sure original array dims are unchanged
1395-
assert len(array.dim_0) == 4
1406+
actual = array.groupby_bins("dim_0", bins=bins).map(lambda x: x.sum())
1407+
assert_identical(expected, actual)
13961408

1397-
da = xr.DataArray(np.ones((2, 3, 4)))
1398-
bins = [-1, 0, 1, 2]
1399-
with xr.set_options(use_flox=False):
1400-
actual = da.groupby_bins("dim_0", bins).mean(...)
1401-
with xr.set_options(use_flox=True):
1402-
expected = da.groupby_bins("dim_0", bins).mean(...)
1403-
assert_allclose(actual, expected)
1409+
# make sure original array dims are unchanged
1410+
assert len(array.dim_0) == 4
1411+
1412+
da = xr.DataArray(np.ones((2, 3, 4)))
1413+
bins = [-1, 0, 1, 2]
1414+
with xr.set_options(use_flox=False):
1415+
actual = da.groupby_bins("dim_0", bins).mean(...)
1416+
with xr.set_options(use_flox=True):
1417+
expected = da.groupby_bins("dim_0", bins).mean(...)
1418+
assert_allclose(actual, expected)
14041419

14051420
def test_groupby_bins_empty(self):
14061421
array = DataArray(np.arange(4), [("x", range(4))])

0 commit comments

Comments
 (0)