Add SeasonGrouper, SeasonResampler #9524

Draft · wants to merge 38 commits into base: main

Commits (38)
7e3a6a4
Add SeasonGrouper, SeasonResampler
dcherian Jun 28, 2024
879b496
Allow sliding seasons
dcherian Sep 20, 2024
8268c46
cftime support
dcherian Sep 22, 2024
31cc519
Add skeleton tests
dcherian Sep 22, 2024
96ae241
Support "subsampled" seasons
dcherian Sep 22, 2024
77dc5e0
small edits
dcherian Sep 22, 2024
d68b1e4
Add reset
dcherian Nov 12, 2024
1b7a9fc
Fix tests
dcherian Nov 14, 2024
be5f933
Raise if seasons are not sorted for resampling
dcherian Nov 14, 2024
bd21b48
fix Self import
dcherian Nov 14, 2024
09640b7
Redo calendar fixtures
dcherian Nov 14, 2024
8773faf
fix test
dcherian Nov 14, 2024
879af59
cftime tests
dcherian Nov 15, 2024
2ca67da
Fix doctest
dcherian Nov 16, 2024
f5191e5
typing
dcherian Nov 16, 2024
2512d53
fix test
dcherian Nov 16, 2024
f0f838c
Merge branch 'main' into custom-groupers
dcherian Nov 16, 2024
b9507fe
Merge branch 'main' into custom-groupers
dcherian Nov 16, 2024
b385532
Add tests for SeasonGrouper API (PR #9524) (#40)
tomvothecoder Nov 20, 2024
a21952a
try fixing test
dcherian Nov 21, 2024
9f3c270
Merge branch 'main' into custom-groupers
dcherian Jan 8, 2025
bc86751
lint
dcherian Jan 8, 2025
a62628b
Merge branch 'main' into custom-groupers
dcherian Mar 19, 2025
64c99c5
format
dcherian Mar 19, 2025
594f285
fix test
dcherian Mar 19, 2025
1313ab9
cleanup
dcherian Mar 19, 2025
32d9ed0
more cleanup
dcherian Mar 19, 2025
b068e94
fix
dcherian Mar 19, 2025
b9a34ca
Merge branch 'main' into custom-groupers
dcherian Mar 20, 2025
862cf2a
Fix automatic inference of unique_coord
dcherian Mar 20, 2025
f3f7d52
Squashed commit of the following:
dcherian Mar 20, 2025
85d9217
cleanup
dcherian Mar 20, 2025
de26f38
Fix
dcherian Mar 20, 2025
fc7297a
fix docstring
dcherian Mar 20, 2025
e3413f3
Merge remote-tracking branch 'upstream/main' into custom-groupers
dcherian Mar 25, 2025
861da6c
cleanup
dcherian Mar 26, 2025
7406458
Avoid silly sphinx complete rebuilds
dcherian Mar 26, 2025
6297c1c
Add docs
dcherian Mar 26, 2025
2 changes: 2 additions & 0 deletions doc/api.rst
@@ -1329,6 +1329,8 @@ Grouper Objects
groupers.BinGrouper
groupers.UniqueGrouper
groupers.TimeResampler
groupers.SeasonGrouper
groupers.SeasonResampler


Rolling objects
2 changes: 2 additions & 0 deletions doc/conf.py
@@ -187,6 +187,8 @@
"pd.NaT": "~pandas.NaT",
}

autodoc_type_aliases = napoleon_type_aliases # Keep both in sync

# mermaid config
mermaid_version = "10.9.1"

8 changes: 8 additions & 0 deletions doc/user-guide/groupby.rst
@@ -332,6 +332,14 @@ Different groupers can be combined to construct sophisticated GroupBy operations
ds.groupby(x=BinGrouper(bins=[5, 15, 25]), letters=UniqueGrouper()).sum()


Time Grouping and Resampling
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. seealso::

See :ref:`resampling`.


Shuffling
~~~~~~~~~

132 changes: 101 additions & 31 deletions doc/user-guide/time-series.rst
@@ -1,3 +1,5 @@
.. currentmodule:: xarray

.. _time-series:

================
@@ -21,26 +23,19 @@ core functionality.
Creating datetime64 data
------------------------

Xarray uses the numpy dtypes ``datetime64[unit]`` and ``timedelta64[unit]``
(where unit is one of ``"s"``, ``"ms"``, ``"us"`` and ``"ns"``) to represent datetime
Xarray uses the numpy dtypes :py:class:`numpy.datetime64` and :py:class:`numpy.timedelta64`
with specified units (one of ``"s"``, ``"ms"``, ``"us"`` and ``"ns"``) to represent datetime
data, which offer vectorized operations with numpy and smooth integration with pandas.

To convert to or create regular arrays of ``datetime64`` data, we recommend
using :py:func:`pandas.to_datetime` and :py:func:`pandas.date_range`:
To convert to or create regular arrays of :py:class:`numpy.datetime64` data, we recommend
using :py:func:`pandas.to_datetime`, :py:class:`pandas.DatetimeIndex`, or :py:func:`xarray.date_range`:

.. ipython:: python

pd.to_datetime(["2000-01-01", "2000-02-02"])
pd.DatetimeIndex(
["2000-01-01 00:00:00", "2000-02-02 00:00:00"], dtype="datetime64[s]"
)
pd.date_range("2000-01-01", periods=365)
pd.date_range("2000-01-01", periods=365, unit="s")

It is also possible to use corresponding :py:func:`xarray.date_range`:

.. ipython:: python

xr.date_range("2000-01-01", periods=365)
xr.date_range("2000-01-01", periods=365, unit="s")
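
The unit-aware dtypes themselves can be illustrated with a minimal numpy-only sketch (independent of xarray; the variable names here are illustrative):

```python
import numpy as np

# Build daily datetime64 data, then cast it to second resolution.
days = np.arange("2000-01-01", "2000-01-04", dtype="datetime64[D]")
seconds = days.astype("datetime64[s]")
print(seconds.dtype)  # datetime64[s]
```

The same ``astype`` cast works on the ``time`` coordinate of an xarray object, since it is backed by a numpy array.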

@@ -81,7 +76,7 @@ attribute like ``'days since 2000-01-01'``).


You can manually decode arrays in this form by passing a dataset to
:py:func:`~xarray.decode_cf`:
:py:func:`decode_cf`:

.. ipython:: python

@@ -93,8 +88,8 @@ You can manually decode arrays in this form by passing a dataset to
coder = xr.coders.CFDatetimeCoder(time_unit="s")
xr.decode_cf(ds, decode_times=coder)

From xarray 2025.01.2 the resolution of the dates can be one of ``"s"``, ``"ms"``, ``"us"`` or ``"ns"``. One limitation of using ``datetime64[ns]`` is that it limits the native representation of dates to those that fall between the years 1678 and 2262, which gets increased significantly with lower resolutions. When a store contains dates outside of these bounds (or dates < `1582-10-15`_ with a Gregorian, also known as standard, calendar), dates will be returned as arrays of :py:class:`cftime.datetime` objects and a :py:class:`~xarray.CFTimeIndex` will be used for indexing.
:py:class:`~xarray.CFTimeIndex` enables most of the indexing functionality of a :py:class:`pandas.DatetimeIndex`.
From xarray 2025.01.2 the resolution of the dates can be one of ``"s"``, ``"ms"``, ``"us"`` or ``"ns"``. One limitation of using ``datetime64[ns]`` is that it limits the native representation of dates to those that fall between the years 1678 and 2262, which gets increased significantly with lower resolutions. When a store contains dates outside of these bounds (or dates < `1582-10-15`_ with a Gregorian, also known as standard, calendar), dates will be returned as arrays of :py:class:`cftime.datetime` objects and a :py:class:`CFTimeIndex` will be used for indexing.
:py:class:`CFTimeIndex` enables most of the indexing functionality of a :py:class:`pandas.DatetimeIndex`.
See :ref:`CFTimeIndex` for more information.
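
The nanosecond bounds mentioned above can be checked directly with pandas (a small sketch; the exact sub-second timestamps are pandas implementation details):

```python
import pandas as pd

# datetime64[ns] can only represent dates in roughly 1677-2262;
# coarser resolutions such as "s" extend this range dramatically.
print(pd.Timestamp.min)  # 1677-09-21 ...
print(pd.Timestamp.max)  # 2262-04-11 ...
```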

Datetime indexing
@@ -205,35 +200,37 @@ You can also search for multiple months (in this case January through March), us
Resampling and grouped operations
---------------------------------

Datetime components couple particularly well with grouped operations (see
:ref:`groupby`) for analyzing features that repeat over time. Here's how to
calculate the mean by time of day:

.. seealso::

For more generic documentation on grouping, see :ref:`groupby`.


Datetime components couple particularly well with grouped operations for analyzing features that repeat over time.
Here's how to calculate the mean by time of day:

.. ipython:: python
:okwarning:

ds.groupby("time.hour").mean()

For upsampling or downsampling temporal resolutions, xarray offers a
:py:meth:`~xarray.Dataset.resample` method building on the core functionality
:py:meth:`Dataset.resample` method building on the core functionality
offered by the pandas method of the same name. Resample uses essentially the
same api as ``resample`` `in pandas`_.
same api as :py:meth:`pandas.DataFrame.resample` `in pandas`_.

.. _in pandas: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#up-and-downsampling

For example, we can downsample our dataset from hourly to 6-hourly:

.. ipython:: python
:okwarning:

ds.resample(time="6h")

This will create a specialized ``Resample`` object which saves information
necessary for resampling. All of the reduction methods which work with
``Resample`` objects can also be used for resampling:
This will create a specialized :py:class:`~xarray.core.resample.DatasetResample` or :py:class:`~xarray.core.resample.DataArrayResample`
object which saves information necessary for resampling. All of the reduction methods which work with
:py:class:`Dataset` or :py:class:`DataArray` objects can also be used for resampling:

.. ipython:: python
:okwarning:

ds.resample(time="6h").mean()

@@ -252,7 +249,7 @@ by specifying the ``dim`` keyword argument
ds.resample(time="6h").mean(dim=["time", "latitude", "longitude"])

For upsampling, xarray provides six methods: ``asfreq``, ``ffill``, ``bfill``, ``pad``,
``nearest`` and ``interpolate``. ``interpolate`` extends ``scipy.interpolate.interp1d``
``nearest`` and ``interpolate``. ``interpolate`` extends :py:func:`scipy.interpolate.interp1d`
and supports all of its schemes. All of these resampling operations work on both
Dataset and DataArray objects with an arbitrary number of dimensions.
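
Upsampling with forward fill can be sketched in plain pandas, whose resampler xarray's mirrors (a minimal sketch; the analogous xarray call would be ``ds.resample(time="6h").ffill()``):

```python
import pandas as pd

# Two 12-hourly points, upsampled to 6-hourly with forward fill.
s = pd.Series([1.0, 2.0], index=pd.date_range("2000-01-01", periods=2, freq="12h"))
up = s.resample("6h").ffill()
print(up)  # 00:00 -> 1.0, 06:00 -> 1.0, 12:00 -> 2.0
```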

@@ -266,9 +263,7 @@ Data that has indices outside of the given ``tolerance`` are set to ``NaN``.

It is often desirable to center the time values after a resampling operation.
That can be accomplished by updating the resampled dataset time coordinate values
using time offset arithmetic via the `pandas.tseries.frequencies.to_offset`_ function.

.. _pandas.tseries.frequencies.to_offset: https://pandas.pydata.org/docs/reference/api/pandas.tseries.frequencies.to_offset.html
using time offset arithmetic via the :py:func:`pandas.tseries.frequencies.to_offset` function.

.. ipython:: python

@@ -277,5 +272,80 @@ using time offset arithmetic via the `pandas.tseries.frequencies.to_offset`_ fun
resampled_ds["time"] = resampled_ds.get_index("time") + offset
resampled_ds

For more examples of using grouped operations on a time dimension, see
:doc:`../examples/weather-data`.

.. seealso::

For more examples of using grouped operations on a time dimension, see :doc:`../examples/weather-data`.


Handling Seasons
~~~~~~~~~~~~~~~~

Two extremely common time series operations are grouping by season and resampling to a seasonal frequency.
Xarray has historically supported simple versions of these computations.
For example, ``.groupby("time.season")`` (where the seasons are DJF, MAM, JJA, SON)
and resampling to a seasonal frequency using pandas syntax: ``.resample(time="QS-DEC")``.
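
The pandas-syntax version can be sketched with a plain pandas Series (a minimal sketch; in xarray the equivalent would be ``ds.resample(time="QS-DEC").mean()``):

```python
import pandas as pd

# Monthly data for 2001, averaged over quarters anchored at December
# (DJF, MAM, JJA, SON); the first and last bins are incomplete.
s = pd.Series(range(12), index=pd.date_range("2001-01-01", periods=12, freq="MS"))
seasonal = s.resample("QS-DEC").mean()
print(seasonal)  # bins labelled 2000-12-01, 2001-03-01, ...
```

Note that plain ``QS-DEC`` resampling keeps the incomplete edge seasons; handling those is one of the things :py:class:`groupers.SeasonResampler` below improves on.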

Quite commonly one wants more flexibility in defining seasons. For these use cases, Xarray provides
:py:class:`groupers.SeasonGrouper` and :py:class:`groupers.SeasonResampler`.


.. currentmodule:: xarray.groupers

.. ipython:: python

from xarray.groupers import SeasonGrouper

ds.groupby(time=SeasonGrouper(["DJF", "MAM", "JJA", "SON"])).mean()


Note how the seasons are in the specified order, unlike ``.groupby("time.season")`` where the
seasons are sorted alphabetically.

.. ipython:: python

ds.groupby("time.season").mean()


:py:class:`SeasonGrouper` supports overlapping seasons:

.. ipython:: python

ds.groupby(time=SeasonGrouper(["DJFM", "MAMJ", "JJAS", "SOND"])).mean()


Skipping months is allowed:

.. ipython:: python

ds.groupby(time=SeasonGrouper(["JJAS"])).mean()


Use :py:class:`SeasonResampler` to specify custom seasons.

.. ipython:: python

from xarray.groupers import SeasonResampler

ds.resample(time=SeasonResampler(["DJF", "MAM", "JJA", "SON"])).mean()


:py:class:`SeasonResampler` is smart enough to correctly handle years for seasons that
span the end of the year (e.g. DJF). By default :py:class:`SeasonResampler` will skip any
season that is incomplete (e.g. the first DJF season for a time series that starts in Jan).
Pass the ``drop_incomplete=False`` kwarg to :py:class:`SeasonResampler` to disable this behaviour.

.. ipython:: python

from xarray.groupers import SeasonResampler

ds.resample(
time=SeasonResampler(["DJF", "MAM", "JJA", "SON"], drop_incomplete=False)
).mean()


Seasons need not be of the same length:

.. ipython:: python

ds.resample(time=SeasonResampler(["JF", "MAM", "JJAS", "OND"])).mean()
48 changes: 47 additions & 1 deletion properties/test_properties.py
@@ -1,11 +1,15 @@
import itertools

import pytest

pytest.importorskip("hypothesis")

from hypothesis import given
import hypothesis.strategies as st
from hypothesis import given, note

import xarray as xr
import xarray.testing.strategies as xrst
from xarray.groupers import find_independent_seasons, season_to_month_tuple


@given(attrs=xrst.simple_attrs)
@@ -15,3 +19,45 @@ def test_assert_identical(attrs):

ds = xr.Dataset(attrs=attrs)
xr.testing.assert_identical(ds, ds.copy(deep=True))


@given(
roll=st.integers(min_value=0, max_value=12),
breaks=st.lists(
st.integers(min_value=0, max_value=11), min_size=1, max_size=12, unique=True
),
)
def test_property_season_month_tuple(roll, breaks):
chars = list("JFMAMJJASOND")
months = tuple(range(1, 13))

rolled_chars = chars[roll:] + chars[:roll]
rolled_months = months[roll:] + months[:roll]
breaks = sorted(breaks)
if breaks[0] != 0:
breaks = [0] + breaks
if breaks[-1] != 12:
breaks = breaks + [12]
seasons = tuple(
"".join(rolled_chars[start:stop]) for start, stop in itertools.pairwise(breaks)
)
actual = season_to_month_tuple(seasons)
expected = tuple(
rolled_months[start:stop] for start, stop in itertools.pairwise(breaks)
)
assert expected == actual


@given(data=st.data(), nmonths=st.integers(min_value=1, max_value=11))
def test_property_find_independent_seasons(data, nmonths):
chars = "JFMAMJJASOND"
# if stride > nmonths, then we can't infer season order
stride = data.draw(st.integers(min_value=1, max_value=nmonths))
chars = chars + chars[:nmonths]
seasons = [list(chars[i : i + nmonths]) for i in range(0, 12, stride)]
note(seasons)
groups = find_independent_seasons(seasons)
for group in groups:
inds = tuple(itertools.chain(*group.inds))
assert len(inds) == len(set(inds))
assert len(group.codes) == len(set(group.codes))
2 changes: 2 additions & 0 deletions pyproject.toml
@@ -391,6 +391,8 @@ extend-ignore-identifiers-re = [
[tool.typos.default.extend-words]
# NumPy function names
arange = "arange"
ond = "ond"
aso = "aso"

# Technical terms
nd = "nd"
56 changes: 56 additions & 0 deletions xarray/compat/toolzcompat.py
@@ -0,0 +1,56 @@
# This file contains functions copied from the toolz library in accordance
# with its license. The original copyright notice is duplicated below.

# Copyright (c) 2013 Matthew Rocklin

# All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:

# a. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# b. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# c. Neither the name of toolz nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.


# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.


def sliding_window(n, seq):
"""A sequence of overlapping subsequences

>>> list(sliding_window(2, [1, 2, 3, 4]))
[(1, 2), (2, 3), (3, 4)]

This function creates a sliding window suitable for transformations like
sliding means / smoothing

>>> mean = lambda seq: float(sum(seq)) / len(seq)
>>> list(map(mean, sliding_window(2, [1, 2, 3, 4])))
[1.5, 2.5, 3.5]
"""
import collections
import itertools

return zip(
*(
collections.deque(itertools.islice(it, i), 0) or it
for i, it in enumerate(itertools.tee(seq, n))
),
strict=False,
)
6 changes: 3 additions & 3 deletions xarray/core/dataarray.py
@@ -6884,7 +6884,7 @@ def groupby(

>>> da.groupby("letters")
<DataArrayGroupBy, grouped over 1 grouper(s), 2 groups in total:
'letters': 2/2 groups present with labels 'a', 'b'>
'letters': UniqueGrouper('letters'), 2/2 groups with labels 'a', 'b'>

Execute a reduction

@@ -6900,8 +6900,8 @@

>>> da.groupby(["letters", "x"])
<DataArrayGroupBy, grouped over 2 grouper(s), 8 groups in total:
'letters': 2/2 groups present with labels 'a', 'b'
'x': 4/4 groups present with labels 10, 20, 30, 40>
'letters': UniqueGrouper('letters'), 2/2 groups with labels 'a', 'b'
'x': UniqueGrouper('x'), 4/4 groups with labels 10, 20, 30, 40>

Use Grouper objects to express more complicated GroupBy operations
