Merge branch 'main' into rel-2025.1.2

kmuehlbauer · web-flow · commit 4ba053818d0f · 2025-01-30T18:53:21.000+01:00
diff --git a/doc/api.rst b/doc/api.rst
@@ -10,7 +10,7 @@ This page provides an auto-generated summary of xarray's API. For more details
 and examples, refer to the relevant chapters in the main part of the
 documentation.
 
-See also: :ref:`public api`
+See also: :ref:`public-api` and :ref:`api-stability`.
 
 Top-level functions
 ===================
diff --git a/doc/getting-started-guide/faq.rst b/doc/getting-started-guide/faq.rst
@@ -416,7 +416,19 @@ would certainly appreciate it. We recommend two citations.
                  url    = {https://doi.org/10.5281/zenodo.59499}
                 }
 
-.. _public api:
+.. _api-stability:
+
+How stable is Xarray's API?
+---------------------------
+
+Xarray tries very hard to maintain backwards compatibility in our :ref:`api` between released versions.
+Whilst we do occasionally make breaking changes in order to improve the library,
+we `signpost changes <https://docs.xarray.dev/en/stable/contributing.html#backwards-compatibility>`_ with ``DeprecationWarnings`` for many releases in advance.
+(An exception is bugs - whose behaviour we try to fix as soon as we notice them.)
+Our `test-driven development practices <https://docs.xarray.dev/en/stable/contributing.html#test-driven-development-code-writing>`_ helps to ensure any accidental regressions are caught.
+This philosophy applies to everything in the `public API <https://docs.xarray.dev/en/stable/getting-started-guide/faq.html#what-parts-of-xarray-are-considered-public-api>`_.
+
+.. _public-api:
 
 What parts of xarray are considered public API?
 -----------------------------------------------
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -108,11 +108,17 @@ Bug fixes
   By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
 - Fix weighted ``polyfit`` for arrays with more than two dimensions (:issue:`9972`, :pull:`9974`).
   By `Mattia Almansi <https://github.com/malmans2>`_.
+- Preserve order of variables in :py:func:`xarray.combine_by_coords` (:issue:`8828`, :pull:`9070`).
+  By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
+- Cast ``numpy`` scalars to arrays in :py:meth:`NamedArray.from_arrays` (:issue:`10005`, :pull:`10008`)
+  By `Justus Magin <https://github.com/keewis>`_.
 
 Documentation
 ~~~~~~~~~~~~~
 - A chapter on :ref:`internals.timecoding` is added to the internal section (:pull:`9618`).
   By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
+- Clarified xarray's policy on API stability in the FAQ. (:issue:`9854`, :pull:`9855`)
+  By `Tom Nicholas <https://github.com/TomNicholas>`_.
 
 Internal Changes
 ~~~~~~~~~~~~~~~~
diff --git a/xarray/core/combine.py b/xarray/core/combine.py
@@ -1,8 +1,7 @@
 from __future__ import annotations
 
-import itertools
-from collections import Counter
-from collections.abc import Iterable, Iterator, Sequence
+from collections import Counter, defaultdict
+from collections.abc import Callable, Hashable, Iterable, Iterator, Sequence
 from typing import TYPE_CHECKING, Literal, TypeVar, Union, cast
 
 import pandas as pd
@@ -269,10 +268,7 @@ def _combine_all_along_first_dim(
     combine_attrs: CombineAttrsOptions = "drop",
 ):
     # Group into lines of datasets which must be combined along dim
-    # need to sort by _new_tile_id first for groupby to work
-    # TODO: is the sorted need?
-    combined_ids = dict(sorted(combined_ids.items(), key=_new_tile_id))
-    grouped = itertools.groupby(combined_ids.items(), key=_new_tile_id)
+    grouped = groupby_defaultdict(list(combined_ids.items()), key=_new_tile_id)
 
     # Combine all of these datasets along dim
     new_combined_ids = {}
@@ -606,6 +602,21 @@ def vars_as_keys(ds):
     return tuple(sorted(ds))
 
 
+K = TypeVar("K", bound=Hashable)
+
+
+def groupby_defaultdict(
+    iter: list[T],
+    key: Callable[[T], K],
+) -> Iterator[tuple[K, Iterator[T]]]:
+    """replacement for itertools.groupby"""
+    idx = defaultdict(list)
+    for i, obj in enumerate(iter):
+        idx[key(obj)].append(i)
+    for k, ix in idx.items():
+        yield k, (iter[i] for i in ix)
+
+
 def _combine_single_variable_hypercube(
     datasets,
     fill_value=dtypes.NA,
@@ -965,8 +976,7 @@ def combine_by_coords(
         ]
 
         # Group by data vars
-        sorted_datasets = sorted(data_objects, key=vars_as_keys)
-        grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys)
+        grouped_by_vars = groupby_defaultdict(data_objects, key=vars_as_keys)
 
         # Perform the multidimensional combine on each group of data variables
         # before merging back together
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -9141,7 +9141,7 @@ def polyfit(
         lhs = np.vander(x, order)
 
         if rcond is None:
-            rcond = x.shape[0] * np.finfo(x.dtype).eps  # type: ignore[assignment]
+            rcond = x.shape[0] * np.finfo(x.dtype).eps
 
         # Weights:
         if w is not None:
diff --git a/xarray/core/dtypes.py b/xarray/core/dtypes.py
@@ -62,7 +62,7 @@ def maybe_promote(dtype: np.dtype) -> tuple[np.dtype, Any]:
     # N.B. these casting rules should match pandas
     dtype_: np.typing.DTypeLike
     fill_value: Any
-    if HAS_STRING_DTYPE and np.issubdtype(dtype, np.dtypes.StringDType()):  # type: ignore[attr-defined]
+    if HAS_STRING_DTYPE and np.issubdtype(dtype, np.dtypes.StringDType()):
         # for now, we always promote string dtypes to object for consistency with existing behavior
         # TODO: refactor this once we have a better way to handle numpy vlen-string dtypes
         dtype_ = object
diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py
@@ -205,7 +205,7 @@ def from_array(
 
         return NamedArray(dims, data, attrs)
 
-    if isinstance(data, _arrayfunction_or_api):
+    if isinstance(data, _arrayfunction_or_api) and not isinstance(data, np.generic):
         return NamedArray(dims, data, attrs)
 
     if isinstance(data, tuple):
diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py
@@ -1043,6 +1043,20 @@ def test_combine_by_coords_incomplete_hypercube(self):
         with pytest.raises(ValueError):
             combine_by_coords([x1, x2, x3], fill_value=None)
 
+    def test_combine_by_coords_override_order(self) -> None:
+        # regression test for https://github.com/pydata/xarray/issues/8828
+        x1 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]})
+        x2 = Dataset(
+            {"a": (("y", "x"), [[2]]), "b": (("y", "x"), [[1]])},
+            coords={"y": [0], "x": [0]},
+        )
+        actual = combine_by_coords([x1, x2], compat="override")
+        assert_equal(actual["a"], actual["b"])
+        assert_equal(actual["a"], x1["a"])
+
+        actual = combine_by_coords([x2, x1], compat="override")
+        assert_equal(actual["a"], x2["a"])
+
 
 class TestCombineMixedObjectsbyCoords:
     def test_combine_by_coords_mixed_unnamed_dataarrays(self):
diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py
@@ -591,6 +591,13 @@ def test_warn_on_repeated_dimension_names(self) -> None:
         with pytest.warns(UserWarning, match="Duplicate dimension names"):
             NamedArray(("x", "x"), np.arange(4).reshape(2, 2))
 
+    def test_aggregation(self) -> None:
+        x: NamedArray[Any, np.dtype[np.int64]]
+        x = NamedArray(("x", "y"), np.arange(4).reshape(2, 2))
+
+        result = x.sum()
+        assert isinstance(result.data, np.ndarray)
+
 
 def test_repr() -> None:
     x: NamedArray[Any, np.dtype[np.uint64]]