Description
What happened?
I have a data array of datetime values and I want to get the first value for each group. If any groups are missing, the operation fails with a DTypePromotionError because numpy can't promote the datetime dtype to float.
This is new in xarray 2025.3.
What did you expect to happen?
I expected to receive the first value for each group and NaT for missing groups.
Minimal Complete Verifiable Example
import xarray as xr
# A datetime array
time = xr.DataArray(xr.date_range('2000-01-01', periods=400, freq='D'), dims=('time',))
# Remove December, so there is a missing group
time = time.sel(time=time.dt.month != 12)
time.resample(time='MS').first()
MVCE confirmation
- Minimal example — the example is as focused as reasonably possible to demonstrate the underlying issue in xarray.
- Complete example — the example is self-contained, including all data and the text of any traceback.
- Verifiable example — the example copy & pastes into an IPython prompt or Binder notebook, returning the result.
- New issue — a search of GitHub Issues suggests this is not a duplicate.
- Recent environment — the issue occurs with the latest version of xarray and its dependencies.
Relevant log output
---------------------------------------------------------------------------
DTypePromotionError Traceback (most recent call last)
Cell In[41], line 1
----> 1 time.resample(time='MS').first()
File ~/Projets/xarray/xarray/core/groupby.py:1420, in GroupBy.first(self, skipna, keep_attrs)
1401 def first(
1402 self, skipna: bool | None = None, keep_attrs: bool | None = None
1403 ) -> T_Xarray:
1404 """
1405 Return the first element of each group along the group dimension
1406
(...) 1418
1419 """
-> 1420 return self._first_or_last("first", skipna, keep_attrs)
File ~/Projets/xarray/xarray/core/resample.py:114, in Resample._first_or_last(self, op, skipna, keep_attrs)
109 def _first_or_last(
110 self, op: Literal["first", "last"], skipna: bool | None, keep_attrs: bool | None
111 ) -> T_Xarray:
112 from xarray.core.dataset import Dataset
--> 114 result = super()._first_or_last(op=op, skipna=skipna, keep_attrs=keep_attrs)
115 if isinstance(result, Dataset):
116 # Can't do this in the base class because group_dim is RESAMPLE_DIM
117 # which is not present in the original object
118 for var in result.data_vars:
File ~/Projets/xarray/xarray/core/groupby.py:1389, in GroupBy._first_or_last(self, op, skipna, keep_attrs)
1383 keep_attrs = _get_keep_attrs(default=True)
1384 if (
1385 module_available("flox", minversion="0.10.0")
1386 and OPTIONS["use_flox"]
1387 and contains_only_chunked_or_numpy(self._obj)
1388 ):
-> 1389 result = self._flox_reduce(
1390 dim=None, func=op, skipna=skipna, keep_attrs=keep_attrs
1391 )
1392 else:
1393 result = self.reduce(
1394 getattr(duck_array_ops, op),
1395 dim=[self._group_dim],
1396 skipna=skipna,
1397 keep_attrs=keep_attrs,
1398 )
File ~/Projets/xarray/xarray/core/resample.py:59, in Resample._flox_reduce(self, dim, keep_attrs, **kwargs)
52 def _flox_reduce(
53 self,
54 dim: Dims,
55 keep_attrs: bool | None = None,
56 **kwargs,
57 ) -> T_Xarray:
58 result: T_Xarray = (
---> 59 super()
60 ._flox_reduce(dim=dim, keep_attrs=keep_attrs, **kwargs)
61 .rename({RESAMPLE_DIM: self._group_dim}) # type: ignore[assignment]
62 )
63 return result
File ~/Projets/xarray/xarray/core/groupby.py:1099, in GroupBy._flox_reduce(self, dim, keep_attrs, **kwargs)
1097 from IPython import embed
1098 embed()
-> 1099 result = xarray_reduce(
1100 obj.drop_vars(non_numeric.keys()),
1101 *codes,
1102 dim=parsed_dim,
1103 expected_groups=expected_groups,
1104 isbin=False,
1105 keep_attrs=keep_attrs,
1106 **kwargs,
1107 )
1109 # we did end up reducing over dimension(s) that are
1110 # in the grouped variable
1111 group_dims = set(grouper.group.dims)
File ~/miniforge3/envs/xclim-dev/lib/python3.13/site-packages/flox/xarray.py:410, in xarray_reduce(obj, func, expected_groups, isbin, sort, dim, fill_value, dtype, method, engine, keep_attrs, skipna, min_count, reindex, *by, **finalize_kwargs)
407 output_sizes = group_sizes
408 output_sizes.update({dim.name: dim.size for dim in newdims if dim.size != 0})
--> 410 actual = xr.apply_ufunc(
411 wrapper,
412 ds_broad.drop_vars(tuple(missing_dim)).transpose(..., *grouper_dims),
413 *by_da,
414 input_core_dims=input_core_dims,
415 # for xarray's test_groupby_duplicate_coordinate_labels
416 exclude_dims=set(dim_tuple),
417 output_core_dims=[output_core_dims],
418 dask="allowed",
419 dask_gufunc_kwargs=dict(
420 output_sizes=output_sizes,
421 output_dtypes=[dtype] if dtype is not None else None,
422 ),
423 keep_attrs=keep_attrs,
424 kwargs={
425 "func": func,
426 "axis": axis,
427 "sort": sort,
428 "fill_value": fill_value,
429 "method": method,
430 "min_count": min_count,
431 "skipna": skipna,
432 "engine": engine,
433 "reindex": reindex,
434 "expected_groups": tuple(expected_groups_valid_list),
435 "isbin": isbins,
436 "finalize_kwargs": finalize_kwargs,
437 "dtype": dtype,
438 "core_dims": input_core_dims,
439 },
440 )
442 # restore non-dim coord variables without the core dimension
443 # TODO: shouldn't apply_ufunc handle this?
444 for var in set(ds_broad._coord_names) - set(ds_broad._indexes) - set(ds_broad.dims):
File ~/Projets/xarray/xarray/computation/apply_ufunc.py:1255, in apply_ufunc(func, input_core_dims, output_core_dims, exclude_dims, vectorize, join, dataset_join, dataset_fill_value, keep_attrs, kwargs, dask, output_dtypes, output_sizes, meta, dask_gufunc_kwargs, on_missing_core_dim, *args)
1253 # feed datasets apply_variable_ufunc through apply_dataset_vfunc
1254 elif any(is_dict_like(a) for a in args):
-> 1255 return apply_dataset_vfunc(
1256 variables_vfunc,
1257 *args,
1258 signature=signature,
1259 join=join,
1260 exclude_dims=exclude_dims,
1261 dataset_join=dataset_join,
1262 fill_value=dataset_fill_value,
1263 keep_attrs=keep_attrs,
1264 on_missing_core_dim=on_missing_core_dim,
1265 )
1266 # feed DataArray apply_variable_ufunc through apply_dataarray_vfunc
1267 elif any(isinstance(a, DataArray) for a in args):
File ~/Projets/xarray/xarray/computation/apply_ufunc.py:526, in apply_dataset_vfunc(func, signature, join, dataset_join, fill_value, exclude_dims, keep_attrs, on_missing_core_dim, *args)
521 list_of_coords, list_of_indexes = build_output_coords_and_indexes(
522 args, signature, exclude_dims, combine_attrs=keep_attrs
523 )
524 args = tuple(getattr(arg, "data_vars", arg) for arg in args)
--> 526 result_vars = apply_dict_of_variables_vfunc(
527 func,
528 *args,
529 signature=signature,
530 join=dataset_join,
531 fill_value=fill_value,
532 on_missing_core_dim=on_missing_core_dim,
533 )
535 out: Dataset | tuple[Dataset, ...]
536 if signature.num_outputs > 1:
File ~/Projets/xarray/xarray/computation/apply_ufunc.py:450, in apply_dict_of_variables_vfunc(func, signature, join, fill_value, on_missing_core_dim, *args)
448 core_dim_present = _check_core_dims(signature, variable_args, name)
449 if core_dim_present is True:
--> 450 result_vars[name] = func(*variable_args)
451 else:
452 if on_missing_core_dim == "raise":
File ~/Projets/xarray/xarray/computation/apply_ufunc.py:821, in apply_variable_ufunc(func, signature, exclude_dims, dask, output_dtypes, vectorize, keep_attrs, dask_gufunc_kwargs, *args)
816 if vectorize:
817 func = _vectorize(
818 func, signature, output_dtypes=output_dtypes, exclude_dims=exclude_dims
819 )
--> 821 result_data = func(*input_data)
823 if signature.num_outputs == 1:
824 result_data = (result_data,)
File ~/miniforge3/envs/xclim-dev/lib/python3.13/site-packages/flox/xarray.py:367, in xarray_reduce.<locals>.wrapper(array, func, skipna, core_dims, *by, **kwargs)
364 if "nan" not in func and func not in ["all", "any", "count"]:
365 func = f"nan{func}"
--> 367 result, *groups = groupby_reduce(array, *by, func=func, **kwargs)
369 # Transpose the new quantile dimension to the end. This is ugly.
370 # but new core dimensions are expected at the end :/
371 # but groupby_reduce inserts them at the beginning
372 if func in ["quantile", "nanquantile"]:
File ~/miniforge3/envs/xclim-dev/lib/python3.13/site-packages/flox/core.py:2559, in groupby_reduce(array, func, expected_groups, sort, isbin, axis, fill_value, dtype, min_count, method, engine, reindex, finalize_kwargs, *by)
2556 fill_value = np.nan
2558 kwargs = dict(axis=axis_, fill_value=fill_value)
-> 2559 agg = _initialize_aggregation(func, dtype, array.dtype, fill_value, min_count_, finalize_kwargs)
2561 # Need to set this early using `agg`
2562 # It cannot be done in the core loop of chunk_reduce
2563 # since we "prepare" the data for flox.
2564 kwargs["engine"] = _choose_engine(by_, agg) if engine is None else engine
File ~/miniforge3/envs/xclim-dev/lib/python3.13/site-packages/flox/aggregations.py:809, in _initialize_aggregation(func, dtype, array_dtype, fill_value, min_count, finalize_kwargs)
804 # np.dtype(None) == np.dtype("float64")!!!
805 # so check for not None
806 dtype_: np.dtype | None = (
807 np.dtype(dtype) if dtype is not None and not isinstance(dtype, np.dtype) else dtype
808 )
--> 809 final_dtype = dtypes._normalize_dtype(
810 dtype_ or agg.dtype_init["final"], array_dtype, agg.preserves_dtype, fill_value
811 )
812 agg.dtype = {
813 "user": dtype, # Save to automatically choose an engine
814 "final": final_dtype,
(...) 823 ),
824 }
826 # Replace sentinel fill values according to dtype
File ~/miniforge3/envs/xclim-dev/lib/python3.13/site-packages/flox/xrdtypes.py:171, in _normalize_dtype(dtype, array_dtype, preserves_dtype, fill_value)
169 dtype = np.dtype(dtype)
170 if fill_value not in [None, INF, NINF, NA]:
--> 171 dtype = np.result_type(dtype, fill_value)
172 return dtype
DTypePromotionError: The DType <class 'numpy.dtypes.DateTime64DType'> could not be promoted by <class 'numpy.dtypes._PyFloatDType'>. This means that no common DType exists for the given inputs. For example they cannot be stored in a single array unless the dtype is `object`. The full list of DTypes is: (<class 'numpy.dtypes.DateTime64DType'>, <class 'numpy.dtypes._PyFloatDType'>)
Anything else we need to know?
I believe this was introduced by #10148, but the underlying reason is that the _flox_reduce
method assumes np.nan
as a fill value when groups are missing:
Line 1105 in 4174aa1
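The promotion failure at the bottom of the traceback can be reproduced with numpy alone. A minimal sketch of the call flox ends up making (np.result_type with fill_value=np.nan and a datetime64 array dtype, as in flox/xrdtypes.py above):
import numpy as np
# numpy refuses to find a common dtype for datetime64 and a Python float,
# which is exactly the DTypePromotionError reported in the traceback.
np.result_type(np.dtype('datetime64[ns]'), np.nan)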
It also fails for cftime data, but I think this is another issue internal to flox.
Deactivating flox (xr.set_options(use_flox=False)) fixes both issues.
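For reference, a sketch of the workaround applied to the example above; with flox disabled, the fallback reduction path returns the expected NaT for the missing December group:
import xarray as xr
time = xr.DataArray(xr.date_range('2000-01-01', periods=400, freq='D'), dims=('time',))
time = time.sel(time=time.dt.month != 12)
# Disable flox so the non-flox reduction path is used
with xr.set_options(use_flox=False):
    result = time.resample(time='MS').first()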
Environment
INSTALLED VERSIONS
commit: fd7c765
python: 3.13.2 | packaged by conda-forge | (main, Feb 17 2025, 14:10:22) [GCC 13.3.0]
python-bits: 64
OS: Linux
OS-release: 6.12.12-200.fc41.x86_64
machine: x86_64
processor:
byteorder: little
LC_ALL: None
LANG: fr_CA.UTF-8
LOCALE: ('fr_CA', 'UTF-8')
libhdf5: 1.14.4
libnetcdf: 4.9.2
xarray: 2025.3.1.dev5+gfd7c7656.d20250324
pandas: 2.2.3
numpy: 2.1.3
scipy: 1.15.2
netCDF4: 1.7.2
pydap: None
h5netcdf: 1.6.1
h5py: 3.12.1
zarr: None
cftime: 1.6.4
nc_time_axis: 1.4.1
iris: None
bottleneck: 1.4.2
dask: 2025.3.0
distributed: 2025.3.0
matplotlib: 3.10.1
cartopy: None
seaborn: None
numbagg: None
fsspec: 2025.3.0
cupy: None
pint: 0.24.4
sparse: None
flox: 0.10.0
numpy_groupies: 0.11.2
setuptools: 75.8.2
pip: 25.0.1
conda: None
pytest: 8.3.5
mypy: 1.15.0
IPython: 9.0.2
sphinx: 8.1.