Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement literal np.timedelta64 coding #10101

Draft
wants to merge 30 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
063437b
Proof of concept literal timedelta64 coding
spencerkclark Mar 6, 2025
03f2988
Ensure test_roundtrip_timedelta_data test uses old encoding pathway
spencerkclark Mar 6, 2025
bdb53d7
Remove no longer relevant test
spencerkclark Mar 7, 2025
05c3ce6
Merge branch 'main' into timedelta64-encoding
spencerkclark Mar 7, 2025
00d9eaa
Include units attribute
spencerkclark Mar 8, 2025
b043b45
Move coder to times.py
spencerkclark Mar 8, 2025
6f4e6e4
Merge branch 'main' into timedelta64-encoding
spencerkclark Mar 8, 2025
7f73753
Add what's new entry
spencerkclark Mar 8, 2025
4a8e111
Merge branch 'timedelta64-encoding' of https://github.com/spencerkcla…
spencerkclark Mar 8, 2025
9ce2a24
Restore test and reduce diff
spencerkclark Mar 8, 2025
eb6e19a
Fix typing
spencerkclark Mar 8, 2025
436e588
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 8, 2025
a305238
Fix doctests
spencerkclark Mar 8, 2025
b406c64
Restore original order of encoders
spencerkclark Mar 8, 2025
a21b137
Add return types to tests
spencerkclark Mar 8, 2025
5108b02
Move everything to CFTimedeltaCoder; reuse code where possible
spencerkclark Mar 8, 2025
452968c
Fix mypy
spencerkclark Mar 9, 2025
503db4a
Use Kai's offset and scale_factor logic for all encoding
spencerkclark Mar 9, 2025
9aee097
Merge branch 'main' into timedelta64-encoding
spencerkclark Mar 22, 2025
56f55e2
Fix bad merge
spencerkclark Mar 22, 2025
c5e7de9
Forbid mixing other encoding with literal timedelta64 encoding
spencerkclark Mar 22, 2025
d1744af
Expose fine-grained control over decoding pathways
spencerkclark Mar 22, 2025
7c7b071
Rename test
spencerkclark Mar 22, 2025
da1edc4
Use consistent dtype spelling
spencerkclark Mar 22, 2025
2bb4b99
Continue supporting non-timedelta dtype-only encoding
spencerkclark Mar 22, 2025
0220ed5
Fix example attribute in docstring
spencerkclark Mar 22, 2025
c83fcb3
Update what's new
spencerkclark Mar 22, 2025
d1e8a5e
Fix typo
spencerkclark Mar 22, 2025
7b94d35
Complete test
spencerkclark Mar 22, 2025
f269e68
Fix docstring
spencerkclark Mar 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions xarray/coding/times.py
Original file line number Diff line number Diff line change
Expand Up @@ -1368,13 +1368,14 @@ def __init__(
def encode(self, variable: Variable, name: T_Name = None) -> Variable:
if np.issubdtype(variable.data.dtype, np.timedelta64):
dims, data, attrs, encoding = unpack_for_encoding(variable)

data, units = encode_cf_timedelta(
data, encoding.pop("units", None), encoding.get("dtype", None)
)
safe_setitem(attrs, "units", units, name=name)

return Variable(dims, data, attrs, encoding, fastpath=True)
if "units" in encoding:
data, units = encode_cf_timedelta(
data, encoding.pop("units"), encoding.get("dtype", None)
)
safe_setitem(attrs, "units", units, name=name)
return Variable(dims, data, attrs, encoding, fastpath=True)
else:
return variable
else:
return variable

Expand Down
75 changes: 75 additions & 0 deletions xarray/coding/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import typing
import warnings
from collections.abc import Callable, Hashable, MutableMapping
from functools import partial
Expand All @@ -11,6 +12,7 @@
import pandas as pd

from xarray.core import dtypes, duck_array_ops, indexing
from xarray.core.types import PDDatetimeUnitOptions
from xarray.core.variable import Variable
from xarray.namedarray.parallelcompat import get_chunked_array_type
from xarray.namedarray.pycompat import is_chunked_array
Expand Down Expand Up @@ -161,6 +163,45 @@
return np.asarray(self.array[key], dtype=self.dtype)


class Timedelta64TypeArray(indexing.ExplicitlyIndexedNDArrayMixin):
    """Lazily reinterpret integer-typed arrays as np.timedelta64 values.

    Wraps an indexable array and casts every indexing result to the target
    timedelta64 dtype on access.  This is useful for decoding timedelta64
    arrays stored in integer-typed netCDF variables.

    >>> x = np.array([1, 0, 1, 1, 0], dtype="int64")

    >>> x.dtype
    dtype('int64')

    >>> Timedelta64TypeArray(x, np.dtype("timedelta64[ns]")).dtype
    dtype('timedelta64[ns]')

    >>> indexer = indexing.BasicIndexer((slice(None),))
    >>> Timedelta64TypeArray(x, np.dtype("timedelta64[ns]"))[indexer].dtype
    dtype('timedelta64[ns]')
    """

    __slots__ = ("_dtype", "array")

    def __init__(self, array, dtype: np.typing.DTypeLike) -> None:
        # Wrap so basic / outer / vectorized indexing are all available.
        self.array = indexing.as_indexable(array)
        self._dtype = dtype

    @property
    def dtype(self):
        # Normalize whatever dtype-like object was supplied at construction.
        return np.dtype(self._dtype)

    def __getitem__(self, key) -> np.ndarray:
        selected = self.array[key]
        return np.asarray(selected, dtype=self.dtype)

    def _oindex_get(self, key):
        selected = self.array.oindex[key]
        return np.asarray(selected, dtype=self.dtype)

    def _vindex_get(self, key):
        selected = self.array.vindex[key]
        return np.asarray(selected, dtype=self.dtype)


def lazy_elemwise_func(array, func: Callable, dtype: np.typing.DTypeLike):
"""Lazily apply an element-wise function to an array.
Parameters
Expand Down Expand Up @@ -345,7 +386,7 @@
# otherwise numpy unsigned ints will silently cast to the signed counterpart
fill_value = fill_value.item()
# passes if provided fill value fits in encoded on-disk type
new_fill = encoded_dtype.type(fill_value)

Check warning on line 389 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: `np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 389 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 389 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 389 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 389 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 389 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 389 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 389 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 389 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 389 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: `np.array(value).astype(dtype)` will give the desired result (the cast overflows).

::warning file=/home/runner/work/xarray/xarray/xarray/coding/variables.py,line=389::NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: `np.array(value).astype(dtype)` will give the desired result (the cast overflows).
except OverflowError:
encoded_kind_str = "signed" if encoded_dtype.kind == "i" else "unsigned"
warnings.warn(
Expand Down Expand Up @@ -738,3 +779,37 @@

def decode(self, variable: Variable, name: T_Name = None) -> Variable:
raise NotImplementedError()


class LiteralTimedelta64Coder(VariableCoder):
"""Code np.timedelta64 values."""

def encode(self, variable: Variable, name: T_Name = None) -> Variable:
if np.issubdtype(variable.data.dtype, np.timedelta64):
dims, data, attrs, encoding = unpack_for_encoding(variable)
resolution, _ = np.datetime_data(variable.dtype)
attrs["dtype"] = f"timedelta64[{resolution}]"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about also including units in attrs?

That would make timedelta64 encoding still specify units in the style of CF conventions, which could make us a little more compatible with non-Xarray tools.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be possible, but this would need an additional check inside CFTimedeltaCoder to prevent premature encoding and decoding if both attributes are attached.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree this is a good idea, though does increase the complexity a little. I gave it a try in my latest push.

data = duck_array_ops.astype(data, dtype=np.int64, copy=True)
return Variable(dims, data, attrs, encoding, fastpath=True)
else:
return variable

def decode(self, variable: Variable, name: T_Name = None) -> Variable:
if variable.attrs.get("dtype", "").startswith("timedelta64"):
dims, data, attrs, encoding = unpack_for_decoding(variable)
# overwrite (!) dtype in encoding, and remove from attrs
# needed for correct subsequent encoding
encoding["dtype"] = attrs.pop("dtype")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typically we use the pop_to() helper to do this safely, e.g., dtype = pop_to(encoding, attrs, "dtype", name=name)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the heads up! I somewhat blindly inherited this from the BooleanCoder. I added some better tests around this issue.

dtype = np.dtype(encoding["dtype"])
resolution, _ = np.datetime_data(dtype)
if resolution not in typing.get_args(PDDatetimeUnitOptions):
raise ValueError(
f"Following pandas, xarray only supports decoding to "
f"timedelta64 values with a resolution of 's', 'ms', "
f"'us', or 'ns'. Encoded values have a resolution of "
f"{resolution!r}."
)
data = Timedelta64TypeArray(data, dtype)
return Variable(dims, data, attrs, encoding, fastpath=True)
else:
return variable
2 changes: 2 additions & 0 deletions xarray/conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ def encode_cf_variable(
for coder in [
CFDatetimeCoder(),
CFTimedeltaCoder(),
variables.LiteralTimedelta64Coder(),
variables.CFScaleOffsetCoder(),
variables.CFMaskCoder(),
variables.NativeEnumCoder(),
Expand Down Expand Up @@ -238,6 +239,7 @@ def decode_cf_variable(
original_dtype = var.dtype

var = variables.BooleanCoder().decode(var)
var = variables.LiteralTimedelta64Coder().decode(var)

dimensions, data, attributes, encoding = variables.unpack_for_decoding(var)

Expand Down
3 changes: 3 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,7 +635,10 @@
# though we cannot test that until we fix the timedelta decoding
# to support large ranges
time_deltas = pd.to_timedelta(["1h", "2h", "NaT"]).as_unit("s") # type: ignore[arg-type, unused-ignore]
encoding = {"units": "seconds"}
expected = Dataset({"td": ("td", time_deltas), "td0": time_deltas[0]})
expected["td"].encoding = encoding
expected["td0"].encoding = encoding
with self.roundtrip(
expected, open_kwargs={"decode_timedelta": CFTimedeltaCoder(time_unit="ns")}
) as actual:
Expand Down Expand Up @@ -4185,7 +4188,7 @@
fx.create_dataset(k, data=v)
with pytest.warns(UserWarning, match="The 'phony_dims' kwarg"):
with xr.open_dataset(tmp_file, engine="h5netcdf", group="bar") as ds:
assert ds.dims == {

Check warning on line 4191 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4191 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4191 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4191 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4191 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4191 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4191 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4191 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4191 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.

Check warning on line 4191 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.10

The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
"phony_dim_0": 5,
"phony_dim_1": 5,
"phony_dim_2": 5,
Expand Down
21 changes: 19 additions & 2 deletions xarray/tests/test_coding_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -1512,7 +1512,7 @@ def test_roundtrip_timedelta64_nanosecond_precision(
timedelta_values[2] = nat
timedelta_values[4] = nat

encoding = dict(dtype=dtype, _FillValue=fill_value)
encoding = dict(dtype=dtype, _FillValue=fill_value, units="nanoseconds")
var = Variable(["time"], timedelta_values, encoding=encoding)

encoded_var = conventions.encode_cf_variable(var)
Expand Down Expand Up @@ -1863,7 +1863,8 @@ def test_decode_timedelta(
decode_times, decode_timedelta, expected_dtype, warns
) -> None:
timedeltas = pd.timedelta_range(0, freq="D", periods=3)
var = Variable(["time"], timedeltas)
encoding = {"units": "days"}
var = Variable(["time"], timedeltas, encoding=encoding)
encoded = conventions.encode_cf_variable(var)
if warns:
with pytest.warns(FutureWarning, match="decode_timedelta"):
Expand Down Expand Up @@ -1907,3 +1908,19 @@ def test_lazy_decode_timedelta_error() -> None:
)
with pytest.raises(OutOfBoundsTimedelta, match="overflow"):
decoded.load()


def test_literal_timedelta64_coding(time_unit: PDDatetimeUnitOptions) -> None:
    """Round-trip timedelta64 data through literal (dtype-attribute) coding.

    Encoding stores integers plus a ``dtype`` attribute; decoding should
    restore both the values and the original timedelta64 resolution.
    """
    timedeltas = pd.timedelta_range(0, freq="D", periods=3, unit=time_unit)
    variable = Variable(["time"], timedeltas)
    encoded = conventions.encode_cf_variable(variable)
    decoded = conventions.decode_cf_variable("timedeltas", encoded)
    assert_identical(decoded, variable)
    # assert_identical checks values; also pin the exact dtype/resolution.
    assert decoded.dtype == variable.dtype


def test_literal_timedelta_coding_resolution_error() -> None:
    """Decoding rejects timedelta64 resolutions pandas does not support."""
    # 'D' is outside the supported set {'s', 'ms', 'us', 'ns'}.
    attrs = {"dtype": "timedelta64[D]"}
    encoded = Variable(["time"], [0, 1, 2], attrs=attrs)
    with pytest.raises(ValueError, match="xarray only supports"):
        conventions.decode_cf_variable("timedeltas", encoded)
1 change: 0 additions & 1 deletion xarray/tests/test_conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ def test_incompatible_attributes(self) -> None:
Variable(
["t"], pd.date_range("2000-01-01", periods=3), {"units": "foobar"}
),
Variable(["t"], pd.to_timedelta(["1 day"]), {"units": "foobar"}), # type: ignore[arg-type, unused-ignore]
Variable(["t"], [0, 1, 2], {"add_offset": 0}, {"add_offset": 2}),
Variable(["t"], [0, 1, 2], {"_FillValue": 0}, {"_FillValue": 2}),
]
Expand Down
Loading