-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement literal np.timedelta64
coding
#10101
base: main
Are you sure you want to change the base?
Changes from 4 commits
063437b
03f2988
bdb53d7
05c3ce6
00d9eaa
b043b45
6f4e6e4
7f73753
4a8e111
9ce2a24
eb6e19a
436e588
a305238
b406c64
a21b137
5108b02
452968c
503db4a
9aee097
56f55e2
c5e7de9
d1744af
7c7b071
da1edc4
2bb4b99
0220ed5
c83fcb3
d1e8a5e
7b94d35
f269e68
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
|
||
from __future__ import annotations | ||
|
||
import typing | ||
import warnings | ||
from collections.abc import Callable, Hashable, MutableMapping | ||
from functools import partial | ||
|
@@ -11,6 +12,7 @@ | |
import pandas as pd | ||
|
||
from xarray.core import dtypes, duck_array_ops, indexing | ||
from xarray.core.types import PDDatetimeUnitOptions | ||
from xarray.core.variable import Variable | ||
from xarray.namedarray.parallelcompat import get_chunked_array_type | ||
from xarray.namedarray.pycompat import is_chunked_array | ||
|
@@ -161,6 +163,45 @@ | |
return np.asarray(self.array[key], dtype=self.dtype) | ||
|
||
|
||
class Timedelta64TypeArray(indexing.ExplicitlyIndexedNDArrayMixin):
    """Present an integer array as ``np.timedelta64`` values on the fly.

    The wrapped array is stored as-is; the cast to the requested dtype
    happens each time the wrapper is indexed. This is useful for decoding
    timedelta64 arrays from integer typed netCDF variables.

    >>> x = np.array([1, 0, 1, 1, 0], dtype="int64")

    >>> x.dtype
    dtype('int64')

    >>> Timedelta64TypeArray(x, np.dtype("timedelta64[ns]")).dtype
    dtype('timedelta64[ns]')

    >>> indexer = indexing.BasicIndexer((slice(None),))
    >>> Timedelta64TypeArray(x, np.dtype("timedelta64[ns]"))[indexer].dtype
    dtype('timedelta64[ns]')
    """

    __slots__ = ("_dtype", "array")

    def __init__(self, array, dtype: np.typing.DTypeLike) -> None:
        # Keep the dtype exactly as given; it is normalized by the ``dtype``
        # property on access.
        self._dtype = dtype
        self.array = indexing.as_indexable(array)

    @property
    def dtype(self):
        """The target dtype, normalized to a ``np.dtype`` instance."""
        return np.dtype(self._dtype)

    def _oindex_get(self, key):
        """Index through ``array.oindex`` and cast the result to ``dtype``."""
        selected = self.array.oindex[key]
        return np.asarray(selected, dtype=self.dtype)

    def _vindex_get(self, key):
        """Index through ``array.vindex`` and cast the result to ``dtype``."""
        selected = self.array.vindex[key]
        return np.asarray(selected, dtype=self.dtype)

    def __getitem__(self, key) -> np.ndarray:
        """Index the wrapped array directly and cast the result to ``dtype``."""
        selected = self.array[key]
        return np.asarray(selected, dtype=self.dtype)
|
||
|
||
def lazy_elemwise_func(array, func: Callable, dtype: np.typing.DTypeLike): | ||
"""Lazily apply an element-wise function to an array. | ||
Parameters | ||
|
@@ -345,7 +386,7 @@ | |
# otherwise numpy unsigned ints will silently cast to the signed counterpart | ||
fill_value = fill_value.item() | ||
# passes if provided fill value fits in encoded on-disk type | ||
new_fill = encoded_dtype.type(fill_value) | ||
Check warning on line 389 in xarray/coding/variables.py
|
||
except OverflowError: | ||
encoded_kind_str = "signed" if encoded_dtype.kind == "i" else "unsigned" | ||
warnings.warn( | ||
|
@@ -738,3 +779,37 @@ | |
|
||
def decode(self, variable: Variable, name: T_Name = None) -> Variable: | ||
raise NotImplementedError() | ||
|
||
|
||
class LiteralTimedelta64Coder(VariableCoder):
    """Code np.timedelta64 values.

    Encoding casts timedelta64 data to int64 and records the original dtype
    (e.g. ``"timedelta64[ns]"``) in the ``dtype`` attribute. Decoding wraps
    the data of any variable whose ``dtype`` attribute is a string starting
    with ``"timedelta64"`` so that it is presented with that dtype again.
    Every other variable is returned unchanged.
    """

    def encode(self, variable: Variable, name: T_Name = None) -> Variable:
        """Encode a timedelta64 variable as int64 plus a ``dtype`` attribute.

        Parameters
        ----------
        variable : Variable
            Variable to encode.
        name : T_Name, optional
            Unused here; part of the coder interface.

        Returns
        -------
        Variable
            A new int64 variable with ``attrs["dtype"]`` set to
            ``"timedelta64[<resolution>]"`` if the input holds timedelta64
            data, otherwise ``variable`` itself.
        """
        if not np.issubdtype(variable.data.dtype, np.timedelta64):
            return variable

        dims, data, attrs, encoding = unpack_for_encoding(variable)
        resolution, _ = np.datetime_data(variable.dtype)
        attrs["dtype"] = f"timedelta64[{resolution}]"
        data = duck_array_ops.astype(data, dtype=np.int64, copy=True)
        return Variable(dims, data, attrs, encoding, fastpath=True)

    def decode(self, variable: Variable, name: T_Name = None) -> Variable:
        """Decode a variable carrying a ``timedelta64[...]`` ``dtype`` attribute.

        Parameters
        ----------
        variable : Variable
            Variable to decode.
        name : T_Name, optional
            Unused here; part of the coder interface.

        Returns
        -------
        Variable
            A new variable whose data is wrapped in ``Timedelta64TypeArray``
            if ``attrs["dtype"]`` is a string starting with ``"timedelta64"``,
            otherwise ``variable`` itself.

        Raises
        ------
        ValueError
            If the resolution of the encoded dtype is not one of the options
            listed in ``PDDatetimeUnitOptions``.
        """
        dtype_attr = variable.attrs.get("dtype")
        # Guard against a non-string ``dtype`` attribute: calling
        # ``.startswith`` on it would raise AttributeError instead of letting
        # the variable pass through undecoded.
        if not (isinstance(dtype_attr, str) and dtype_attr.startswith("timedelta64")):
            return variable

        dims, data, attrs, encoding = unpack_for_decoding(variable)
        # overwrite (!) dtype in encoding, and remove from attrs
        # needed for correct subsequent encoding
        # NOTE(review): a reviewer flagged this direct attrs -> encoding move
        # (their comment is truncated in this view) -- confirm whether the
        # project's usual helper should be used here instead.
        encoding["dtype"] = attrs.pop("dtype")
        dtype = np.dtype(encoding["dtype"])
        resolution, _ = np.datetime_data(dtype)
        if resolution not in typing.get_args(PDDatetimeUnitOptions):
            raise ValueError(
                "Following pandas, xarray only supports decoding to "
                "timedelta64 values with a resolution of 's', 'ms', "
                "'us', or 'ns'. Encoded values have a resolution of "
                f"{resolution!r}."
            )
        data = Timedelta64TypeArray(data, dtype)
        return Variable(dims, data, attrs, encoding, fastpath=True)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about also including `units` in attrs? That would make timedelta64 encoding still specify units in the style of CF conventions, which could make us a little more compatible with non-Xarray tools.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should be possible, but this would need an additional check inside `CFTimedeltaCoder` to prevent premature encoding and decoding if both attributes are attached.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree this is a good idea, though does increase the complexity a little. I gave it a try in my latest push.