|
2 | 2 | # defined in xarray
|
3 | 3 |
|
4 | 4 |
|
| 5 | +import datetime |
5 | 6 | from typing import Any, Iterable
|
6 | 7 |
|
7 | 8 | import numpy as np
|
8 | 9 | import pandas as pd
|
9 | 10 |
|
| 11 | +try: |
| 12 | + import cftime |
| 13 | +except ImportError: |
| 14 | + cftime = None |
| 15 | + |
| 16 | + |
10 | 17 | try:
|
11 | 18 | import dask.array
|
12 | 19 |
|
|
15 | 22 | dask_array_type = ()
|
16 | 23 |
|
17 | 24 |
|
def asarray(data, xp=np):
    """Return ``data`` as an array without converting existing duck arrays.

    Parameters
    ----------
    data : Any
        Object to convert.
    xp : module, optional
        Namespace whose ``asarray`` is used for the conversion. Defaults to
        ``numpy``.

    Returns
    -------
    ``data`` itself when ``is_duck_array(data)`` is true, otherwise
    ``xp.asarray(data)``.
    """
    if is_duck_array(data):
        return data
    return xp.asarray(data)
| 27 | + |
| 28 | + |
18 | 29 | def is_duck_array(value: Any) -> bool:
|
19 | 30 | """Checks if value is a duck array."""
|
20 | 31 | if isinstance(value, np.ndarray):
|
@@ -110,3 +121,165 @@ def isnull(data):
|
110 | 121 | # a null value as well as NaN, but it isn't clear how to do this
|
111 | 122 | # with duck typing.
|
112 | 123 | return data != data
|
| 124 | + |
| 125 | + |
def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
    """Convert an array containing datetime-like data to numerical values.

    The datetime array is first turned into a timedelta relative to
    ``offset``; that timedelta is then converted to numbers.

    Parameters
    ----------
    array : array-like
        Input data.
    offset : None, datetime or cftime.datetime
        Datetime offset. If None, the minimum of ``array`` is used, which
        reduces round-off errors.
    datetime_unit : {None, Y, M, W, D, h, m, s, ms, us, ns, ps, fs, as}
        If not None, convert output to the given datetime unit. Note that some
        conversions are not allowed due to non-linear relationships between
        units.
    dtype : dtype
        Output dtype.

    Returns
    -------
    array
        Numerical representation of the datetime data relative to the offset.
        ``None`` is returned when the difference ``array - offset`` is neither
        of object dtype nor of a datetime64/timedelta64 dtype.

    Notes
    -----
    Some datetime unit conversions won't work, for example from days to years,
    even though some calendars would allow for them (e.g. no_leap). This is
    because there is no `cftime.timedelta` object.
    """
    # TODO: make this function dask-compatible?
    from xarray.core.duck_array_ops import _datetime_nanmin

    # Default the offset to the smallest value in the array.
    if offset is None:
        offset = _datetime_nanmin(array) if array.dtype.kind in "Mm" else min(array)

    # Compute the timedelta.
    # For np.datetime64 this can silently yield garbage due to overflow.
    # One option is to enforce 1970-01-01 as the universal offset.

    # The map_blocks path exists for backwards compatibility:
    # dask == 2021.04.1 does not support subtracting object arrays,
    # which is required for cftime.
    subtract_blockwise = is_duck_dask_array(array) and np.issubdtype(
        array.dtype, object
    )
    if subtract_blockwise:
        delta = array.map_blocks(lambda a, b: a - b, offset, meta=array._meta)
    else:
        delta = array - offset

    # A scalar result is wrapped into a 0d-array.
    if not hasattr(delta, "dtype"):
        delta = np.array(delta)

    kind = delta.dtype.kind

    # Object dtype: convert timedelta objects to float (via microseconds).
    if kind in "O":
        return py_timedelta_to_float(delta, datetime_unit or "ns").astype(dtype)

    # datetime64 / timedelta64: map NaT to NaN.
    if kind in "mM":
        # Express the result in the requested unit, if any.
        if datetime_unit:
            delta = delta / np.timedelta64(1, datetime_unit)
        return np.where(isnull(delta), np.nan, delta.astype(dtype))

    # Any other dtype is left unhandled.
    return None
| 188 | + |
| 189 | + |
def timedelta_to_numeric(value, datetime_unit="ns", dtype=float):
    """Convert a timedelta-like object to numerical values.

    Parameters
    ----------
    value : datetime.timedelta, numpy.timedelta64, pandas.Timedelta, str
        Time delta representation. Strings are parsed with
        ``pandas.to_timedelta``.
    datetime_unit : {Y, M, W, D, h, m, s, ms, us, ns, ps, fs, as}
        The time units of the output values. Note that some conversions are
        not allowed due to non-linear relationships between units.
    dtype : type
        The output data type. It is applied to every supported input type,
        including strings.

    Returns
    -------
    The duration expressed in ``datetime_unit``, cast to ``dtype``.

    Raises
    ------
    ValueError
        If ``value`` is a string that ``pandas.to_timedelta`` cannot parse.
    TypeError
        If ``value`` is not one of the supported types.
    """
    import datetime as dt

    # pandas.Timedelta is a subclass of datetime.timedelta, so it has to be
    # tested first; otherwise its dedicated branch can never be reached.
    if isinstance(value, pd.Timedelta):
        out = pd_timedelta_to_float(value, datetime_unit)
    elif isinstance(value, dt.timedelta):
        out = py_timedelta_to_float(value, datetime_unit)
    elif isinstance(value, np.timedelta64):
        out = np_timedelta64_to_float(value, datetime_unit)
    elif isinstance(value, str):
        try:
            parsed = pd.to_timedelta(value)
        except ValueError as err:
            raise ValueError(
                f"Could not convert {value!r} to timedelta64 using pandas.to_timedelta"
            ) from err
        # No early return here, so the requested dtype is honoured for
        # string input as well.
        out = py_timedelta_to_float(parsed, datetime_unit)
    else:
        raise TypeError(
            f"Expected value of type str, pandas.Timedelta, datetime.timedelta "
            f"or numpy.timedelta64, but received {type(value).__name__}"
        )
    return out.astype(dtype)
| 226 | + |
| 227 | + |
def _to_pytimedelta(array, unit="us"):
    """Cast ``array`` to ``timedelta64[unit]`` and then to ``datetime.timedelta``.

    Parameters
    ----------
    array : array-like with an ``astype`` method
        Input data.
    unit : str, optional
        Unit of the intermediate ``timedelta64`` dtype. Defaults to ``"us"``.
    """
    intermediate_dtype = f"timedelta64[{unit}]"
    as_timedelta64 = array.astype(intermediate_dtype)
    return as_timedelta64.astype(datetime.timedelta)
| 230 | + |
| 231 | + |
def np_timedelta64_to_float(array, datetime_unit):
    """Convert numpy.timedelta64 to float.

    Parameters
    ----------
    array : numpy.timedelta64 scalar or array
        Input durations.
    datetime_unit : str
        Unit in which the returned floats are expressed.

    Notes
    -----
    The input is first cast to nanoseconds and to float64, then rescaled to
    ``datetime_unit``.
    """
    nanoseconds = array.astype("timedelta64[ns]").astype(np.float64)
    # Number of ``datetime_unit`` per nanosecond.
    units_per_ns = np.timedelta64(1, "ns") / np.timedelta64(1, datetime_unit)
    return units_per_ns * nanoseconds
| 243 | + |
| 244 | + |
def pd_timedelta_to_float(value, datetime_unit):
    """Convert pandas.Timedelta to float.

    Parameters
    ----------
    value : pandas.Timedelta
        Input duration.
    datetime_unit : str
        Unit in which the returned float is expressed.

    Notes
    -----
    The value is turned into a ``numpy.timedelta64`` with
    ``Timedelta.to_timedelta64`` and handed to ``np_timedelta64_to_float``.
    Built on the assumption that pandas timedelta values are in nanoseconds,
    which is also the numpy default resolution.
    """
    return np_timedelta64_to_float(value.to_timedelta64(), datetime_unit)
| 255 | + |
| 256 | + |
def _timedelta_to_seconds(array):
    """Return the durations in ``array`` as floats, keeping the input shape.

    Each element's ``total_seconds()`` is multiplied by 1e6, so despite the
    function name the returned values are in microseconds.
    """
    seconds = [delta.total_seconds() for delta in array.ravel()]
    return np.reshape(seconds, array.shape) * 1e6
| 259 | + |
| 260 | + |
def py_timedelta_to_float(array, datetime_unit):
    """Convert a timedelta object to a float, possibly at a loss of resolution.

    Parameters
    ----------
    array : timedelta object or array of timedelta objects
        Input durations; passed through ``asarray`` first.
    datetime_unit : str
        Unit in which the returned floats are expressed.
    """
    values = asarray(array)
    if not is_duck_dask_array(values):
        microseconds = _timedelta_to_seconds(values)
    else:
        # Dask input is converted block by block.
        microseconds = values.map_blocks(
            _timedelta_to_seconds, meta=np.array([], dtype=np.float64)
        )
    # ``_timedelta_to_seconds`` yields microseconds; rescale to the target unit.
    scale = np.timedelta64(1, "us") / np.timedelta64(1, datetime_unit)
    return scale * microseconds
| 270 | + |
| 271 | + |
def _contains_cftime_datetimes(array) -> bool:
    """Check if an array contains cftime.datetime objects.

    Only the first element of a non-empty object-dtype array is inspected.
    ``False`` is returned when cftime is not installed, or when the array is
    empty or not of object dtype.
    """
    if cftime is None:
        return False

    is_object_dtype = array.dtype == np.dtype("O")
    if not is_object_dtype or not array.size > 0:
        return False

    first = array.ravel()[0]
    # The sampled element may still be lazy or wrapped in an array.
    if is_duck_dask_array(first):
        first = first.compute()
    if isinstance(first, np.ndarray):
        first = first.item()
    return isinstance(first, cftime.datetime)
0 commit comments