Skip to content

Commit 3e3b7fa

Browse files
author
Scott Wales
authored
Merge pull request #27 from ScottWales/groupbystr
Allow pandas periods for blocked_resample
2 parents: a7601f2 + d76c2b4; commit: 3e3b7fa

3 files changed

Lines changed: 64 additions & 4 deletions

File tree

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ source =
1414
[tool:pytest]
1515
addopts = --doctest-modules --doctest-glob="*.rst"
1616
doctest_optionflags=ELLIPSIS
17-
norecursedirs = benchmark notebooks
17+
norecursedirs = benchmarks notebooks .asv
1818

1919
[mypy]
2020
files = src/climtas,test

src/climtas/blocked.py

Lines changed: 33 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -137,6 +137,10 @@ def resample_op(block, op, axis, count):
137137
v = v[:: self.count]
138138
result.coords[k] = v
139139

140+
# Set after we create 'result' - if the original name is None it will
141+
# be replaced by the dask name, so results won't be identical to xarray
142+
result.name = self.da.name
143+
140144
return result
141145

142146
def mean(self) -> xarray.DataArray:
@@ -170,21 +174,47 @@ def blocked_resample(da: xarray.DataArray, indexer=None, **kwargs) -> BlockedRes
170174
>>> time = pandas.date_range('20010101','20010110', freq='H', closed='left')
171175
>>> hourly = xarray.DataArray(numpy.random.random(time.size), coords=[('time', time)])
172176
177+
>>> blocked_daily_max = blocked_resample(hourly, time='1D').max()
178+
>>> xarray_daily_max = hourly.resample(time='1D').max()
179+
>>> xarray.testing.assert_identical(blocked_daily_max, xarray_daily_max)
180+
173181
>>> blocked_daily_max = blocked_resample(hourly, time=24).max()
174182
>>> xarray_daily_max = hourly.resample(time='1D').max()
175-
>>> xarray.testing.assert_equal(blocked_daily_max, xarray_daily_max)
183+
>>> xarray.testing.assert_identical(blocked_daily_max, xarray_daily_max)
176184
177185
Args:
178186
da (:class:`xarray.DataArray`): Resample target
179-
indexer/kwargs (Dict[dim, count]): Mapping of dimension name to count along that axis
187+
indexer/kwargs (Dict[dim, count]): Mapping of dimension name to count
188+
along that axis. May be an integer or a time interval understood by
189+
pandas (that interval must evenly divide the dataset).
180190
181191
Returns:
182192
:class:`BlockedResampler`
183193
"""
184194
if indexer is None:
185195
indexer = kwargs
186-
assert len(indexer) == 1
196+
else:
197+
indexer = {**indexer, **kwargs}
198+
199+
if len(indexer) != 1:
200+
raise Exception(
201+
f"Only one dimension can be resampled at a time, received {indexer}"
202+
)
203+
187204
dim, count = list(indexer.items())[0]
205+
206+
if not isinstance(count, int):
207+
# Something like a pandas period, resample the time axis to get the count
208+
counts = da[dim].resample({dim: count}).count()
209+
if counts.min() != counts.max():
210+
raise Exception(
211+
f"Period '{count}' does not evenly divide dimension '{dim}'"
212+
)
213+
count = counts.values[0]
214+
215+
if da.sizes[dim] % count != 0:
216+
raise Exception(f"Period '{count}' does not evenly divide dimension '{dim}'")
217+
188218
return BlockedResampler(da, dim=dim, count=count)
189219

190220

test/test_blocked.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,22 @@ def sample(request):
3838
return samples[request.param]
3939

4040

41+
@pytest.fixture(params=["daily", "daily_dask"])
42+
def sample_hr(request):
43+
time = pandas.date_range("20020101", "20050101", freq="H", closed="left")
44+
45+
samples = {
46+
"daily": xarray.DataArray(
47+
numpy.random.random(time.size), coords=[("time", time)]
48+
),
49+
"daily_dask": xarray.DataArray(
50+
dask.array.random.random(time.size), coords=[("time", time)]
51+
),
52+
}
53+
54+
return samples[request.param]
55+
56+
4157
def test_groupby_dayofyear(sample):
4258
time = pandas.date_range("20020101", "20050101", freq="D", closed="left")
4359
daily = xarray.DataArray(numpy.random.random(time.size), coords=[("time", time)])
@@ -171,6 +187,20 @@ def test_resample_safety(sample):
171187
blocked_resample(sliced, time=24)
172188

173189

190+
def test_resample(sample_hr):
191+
expected = sample_hr.resample(time="D").mean()
192+
193+
result = blocked_resample(sample_hr, time=24).mean()
194+
xarray.testing.assert_equal(expected, result)
195+
xarray.testing.assert_identical(expected, result)
196+
197+
result = blocked_resample(sample_hr, time="D").mean()
198+
xarray.testing.assert_identical(expected, result)
199+
200+
result = blocked_resample(sample_hr, {"time": "D"}).mean()
201+
xarray.testing.assert_identical(expected, result)
202+
203+
174204
def test_groupby_safety(sample):
175205
# Not a coordinate
176206
sliced = sample

0 commit comments

Comments
 (0)