Skip to content

Commit ac319cc

Browse files
authored
Add quantile benchmark (#418)
1 parent e2aa2be commit ac319cc

File tree

5 files changed

+41
-10
lines changed

5 files changed

+41
-10
lines changed

asv_bench/asv.conf.json

+3-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
// Customizable commands for building, installing, and
2222
// uninstalling the project. See asv.conf.json documentation.
2323
//
24-
// "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}"],
2524
// "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],
2625
// "build_command": [
2726
// "python setup.py build",
@@ -32,6 +31,9 @@
3231
"python setup.py build",
3332
"python -mpip wheel --no-deps --no-build-isolation --no-index -w {build_cache_dir} {build_dir}"
3433
],
34+
"install_command": [
35+
"in-dir={env_dir} python -mpip install {wheel_file} --no-deps"
36+
],
3537

3638
// List of branches to benchmark. If not provided, defaults to "master"
3739
// (for git) or "default" (for mercurial).

asv_bench/benchmarks/cohorts.py

+2-9
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
import flox
88

9+
from .helpers import codes_for_resampling
10+
911

1012
class Cohorts:
1113
"""Time the core reduction function."""
@@ -200,15 +202,6 @@ def setup(self, *args, **kwargs):
200202
self.expected = pd.RangeIndex(self.by.max() + 1)
201203

202204

203-
def codes_for_resampling(group_as_index, freq):
204-
s = pd.Series(np.arange(group_as_index.size), group_as_index)
205-
grouped = s.groupby(pd.Grouper(freq=freq))
206-
first_items = grouped.first()
207-
counts = grouped.count()
208-
codes = np.repeat(np.arange(len(first_items)), counts)
209-
return codes
210-
211-
212205
class PerfectBlockwiseResampling(Cohorts):
213206
"""Perfectly chunked for blockwise resampling."""
214207

asv_bench/benchmarks/helpers.py

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import numpy as np
2+
import pandas as pd
3+
4+
5+
def codes_for_resampling(group_as_index: pd.Index, freq: str) -> np.ndarray:
    """Return integer group codes that mimic resampling ``group_as_index`` at ``freq``.

    Parameters
    ----------
    group_as_index : pd.Index
        Index whose elements are binned with ``pd.Grouper(freq=freq)``.
    freq : str
        Frequency alias forwarded to ``pd.Grouper``.

    Returns
    -------
    np.ndarray
        One integer code per element of ``group_as_index``. Code ``i`` is
        repeated as many times as there are elements in the ``i``-th bin, so
        empty bins consume a code value without emitting any element.

    Notes
    -----
    Codes are produced in bin order, so they line up with the input positions
    only when ``group_as_index`` is sorted in ascending order.
    """
    s = pd.Series(np.arange(group_as_index.size), group_as_index)
    # The per-bin counts carry both pieces of information we need (number of
    # bins and bin sizes); a separate ``first()`` aggregation is unnecessary.
    counts = s.groupby(pd.Grouper(freq=freq)).count()
    return np.repeat(np.arange(len(counts)), counts)

asv_bench/benchmarks/reduce.py

+21
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
import numpy as np
22
import pandas as pd
3+
import xarray as xr
34
from asv_runner.benchmarks.mark import parameterize, skip_for_params
45

56
import flox
67
import flox.aggregations
8+
import flox.xarray
9+
10+
from .helpers import codes_for_resampling
711

812
N = 3000
913
funcs = ["sum", "nansum", "mean", "nanmean", "max", "nanmax", "count"]
@@ -138,3 +142,20 @@ def setup(self, *args, **kwargs):
138142
# self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
139143
# self.axis = None
140144
# setup_jit()
145+
146+
147+
class Quantile:
    """Time a grouped quantile reduction through ``flox.xarray.xarray_reduce``."""

    def setup(self, *args, **kwargs):
        """Build the random input array and the per-timestep group codes."""
        time = pd.date_range("2014-01-01", "2099-12-31", freq="D")
        # Size the leading axis from the time coordinate itself so the data
        # and the coordinate cannot get out of sync if the date range changes
        # (the range above spans 31411 days).
        shape = (time.size, 25, 25, 1)

        self.da = xr.DataArray(
            np.random.randn(*shape),
            name="pr",
            dims=("time", "lat", "lon", "lab"),
            coords={"time": time},
        )
        # Group codes computed with the "YE" frequency alias, one per timestep.
        self.codes = xr.DataArray(dims="time", data=codes_for_resampling(time, "YE"), name="time")

    def time_quantile(self):
        """Benchmark body: 0.9 quantile of ``self.da`` grouped by ``self.codes``."""
        flox.xarray.xarray_reduce(self.da, self.codes, engine="flox", func="quantile", q=0.9)

readthedocs.yml

+4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
version: 2
22

3+
sphinx:
4+
# Path to your Sphinx configuration file.
5+
configuration: docs/source/conf.py
6+
37
build:
48
os: "ubuntu-lts-latest"
59
tools:

0 commit comments

Comments
 (0)