Add quantile benchmark (#418)

dcherian · web-flow · commit ac319cc2edc7 · 2025-01-23T21:39:35.000-07:00
diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json
@@ -21,7 +21,6 @@
   // Customizable commands for building, installing, and
   // uninstalling the project. See asv.conf.json documentation.
   //
-  // "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}"],
   // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],
   // "build_command": [
   //     "python setup.py build",
@@ -32,6 +31,9 @@
     "python setup.py build",
     "python -mpip wheel --no-deps --no-build-isolation --no-index -w {build_cache_dir} {build_dir}"
   ],
+  "install_command": [
+    "in-dir={env_dir} python -mpip install {wheel_file} --no-deps"
+  ],
 
   // List of branches to benchmark. If not provided, defaults to "master"
   // (for git) or "default" (for mercurial).
diff --git a/asv_bench/benchmarks/cohorts.py b/asv_bench/benchmarks/cohorts.py
@@ -6,6 +6,8 @@
 
 import flox
 
+from .helpers import codes_for_resampling
+
 
 class Cohorts:
     """Time the core reduction function."""
@@ -200,15 +202,6 @@ def setup(self, *args, **kwargs):
         self.expected = pd.RangeIndex(self.by.max() + 1)
 
 
-def codes_for_resampling(group_as_index, freq):
-    s = pd.Series(np.arange(group_as_index.size), group_as_index)
-    grouped = s.groupby(pd.Grouper(freq=freq))
-    first_items = grouped.first()
-    counts = grouped.count()
-    codes = np.repeat(np.arange(len(first_items)), counts)
-    return codes
-
-
 class PerfectBlockwiseResampling(Cohorts):
     """Perfectly chunked for blockwise resampling."""
 
diff --git a/asv_bench/benchmarks/helpers.py b/asv_bench/benchmarks/helpers.py
@@ -0,0 +1,11 @@
+import numpy as np
+import pandas as pd
+
+
+def codes_for_resampling(group_as_index: pd.Index, freq: str) -> np.ndarray:  # integer bin codes for resampling group_as_index at freq
+    s = pd.Series(np.arange(group_as_index.size), group_as_index)  # positions 0..size-1, indexed by group_as_index
+    grouped = s.groupby(pd.Grouper(freq=freq))  # bin the series by freq
+    first_items = grouped.first()  # one entry per bin; only its length is used below
+    counts = grouped.count()  # number of elements counted in each bin
+    codes = np.repeat(np.arange(len(first_items)), counts)  # code i repeated counts[i] times; NOTE(review): matches input order only if group_as_index is sorted - confirm
+    return codes
diff --git a/asv_bench/benchmarks/reduce.py b/asv_bench/benchmarks/reduce.py
@@ -1,9 +1,13 @@
 import numpy as np
 import pandas as pd
+import xarray as xr
 from asv_runner.benchmarks.mark import parameterize, skip_for_params
 
 import flox
 import flox.aggregations
+import flox.xarray
+
+from .helpers import codes_for_resampling
 
 N = 3000
 funcs = ["sum", "nansum", "mean", "nanmean", "max", "nanmax", "count"]
@@ -138,3 +142,20 @@ def setup(self, *args, **kwargs):
 #         self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
 #         self.axis = None
 #         setup_jit()
+
+
+class Quantile:  # asv benchmark: times a grouped quantile through flox.xarray.xarray_reduce
+    def setup(self, *args, **kwargs):  # builds self.da and self.codes, which time_quantile reads
+        shape = (31411, 25, 25, 1)  # sizes for (time, lat, lon, lab); 31411 is the number of days in `time` below
+
+        time = pd.date_range("2014-01-01", "2099-12-31", freq="D")  # daily timestamps, 2014-01-01 through 2099-12-31
+        self.da = xr.DataArray(  # random-valued input array with a "time" coordinate
+            np.random.randn(*shape),  # unseeded, so the values differ between runs
+            name="pr",
+            dims=("time", "lat", "lon", "lab"),
+            coords={"time": time},
+        )
+        self.codes = xr.DataArray(dims="time", data=codes_for_resampling(time, "YE"), name="time")  # group labels along "time", from "YE" bins
+
+    def time_quantile(self):  # timed body: q=0.9 quantile of self.da grouped by self.codes
+        flox.xarray.xarray_reduce(self.da, self.codes, engine="flox", func="quantile", q=0.9)  # result is discarded
diff --git a/readthedocs.yml b/readthedocs.yml
@@ -1,5 +1,9 @@
 version: 2
 
+sphinx:
+  # Path to your Sphinx configuration file.
+  configuration: docs/source/conf.py
+
 build:
   os: "ubuntu-lts-latest"
   tools: