numpy 2.0 compat (#311)

dcherian · web-flow · commit e26fb39e7d68 · 2024-01-17T22:47:51.000Z
* numpy 2.0 compat

* Update upstream-dev-ci

* Update again

* Fix env building

* Update type-ignore

* Add mamba

* Better version check

* Remove numbagg from upstream

* Allow deps

* add dateutil

* add list deps

* mamba -> micromamba

* remove env cache

* change order

* remove

* update env

* Try again

* Try building cftime

* Try again

* Small updates

* Remove netCDF4

* fix type ignore
diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml
@@ -7,6 +7,9 @@ on:
     types: [opened, reopened, synchronize, labeled]
     branches:
       - main
+    paths:
+      - ".github/workflows/upstream-dev-ci.yaml"
+      - "ci/upstream-dev-env.yml"
   schedule:
     - cron: "0 0 * * *" # Daily “At 00:00” UTC
   workflow_dispatch: # allows you to trigger the workflow run manually
@@ -41,16 +44,49 @@ jobs:
       - name: Set up conda environment
         uses: mamba-org/setup-micromamba@v1
         with:
-          environment-file: ci/upstream-dev-env.yml
           environment-name: flox-tests
           init-shell: bash
-          cache-environment: true
+          # cache-environment: true
+          # micromamba list does not list pip dependencies, so install mamba
           create-args: >-
+            mamba
+            pip
             python=${{ matrix.python-version }}
             pytest-reportlog
+
+      - name: Install upstream dev dependencies
+        run: |
+          # install cython for building cftime without build isolation
+          micromamba install -f ci/upstream-dev-env.yml
+          micromamba remove --force numpy scipy pandas cftime
+          python -m pip install \
+            -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \
+            --no-deps \
+            --pre \
+            --upgrade \
+            numpy \
+            scipy \
+            pandas \
+            xarray
+          # without build isolation for packages compiling against numpy
+          # TODO: remove once there are `numpy>=2.0` builds for cftime
+          python -m pip install \
+            --no-deps \
+            --upgrade \
+            --no-build-isolation \
+            git+https://github.com/Unidata/cftime
+          python -m pip install \
+            git+https://github.com/dask/dask \
+            git+https://github.com/ml31415/numpy-groupies
+
       - name: Install flox
         run: |
           python -m pip install --no-deps -e .
+
+      - name: List deps
+        run: |
+          # micromamba list does not list pip dependencies
+          mamba list
       - name: Run Tests
         if: success()
         id: status
diff --git a/ci/environment.yml b/ci/environment.yml
@@ -4,9 +4,9 @@ channels:
 dependencies:
   - asv
   - cachey
+  - cftime
   - codecov
   - dask-core
-  - netcdf4
   - pandas
   - numpy>=1.22
   - scipy
diff --git a/ci/minimal-requirements.yml b/ci/minimal-requirements.yml
@@ -3,7 +3,6 @@ channels:
   - conda-forge
 dependencies:
   - codecov
-  - netcdf4
   - pip
   - pytest
   - pytest-cov
diff --git a/ci/no-dask.yml b/ci/no-dask.yml
@@ -3,8 +3,8 @@ channels:
   - conda-forge
 dependencies:
   - codecov
-  - netcdf4
   - pandas
+  - cftime
   - numpy>=1.22
   - scipy
   - pip
diff --git a/ci/no-numba.yml b/ci/no-numba.yml
@@ -4,9 +4,9 @@ channels:
 dependencies:
   - asv
   - cachey
+  - cftime
   - codecov
   - dask-core
-  - netcdf4
   - pandas
   - numpy>=1.22
   - scipy
diff --git a/ci/no-xarray.yml b/ci/no-xarray.yml
@@ -3,7 +3,6 @@ channels:
   - conda-forge
 dependencies:
   - codecov
-  - netcdf4
   - pandas
   - numpy>=1.22
   - scipy
diff --git a/ci/upstream-dev-env.yml b/ci/upstream-dev-env.yml
@@ -4,19 +4,25 @@ channels:
 dependencies:
   - cachey
   - codecov
-  - netcdf4
   - pooch
   - toolz
-  - numba
-  - scipy
-  - pytest
-  - pytest-cov
+  # - numpy
+  # - pandas
+  # - scipy
   - pytest-pretty
   - pytest-xdist
   - pip
-  - pip:
-      - git+https://github.com/pydata/xarray
-      - git+https://github.com/pandas-dev/pandas
-      - git+https://github.com/dask/dask
-      - git+https://github.com/ml31415/numpy-groupies
-      - git+https://github.com/numbagg/numbagg
+  # for cftime
+  - cython>=0.29.20
+  - py-cpuinfo
+  # - numba
+  - pytest
+  - pytest-cov
+  # for upstream pandas
+  - python-dateutil
+  - pytz
+  # - pip:
+  #     - git+https://github.com/pydata/xarray
+  #     - git+https://github.com/dask/dask
+  #     - git+https://github.com/ml31415/numpy-groupies
+  #     # - git+https://github.com/numbagg/numbagg
diff --git a/flox/core.py b/flox/core.py
@@ -38,6 +38,13 @@
 from .cache import memoize
 from .xrutils import is_duck_array, is_duck_dask_array, isnull, module_available
 
+if module_available("numpy", minversion="2.0.0"):
+    from numpy.lib.array_utils import (  # type: ignore[import-not-found]
+        normalize_axis_tuple,
+    )
+else:
+    from numpy.core.numeric import normalize_axis_tuple  # type: ignore[attr-defined]
+
 HAS_NUMBAGG = module_available("numbagg", minversion="0.3.0")
 
 if TYPE_CHECKING:
@@ -2179,8 +2186,7 @@ def groupby_reduce(
     if axis is None:
         axis_ = tuple(array.ndim + np.arange(-by_.ndim, 0))
     else:
-        # TODO: How come this function doesn't exist according to mypy?
-        axis_ = np.core.numeric.normalize_axis_tuple(axis, array.ndim)  # type: ignore[attr-defined]
+        axis_ = normalize_axis_tuple(axis, array.ndim)
     nax = len(axis_)
 
     has_dask = is_duck_dask_array(array) or is_duck_dask_array(by_)
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -45,6 +45,7 @@ def LooseVersion(vstring):
     return packaging.version.Version(vstring)
 
 
+has_cftime, requires_cftime = _importorskip("cftime")
 has_dask, requires_dask = _importorskip("dask")
 has_numba, requires_numba = _importorskip("numba")
 has_numbagg, requires_numbagg = _importorskip("numbagg")
diff --git a/tests/test_xarray.py b/tests/test_xarray.py
@@ -8,7 +8,13 @@
 
 from flox.xarray import rechunk_for_blockwise, xarray_reduce
 
-from . import assert_equal, has_dask, raise_if_dask_computes, requires_dask
+from . import (
+    assert_equal,
+    has_dask,
+    raise_if_dask_computes,
+    requires_cftime,
+    requires_dask,
+)
 
 if has_dask:
     import dask
@@ -178,10 +184,18 @@ def test_validate_expected_groups(expected_groups):
         )
 
 
+@requires_cftime
 @requires_dask
 def test_xarray_reduce_single_grouper(engine):
     # DataArray
-    ds = xr.tutorial.open_dataset("rasm", chunks={"time": 9})
+    ds = xr.Dataset(
+        {"Tair": (("time", "x", "y"), dask.array.ones((36, 205, 275), chunks=(9, -1, -1)))},
+        coords={
+            "time": xr.date_range(
+                "1980-09-01 00:00", "1983-09-18 00:00", freq="ME", calendar="noleap"
+            )
+        },
+    )
     actual = xarray_reduce(ds.Tair, ds.time.dt.month, func="mean", engine=engine)
     expected = ds.Tair.groupby("time.month").mean()
     xr.testing.assert_allclose(actual, expected)
@@ -355,7 +369,14 @@ def test_xarray_groupby_bins(chunks, engine):
 def test_func_is_aggregation():
     from flox.aggregations import mean
 
-    ds = xr.tutorial.open_dataset("rasm", chunks={"time": 9})
+    ds = xr.Dataset(
+        {"Tair": (("time", "x", "y"), dask.array.ones((36, 205, 275), chunks=(9, -1, -1)))},
+        coords={
+            "time": xr.date_range(
+                "1980-09-01 00:00", "1983-09-18 00:00", freq="ME", calendar="noleap"
+            )
+        },
+    )
     expected = xarray_reduce(ds.Tair, ds.time.dt.month, func="mean")
     actual = xarray_reduce(ds.Tair, ds.time.dt.month, func=mean)
     xr.testing.assert_allclose(actual, expected)
@@ -392,10 +413,18 @@ def test_func_is_aggregation():
 @requires_dask
 @pytest.mark.parametrize("method", ["cohorts", "map-reduce"])
 def test_groupby_bins_indexed_coordinate(method):
-    ds = (
-        xr.tutorial.open_dataset("air_temperature")
-        .isel(time=slice(100))
-        .chunk({"time": 20, "lat": 5})
+    ds = xr.Dataset(
+        {
+            "air": (
+                ("time", "lat", "lon"),
+                dask.array.random.random((125, 25, 53), chunks=(20, 5, -1)),
+            )
+        },
+        coords={
+            "time": pd.date_range("2013-01-01", "2013-02-01", freq="6H"),
+            "lat": np.arange(75.0, 14.9, -2.5),
+            "lon": np.arange(200.0, 331.0, 2.5),
+        },
     )
     bins = [40, 50, 60, 70]
     expected = ds.groupby_bins("lat", bins=bins).mean(keep_attrs=True, dim=...)