Skip to content

Commit f6dea71

Browse files
authored
compat: Spatialpandas with dask-expr (#1405)
1 parent 6a5dbb3 commit f6dea71

File tree

6 files changed

+18
-241
lines changed

6 files changed

+18
-241
lines changed

datashader/tests/test_dask.py

+9-78
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
import datashader.utils as du
1313

1414
import pytest
15-
from datashader.tests.utils import dask_switcher
1615
from datashader.tests.test_pandas import _pandas
1716

1817
try:
@@ -34,39 +33,26 @@
3433
pytestmark = pytest.importorskip("dask")
3534

3635

37-
38-
@dask_switcher(query=False)
3936
def _dask():
4037
return dd.from_pandas(_pandas(), npartitions=2)
4138

42-
@dask_switcher(query=True)
43-
def _dask_expr():
44-
return dd.from_pandas(_pandas(), npartitions=2)
4539

46-
@dask_switcher(query=False, extras=["dask_cudf"])
4740
def _dask_cudf():
4841
import dask_cudf
42+
4943
_dask = dd.from_pandas(_pandas(), npartitions=2)
5044
if Version(dask_cudf.__version__) >= Version("24.06"):
5145
return _dask.to_backend("cudf")
5246
else:
5347
return dask_cudf.from_dask_dataframe(_dask)
5448

55-
@dask_switcher(query=True, extras=["dask_cudf"])
56-
def _dask_expr_cudf():
57-
import dask_cudf
58-
if Version(dask_cudf.__version__) < Version("24.06"):
59-
pytest.skip("dask-expr requires dask-cudf 24.06 or later")
60-
_dask = dd.from_pandas(_pandas(), npartitions=2)
61-
return _dask.to_backend("cudf")
6249

6350
_backends = [
6451
pytest.param(_dask, id="dask"),
65-
pytest.param(_dask_expr, id="dask-expr"),
6652
pytest.param(_dask_cudf, marks=pytest.mark.gpu, id="dask-cudf"),
67-
pytest.param(_dask_expr_cudf, marks=pytest.mark.gpu, id="dask-expr-cudf"),
6853
]
6954

55+
7056
@pytest.fixture(params=_backends)
7157
def ddf(request):
7258
return request.param()
@@ -76,7 +62,7 @@ def ddf(request):
7662
def npartitions(request):
7763
return request.param
7864

79-
@dask_switcher(query=False)
65+
8066
def _dask_DataFrame(*args, **kwargs):
8167
if kwargs.pop("geo", False):
8268
df = sp.GeoDataFrame(*args, **kwargs)
@@ -85,53 +71,21 @@ def _dask_DataFrame(*args, **kwargs):
8571
return dd.from_pandas(df, npartitions=2)
8672

8773

88-
@dask_switcher(query=True)
89-
def _dask_expr_DataFrame(*args, **kwargs):
90-
if kwargs.pop("geo", False):
91-
pytest.skip("dask-expr currently does not work with spatialpandas")
92-
# df = sp.GeoDataFrame(*args, **kwargs)
93-
else:
94-
df = pd.DataFrame(*args, **kwargs)
95-
return dd.from_pandas(df, npartitions=2)
96-
97-
98-
@dask_switcher(query=False, extras=["dask_cudf"])
9974
def _dask_cudf_DataFrame(*args, **kwargs):
10075
import cudf
10176
import dask_cudf
102-
if kwargs.pop("geo", False):
103-
# As of dask-cudf version 24.06, dask-cudf is not
104-
# compatible with spatialpandas version 0.4.10
105-
pytest.skip("dask-cudf currently does not work with spatialpandas")
106-
cdf = cudf.DataFrame.from_pandas(
107-
pd.DataFrame(*args, **kwargs), nan_as_null=False
108-
)
109-
return dask_cudf.from_cudf(cdf, npartitions=2)
110-
111-
112-
@dask_switcher(query=True, extras=["dask_cudf"])
113-
def _dask_expr_cudf_DataFrame(*args, **kwargs):
114-
import cudf
115-
import dask_cudf
116-
117-
if Version(dask_cudf.__version__) < Version("24.06"):
118-
pytest.skip("dask-expr requires dask-cudf 24.06 or later")
11977

12078
if kwargs.pop("geo", False):
12179
# As of dask-cudf version 24.06, dask-cudf is not
12280
# compatible with spatialpandas version 0.4.10
12381
pytest.skip("dask-cudf currently does not work with spatialpandas")
124-
cdf = cudf.DataFrame.from_pandas(
125-
pd.DataFrame(*args, **kwargs), nan_as_null=False
126-
)
82+
cdf = cudf.DataFrame.from_pandas(pd.DataFrame(*args, **kwargs), nan_as_null=False)
12783
return dask_cudf.from_cudf(cdf, npartitions=2)
12884

12985

13086
_backends = [
13187
pytest.param(_dask_DataFrame, id="dask"),
132-
pytest.param(_dask_expr_DataFrame, id="dask-expr"),
13388
pytest.param(_dask_cudf_DataFrame, marks=pytest.mark.gpu, id="dask-cudf"),
134-
pytest.param(_dask_expr_cudf_DataFrame, marks=pytest.mark.gpu, id="dask-expr-cudf"),
13589
]
13690

13791
@pytest.fixture(params=_backends)
@@ -163,25 +117,6 @@ def floats(n):
163117
n = n + np.spacing(n)
164118

165119

166-
@pytest.mark.gpu
167-
def test_check_query_setting():
168-
import os
169-
from subprocess import check_output, SubprocessError
170-
171-
# dask-cudf does not support query planning as of 24.04.
172-
# So we check that it is not set outside of Python.
173-
assert os.environ.get('DASK_DATAFRAME__QUERY_PLANNING', 'false').lower() != 'true'
174-
175-
# This also have problem with the global setting so we check
176-
try:
177-
cmd = ['dask', 'config', 'get', 'dataframe.query-planning']
178-
output = check_output(cmd, text=True).strip().lower()
179-
assert output != 'true'
180-
except SubprocessError:
181-
# Newer version will error out if not set
182-
pass
183-
184-
185120
def test_count(ddf, npartitions):
186121
ddf = ddf.repartition(npartitions=npartitions)
187122
assert ddf.npartitions == npartitions
@@ -1236,7 +1171,6 @@ def test_log_axis_points(ddf):
12361171

12371172

12381173
@pytest.mark.skipif(not sp, reason="spatialpandas not installed")
1239-
@dask_switcher(query=False, extras=["spatialpandas.dask"])
12401174
def test_points_geometry():
12411175
axis = ds.core.LinearAxis()
12421176
lincoords = axis.compute_index(axis.compute_scale_and_translate((0., 2.), 3), 3)
@@ -1257,7 +1191,6 @@ def test_points_geometry():
12571191
assert_eq_xr(agg, out)
12581192

12591193

1260-
@dask_switcher(query=False, extras=["spatialpandas.dask"])
12611194
def test_line(DataFrame):
12621195
axis = ds.core.LinearAxis()
12631196
lincoords = axis.compute_index(axis.compute_scale_and_translate((-3., 3.), 7), 7)
@@ -1339,7 +1272,6 @@ def test_line(DataFrame):
13391272
}, dtype='Line[int64]'), dict(geometry='geom'))
13401273
)
13411274

1342-
@dask_switcher(query=False, extras=["spatialpandas.dask"])
13431275
@pytest.mark.parametrize('df_kwargs,cvs_kwargs', line_manual_range_params[5:7])
13441276
def test_line_manual_range(DataFrame, df_kwargs, cvs_kwargs, request):
13451277
if "cudf" in request.node.name:
@@ -1452,7 +1384,6 @@ def test_line_manual_range(DataFrame, df_kwargs, cvs_kwargs, request):
14521384
}, dtype='Line[int64]'), dict(geometry='geom'))
14531385
)
14541386

1455-
@dask_switcher(query=False, extras=["spatialpandas.dask"])
14561387
@pytest.mark.parametrize('df_kwargs,cvs_kwargs', line_autorange_params)
14571388
def test_line_autorange(DataFrame, df_kwargs, cvs_kwargs, request):
14581389
if "cudf" in request.node.name:
@@ -1621,7 +1552,7 @@ def test_auto_range_line(DataFrame):
16211552
}, dtype='Ragged[float32]'), dict(x='x', y='y', axis=1))
16221553
])
16231554
def test_area_to_zero_fixedrange(DataFrame, df_kwargs, cvs_kwargs):
1624-
if DataFrame in (_dask_cudf_DataFrame, _dask_expr_cudf_DataFrame):
1555+
if DataFrame == _dask_cudf_DataFrame:
16251556
if df_kwargs.get('dtype', '').startswith('Ragged'):
16261557
pytest.skip("Ragged array not supported with cudf")
16271558

@@ -1713,7 +1644,7 @@ def test_area_to_zero_fixedrange(DataFrame, df_kwargs, cvs_kwargs):
17131644
}, dtype='Ragged[float32]'), dict(x='x', y='y', axis=1))
17141645
])
17151646
def test_area_to_zero_autorange(DataFrame, df_kwargs, cvs_kwargs):
1716-
if DataFrame in (_dask_cudf_DataFrame, _dask_expr_cudf_DataFrame):
1647+
if DataFrame ==_dask_cudf_DataFrame:
17171648
if df_kwargs.get('dtype', '').startswith('Ragged'):
17181649
pytest.skip("Ragged array not supported with cudf")
17191650

@@ -1790,7 +1721,7 @@ def test_area_to_zero_autorange(DataFrame, df_kwargs, cvs_kwargs):
17901721
}, dtype='Ragged[float32]'), dict(x='x', y='y', axis=1))
17911722
])
17921723
def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, cvs_kwargs):
1793-
if DataFrame in (_dask_cudf_DataFrame, _dask_expr_cudf_DataFrame):
1724+
if DataFrame ==_dask_cudf_DataFrame:
17941725
if df_kwargs.get('dtype', '').startswith('Ragged'):
17951726
pytest.skip("Ragged array not supported with cudf")
17961727

@@ -1893,7 +1824,7 @@ def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, cvs_kwargs):
18931824
}, dtype='Ragged[float32]'), dict(x='x', y='y', y_stack='y_stack', axis=1))
18941825
])
18951826
def test_area_to_line_autorange(DataFrame, df_kwargs, cvs_kwargs):
1896-
if DataFrame in (_dask_cudf_DataFrame, _dask_expr_cudf_DataFrame):
1827+
if DataFrame == _dask_cudf_DataFrame:
18971828
if df_kwargs.get('dtype', '').startswith('Ragged'):
18981829
pytest.skip("Ragged array not supported with cudf")
18991830

@@ -1980,7 +1911,7 @@ def test_area_to_line_autorange(DataFrame, df_kwargs, cvs_kwargs):
19801911
}, dtype='Ragged[float32]'), dict(x='x', y='y', y_stack='y_stack', axis=1))
19811912
])
19821913
def test_area_to_line_autorange_gap(DataFrame, df_kwargs, cvs_kwargs):
1983-
if DataFrame in (_dask_cudf_DataFrame, _dask_expr_cudf_DataFrame):
1914+
if DataFrame == _dask_cudf_DataFrame:
19841915
if df_kwargs.get('dtype', '').startswith('Ragged'):
19851916
pytest.skip("Ragged array not supported with cudf")
19861917

datashader/tests/test_geopandas.py

+6-38
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,16 @@
11
# Testing GeoPandas and SpatialPandas
2-
import contextlib
32

43
import datashader as ds
54
from datashader.tests.test_pandas import assert_eq_ndarray
65
import numpy as np
76
from numpy import nan
87
import pytest
9-
from datashader.tests.utils import dask_switcher
10-
from packaging.version import Version
118

129
try:
1310
import dask.dataframe as dd
1411
except ImportError:
1512
dd = None
1613

17-
_backends = [
18-
pytest.param(False, id="dask"),
19-
]
20-
21-
_extras = ["spatialpandas.dask", "dask_geopandas.backends", "dask_geopandas"]
22-
23-
with contextlib.suppress(ImportError):
24-
import dask_geopandas
25-
26-
if Version(dask_geopandas.__version__) >= Version("0.4.0"):
27-
_backends.append(pytest.param(True, id="dask-expr"))
28-
29-
30-
@pytest.fixture(params=_backends)
31-
def dask_both(request):
32-
with dask_switcher(query=request.param, extras=_extras): ...
33-
return request.param
34-
35-
@pytest.fixture
36-
def dask_classic(request):
37-
with dask_switcher(query=False, extras=_extras): ...
3814

3915
try:
4016
import dask_geopandas
@@ -129,14 +105,6 @@ def dask_classic(request):
129105
])
130106

131107

132-
@pytest.mark.skipif(not dask_geopandas, reason="dask_geopandas not installed")
133-
def test_dask_geopandas_switcher(dask_both):
134-
import dask_geopandas
135-
if dask_both:
136-
assert dask_geopandas.expr.GeoDataFrame == dask_geopandas.GeoDataFrame
137-
else:
138-
assert dask_geopandas.core.GeoDataFrame == dask_geopandas.GeoDataFrame
139-
140108

141109
@pytest.mark.skipif(not geodatasets, reason="geodatasets not installed")
142110
@pytest.mark.skipif(not geopandas, reason="geopandas not installed")
@@ -177,7 +145,7 @@ def test_lines_geopandas(geom_type, explode, use_boundary):
177145
("linestring", True, True),
178146
],
179147
)
180-
def test_lines_dask_geopandas(geom_type, explode, use_boundary, npartitions, dask_both):
148+
def test_lines_dask_geopandas(geom_type, explode, use_boundary, npartitions):
181149
df = geopandas.read_file(geodatasets.get_path("nybb"))
182150
df["col"] = np.arange(len(df)) # Extra column for aggregation.
183151
geometry = "boundary" if use_boundary else "geometry"
@@ -209,7 +177,7 @@ def test_lines_dask_geopandas(geom_type, explode, use_boundary, npartitions, das
209177
("linestring", True, True),
210178
],
211179
)
212-
def test_lines_spatialpandas(geom_type, explode, use_boundary, npartitions, dask_classic):
180+
def test_lines_spatialpandas(geom_type, explode, use_boundary, npartitions):
213181
df = geopandas.read_file(geodatasets.get_path("nybb"))
214182
df["col"] = np.arange(len(df)) # Extra column for aggregation.
215183
geometry = "boundary" if use_boundary else "geometry"
@@ -252,7 +220,7 @@ def test_points_geopandas(geom_type):
252220
@pytest.mark.skipif(not geopandas, reason="geopandas not installed")
253221
@pytest.mark.parametrize('npartitions', [1, 2, 5])
254222
@pytest.mark.parametrize("geom_type", ["multipoint", "point"])
255-
def test_points_dask_geopandas(geom_type, npartitions, dask_both):
223+
def test_points_dask_geopandas(geom_type, npartitions):
256224
df = geopandas.read_file(geodatasets.get_path("nybb"))
257225

258226
df["geometry"] = df["geometry"].sample_points(100, rng=93814) # multipoint
@@ -274,7 +242,7 @@ def test_points_dask_geopandas(geom_type, npartitions, dask_both):
274242
@pytest.mark.skipif(not spatialpandas, reason="spatialpandas not installed")
275243
@pytest.mark.parametrize('npartitions', [0, 1, 2, 5])
276244
@pytest.mark.parametrize("geom_type", ["multipoint", "point"])
277-
def test_points_spatialpandas(geom_type, npartitions, dask_classic):
245+
def test_points_spatialpandas(geom_type, npartitions):
278246
df = geopandas.read_file(geodatasets.get_path("nybb"))
279247

280248
df["geometry"] = df["geometry"].sample_points(100, rng=93814) # multipoint
@@ -315,7 +283,7 @@ def test_polygons_geopandas(geom_type):
315283
@pytest.mark.skipif(not geopandas, reason="geopandas not installed")
316284
@pytest.mark.parametrize('npartitions', [1, 2, 5])
317285
@pytest.mark.parametrize("geom_type", ["multipolygon", "polygon"])
318-
def test_polygons_dask_geopandas(geom_type, npartitions, dask_both):
286+
def test_polygons_dask_geopandas(geom_type, npartitions):
319287
df = geopandas.read_file(geodatasets.get_path("nybb"))
320288
df["col"] = np.arange(len(df))
321289

@@ -338,7 +306,7 @@ def test_polygons_dask_geopandas(geom_type, npartitions, dask_both):
338306
@pytest.mark.skipif(not spatialpandas, reason="spatialpandas not installed")
339307
@pytest.mark.parametrize('npartitions', [0, 1, 2, 5])
340308
@pytest.mark.parametrize("geom_type", ["multipolygon", "polygon"])
341-
def test_polygons_spatialpandas(geom_type, npartitions, dask_classic):
309+
def test_polygons_spatialpandas(geom_type, npartitions):
342310
df = geopandas.read_file(geodatasets.get_path("nybb"))
343311
df["col"] = np.arange(len(df))
344312

datashader/tests/test_polygons.py

-4
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,12 @@
44
import xarray as xr
55
import datashader as ds
66
from datashader.tests.test_pandas import assert_eq_ndarray, assert_eq_xr
7-
from datashader.tests.utils import dask_switcher
87

98
try:
109
import dask.dataframe as dd
1110
except ImportError:
1211
dd = None
1312

14-
@pytest.fixture(autouse=True)
15-
def _classic_dd():
16-
with dask_switcher(query=False, extras=["spatialpandas.dask"]): ...
1713

1814
try:
1915
# Import to register extension arrays

0 commit comments

Comments
 (0)