|
1 |
| -# Copyright (c) 2018-2023, NVIDIA CORPORATION. |
| 1 | +# Copyright (c) 2018-2024, NVIDIA CORPORATION. |
2 | 2 |
|
| 3 | +from dask import config |
| 4 | + |
| 5 | +# For dask>2024.2.0, we can silence the loud deprecation |
| 6 | +# warning before importing `dask.dataframe` (this won't |
| 7 | +# do anything for dask==2024.2.0) |
| 8 | +config.set({"dataframe.query-planning-warning": False}) |
| 9 | + |
| 10 | +import dask.dataframe as dd |
3 | 11 | from dask.dataframe import from_delayed
|
4 | 12 |
|
5 | 13 | import cudf
|
6 | 14 |
|
7 | 15 | from . import backends
|
8 | 16 | from ._version import __git_commit__, __version__
|
9 |
| -from .core import DataFrame, Series, concat, from_cudf, from_dask_dataframe |
10 |
| -from .groupby import groupby_agg |
11 |
| -from .io import read_csv, read_json, read_orc, read_text, to_orc |
| 17 | +from .core import concat, from_cudf, from_dask_dataframe |
| 18 | +from .expr import QUERY_PLANNING_ON |
| 19 | + |
| 20 | + |
def read_csv(*args, **kwargs):
    """Call ``dask.dataframe.read_csv`` with the cudf backend selected.

    All positional and keyword arguments are forwarded unchanged to
    ``dd.read_csv``. The ``"dataframe.backend"`` config option is set to
    ``"cudf"`` only for the duration of that call.
    """
    backend_override = {"dataframe.backend": "cudf"}
    with config.set(backend_override):
        collection = dd.read_csv(*args, **kwargs)
    return collection
| 24 | + |
| 25 | + |
def read_json(*args, **kwargs):
    """Call ``dask.dataframe.read_json`` with the cudf backend selected.

    All positional and keyword arguments are forwarded unchanged to
    ``dd.read_json``. The ``"dataframe.backend"`` config option is set to
    ``"cudf"`` only for the duration of that call.
    """
    backend_override = {"dataframe.backend": "cudf"}
    with config.set(backend_override):
        collection = dd.read_json(*args, **kwargs)
    return collection
| 29 | + |
| 30 | + |
def read_orc(*args, **kwargs):
    """Call ``dask.dataframe.read_orc`` with the cudf backend selected.

    All positional and keyword arguments are forwarded unchanged to
    ``dd.read_orc``. The ``"dataframe.backend"`` config option is set to
    ``"cudf"`` only for the duration of that call.
    """
    backend_override = {"dataframe.backend": "cudf"}
    with config.set(backend_override):
        collection = dd.read_orc(*args, **kwargs)
    return collection
| 34 | + |
| 35 | + |
def read_parquet(*args, **kwargs):
    """Call ``dask.dataframe.read_parquet`` with the cudf backend selected.

    All positional and keyword arguments are forwarded unchanged to
    ``dd.read_parquet``. The ``"dataframe.backend"`` config option is set
    to ``"cudf"`` only for the duration of that call.
    """
    backend_override = {"dataframe.backend": "cudf"}
    with config.set(backend_override):
        collection = dd.read_parquet(*args, **kwargs)
    return collection
| 39 | + |
| 40 | + |
def raise_not_implemented_error(attr_name):
    """Build a placeholder callable for an API unavailable under dask-expr.

    Parameters
    ----------
    attr_name : str
        Name of the top-level API being stubbed out. It is interpolated
        into the error message and used as the placeholder's name.

    Returns
    -------
    callable
        A function that accepts any positional and keyword arguments and
        always raises ``NotImplementedError`` when called.
    """

    def inner_func(*args, **kwargs):
        raise NotImplementedError(
            f"Top-level {attr_name} API is not available for dask-expr."
        )

    # Name the stub after the API it stands in for, so that repr() and
    # help() identify it instead of reporting a generic "inner_func".
    stub_name = str(attr_name)
    inner_func.__name__ = stub_name
    inner_func.__qualname__ = stub_name
    inner_func.__doc__ = (
        f"Placeholder for ``{stub_name}``; always raises NotImplementedError."
    )
    return inner_func
| 48 | + |
| 49 | + |
# Bind the public collection classes and helper APIs according to the
# QUERY_PLANNING_ON flag imported from .expr.
if not QUERY_PLANNING_ON:
    # Query planning disabled: use the implementations from .core,
    # .groupby and .io directly.
    from .core import DataFrame, Index, Series
    from .groupby import groupby_agg
    from .io import read_text, to_orc
else:
    # Query planning enabled: collections come from .expr._collection.
    from .expr._collection import DataFrame, Index, Series

    # These helpers are replaced by stubs that raise NotImplementedError
    # when called.
    groupby_agg = raise_not_implemented_error("groupby_agg")
    read_text = raise_not_implemented_error("read_text")
    to_orc = raise_not_implemented_error("to_orc")
12 | 60 |
|
13 |
| -try: |
14 |
| - from .io import read_parquet |
15 |
| -except ImportError: |
16 |
| - pass |
17 | 61 |
|
# Names exported by a wildcard import (`from <package> import *`) of this module.
18 | 62 | __all__ = [
|
19 | 63 | "DataFrame",
|
20 | 64 | "Series",
|
| 65 | + "Index", |
21 | 66 | "from_cudf",
|
22 | 67 | "from_dask_dataframe",
|
23 | 68 | "concat",
|
24 | 69 | "from_delayed",
|
25 | 70 | ]
|
26 | 71 |
|
| 72 | + |
# When cudf.DataFrame has no "mean" attribute, define one with the value None.
# The trailing `del cudf` unbinds the name `cudf` in this module's namespace.
# NOTE(review): presumably a compatibility shim for cudf versions without
# DataFrame.mean, with `del cudf` keeping the namespace clean -- confirm.
27 | 73 | if not hasattr(cudf.DataFrame, "mean"):
|
28 | 74 | cudf.DataFrame.mean = None
|
29 | 75 | del cudf
|
0 commit comments