Skip to content

Commit 55c316b

Browse files
authored
Change dask_cudf.to_parquet behavior for local filesystems (#18408)
`dask_cudf` currently uses fsspec to open every output file in `to_parquet`. For local filesystems, it should instead pass the output path straight to cudf/libcudf, which can write local files more efficiently than going through an fsspec file object. Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) Approvers: - Tom Augspurger (https://github.com/TomAugspurger) URL: #18408
1 parent 661add6 commit 55c316b

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

python/dask_cudf/dask_cudf/_legacy/io/parquet.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
# Copyright (c) 2019-2025, NVIDIA CORPORATION.
2+
import contextlib
23
import itertools
34
import warnings
45
from functools import partial
@@ -22,6 +23,7 @@
2223
from cudf.core.column import CategoricalColumn, as_column
2324
from cudf.io import write_to_dataset
2425
from cudf.io.parquet import _apply_post_filters, _normalize_filters
26+
from cudf.utils import ioutils
2527
from cudf.utils.dtypes import cudf_dtype_from_pa_type
2628

2729

@@ -348,8 +350,14 @@ def write_partition(
348350
storage_options=kwargs.get("storage_options", None),
349351
)
350352
else:
351-
with fs.open(fs.sep.join([path, filename]), mode="wb") as out_file:
352-
if not isinstance(out_file, IOBase):
353+
with (
354+
contextlib.nullcontext()
355+
if ioutils._is_local_filesystem(fs)
356+
else fs.open(fs.sep.join([path, filename]), mode="wb")
357+
) as out_file:
358+
if out_file is None:
359+
out_file = fs.sep.join([path, filename])
360+
elif not isinstance(out_file, IOBase):
353361
out_file = BufferedWriter(out_file)
354362
md = df.to_parquet(
355363
path=out_file,

0 commit comments

Comments
 (0)