Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 9 additions & 14 deletions python/ray/air/util/data_batch_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from ray.air.constants import TENSOR_COLUMN_NAME
from ray.air.data_batch_type import DataBatchType
from ray.data.util.expression_utils import get_setting_with_copy_warning
from ray.util.annotations import Deprecated, DeveloperAPI

if TYPE_CHECKING:
Expand Down Expand Up @@ -285,12 +286,8 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra
"""
Cast all NumPy ndarray columns in df to our tensor extension type, TensorArray.
"""
pd = _lazy_import_pandas()
try:
SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning
except AttributeError:
# SettingWithCopyWarning was moved to pd.errors in Pandas 1.5.0.
SettingWithCopyWarning = pd.errors.SettingWithCopyWarning
# Get the SettingWithCopyWarning class if available
SettingWithCopyWarning = get_setting_with_copy_warning()

from ray.data._internal.tensor_extensions.pandas import (
TensorArray,
Expand All @@ -313,7 +310,8 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra
# https://stackoverflow.com/a/74193599
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=FutureWarning)
warnings.simplefilter("ignore", category=SettingWithCopyWarning)
if SettingWithCopyWarning is not None:
warnings.simplefilter("ignore", category=SettingWithCopyWarning)
df[col_name] = TensorArray(col)
except Exception as e:
raise ValueError(
Expand All @@ -328,12 +326,8 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra

def _cast_tensor_columns_to_ndarrays(df: "pd.DataFrame") -> "pd.DataFrame":
"""Cast all tensor extension columns in df to NumPy ndarrays."""
pd = _lazy_import_pandas()
try:
SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning
except AttributeError:
# SettingWithCopyWarning was moved to pd.errors in Pandas 1.5.0.
SettingWithCopyWarning = pd.errors.SettingWithCopyWarning
# Get the SettingWithCopyWarning class if available
SettingWithCopyWarning = get_setting_with_copy_warning()
from ray.data._internal.tensor_extensions.pandas import TensorDtype

# Try to convert any tensor extension columns to ndarray columns.
Expand All @@ -348,6 +342,7 @@ def _cast_tensor_columns_to_ndarrays(df: "pd.DataFrame") -> "pd.DataFrame":
# https://stackoverflow.com/a/74193599
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=FutureWarning)
warnings.simplefilter("ignore", category=SettingWithCopyWarning)
if SettingWithCopyWarning is not None:
warnings.simplefilter("ignore", category=SettingWithCopyWarning)
df[col_name] = list(col.to_numpy())
return df
23 changes: 9 additions & 14 deletions python/ray/data/util/data_batch_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from ray.air.data_batch_type import DataBatchType
from ray.data.constants import TENSOR_COLUMN_NAME
from ray.data.util.expression_utils import get_setting_with_copy_warning
from ray.util.annotations import DeveloperAPI

if TYPE_CHECKING:
Expand Down Expand Up @@ -219,12 +220,8 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra
"""
Cast all NumPy ndarray columns in df to our tensor extension type, TensorArray.
"""
pd = _lazy_import_pandas()
try:
SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning
except AttributeError:
# SettingWithCopyWarning was moved to pd.errors in Pandas 1.5.0.
SettingWithCopyWarning = pd.errors.SettingWithCopyWarning
# Get the SettingWithCopyWarning class if available
SettingWithCopyWarning = get_setting_with_copy_warning()

from ray.data._internal.tensor_extensions.pandas import (
TensorArray,
Expand All @@ -247,7 +244,8 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra
# https://stackoverflow.com/a/74193599
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=FutureWarning)
warnings.simplefilter("ignore", category=SettingWithCopyWarning)
if SettingWithCopyWarning is not None:
warnings.simplefilter("ignore", category=SettingWithCopyWarning)
df[col_name] = TensorArray(col)
except Exception as e:
raise ValueError(
Expand All @@ -262,12 +260,8 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra

def _cast_tensor_columns_to_ndarrays(df: "pd.DataFrame") -> "pd.DataFrame":
"""Cast all tensor extension columns in df to NumPy ndarrays."""
pd = _lazy_import_pandas()
try:
SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning
except AttributeError:
# SettingWithCopyWarning was moved to pd.errors in Pandas 1.5.0.
SettingWithCopyWarning = pd.errors.SettingWithCopyWarning
# Get the SettingWithCopyWarning class if available
SettingWithCopyWarning = get_setting_with_copy_warning()
from ray.data._internal.tensor_extensions.pandas import TensorDtype

# Try to convert any tensor extension columns to ndarray columns.
Expand All @@ -282,6 +276,7 @@ def _cast_tensor_columns_to_ndarrays(df: "pd.DataFrame") -> "pd.DataFrame":
# https://stackoverflow.com/a/74193599
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=FutureWarning)
warnings.simplefilter("ignore", category=SettingWithCopyWarning)
if SettingWithCopyWarning is not None:
warnings.simplefilter("ignore", category=SettingWithCopyWarning)
df[col_name] = list(col.to_numpy())
return df
26 changes: 26 additions & 0 deletions python/ray/data/util/expression_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,36 @@

from typing import TYPE_CHECKING, Any, Callable, List, Optional

from ray.util.annotations import DeveloperAPI

if TYPE_CHECKING:
from ray.data.expressions import Expr


@DeveloperAPI
def get_setting_with_copy_warning() -> Optional[type]:
    """Look up the pandas ``SettingWithCopyWarning`` class, if it exists.

    The warning has lived in different pandas modules across releases, and
    pandas 3.x may not expose it at all. The lookup probes
    ``pandas.core.common`` first and ``pandas.errors`` second, using
    ``hasattr`` checks rather than exception handling.

    Returns:
        The ``SettingWithCopyWarning`` class from the first module that
        defines it, or ``None`` when neither module does.
    """
    import pandas as pd

    attr_name = "SettingWithCopyWarning"

    # First candidate: pandas.core.common.
    core_common = pd.core.common
    if hasattr(core_common, attr_name):
        return getattr(core_common, attr_name)

    # Second candidate: pandas.errors (only consulted if the first misses).
    pandas_errors = pd.errors
    if hasattr(pandas_errors, attr_name):
        return getattr(pandas_errors, attr_name)

    # Neither module exposes the warning in this pandas version.
    return None


@DeveloperAPI
def create_callable_class_udf_init_fn(
exprs: List["Expr"],
) -> Optional[Callable[[], None]]:
Expand Down
3 changes: 1 addition & 2 deletions python/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,12 @@ aiohttp_cors
dm_tree
uvicorn
prometheus_client>=0.7.1
pandas
pandas>=1.3,<3
tensorboardX
aiohttp>=3.13.3
starlette
typer
fsspec
pandas>=1.3
pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,!=2.10.*,!=2.11.*,<3 # Serve users can use pydantic<2
py-spy>=0.2.0; python_version < '3.12'
py-spy>=0.4.0; python_version >= '3.12'
Expand Down
3 changes: 2 additions & 1 deletion python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ def get_packages(self):
# also update the matching section of requirements/requirements.txt
# in this directory
if setup_spec.type == SetupType.RAY:
# Ray Data now supports pandas 3.x through graceful version checking
pandas_dep = "pandas >= 1.3"
numpy_dep = "numpy >= 1.20"
pyarrow_deps = [
Expand Down Expand Up @@ -270,7 +271,7 @@ def get_packages(self):
"watchfiles",
],
"tune": [
"pandas",
pandas_dep,
# TODO: Remove pydantic dependency from tune once tune doesn't import train
pydantic_dep,
"tensorboardX>=1.9",
Expand Down