diff --git a/python/ray/air/util/data_batch_conversion.py b/python/ray/air/util/data_batch_conversion.py index e1e5e31f305d..145486df7654 100644 --- a/python/ray/air/util/data_batch_conversion.py +++ b/python/ray/air/util/data_batch_conversion.py @@ -6,6 +6,7 @@ from ray.air.constants import TENSOR_COLUMN_NAME from ray.air.data_batch_type import DataBatchType +from ray.data.util.expression_utils import get_setting_with_copy_warning from ray.util.annotations import Deprecated, DeveloperAPI if TYPE_CHECKING: @@ -285,12 +286,8 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra """ Cast all NumPy ndarray columns in df to our tensor extension type, TensorArray. """ - pd = _lazy_import_pandas() - try: - SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning - except AttributeError: - # SettingWithCopyWarning was moved to pd.errors in Pandas 1.5.0. - SettingWithCopyWarning = pd.errors.SettingWithCopyWarning + # Get the SettingWithCopyWarning class if available + SettingWithCopyWarning = get_setting_with_copy_warning() from ray.data._internal.tensor_extensions.pandas import ( TensorArray, @@ -313,7 +310,8 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra # https://stackoverflow.com/a/74193599 with warnings.catch_warnings(): warnings.simplefilter("ignore", category=FutureWarning) - warnings.simplefilter("ignore", category=SettingWithCopyWarning) + if SettingWithCopyWarning is not None: + warnings.simplefilter("ignore", category=SettingWithCopyWarning) df[col_name] = TensorArray(col) except Exception as e: raise ValueError( @@ -328,12 +326,8 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra def _cast_tensor_columns_to_ndarrays(df: "pd.DataFrame") -> "pd.DataFrame": """Cast all tensor extension columns in df to NumPy ndarrays.""" - pd = _lazy_import_pandas() - try: - SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning - except AttributeError: - # 
SettingWithCopyWarning was moved to pd.errors in Pandas 1.5.0. - SettingWithCopyWarning = pd.errors.SettingWithCopyWarning + # Get the SettingWithCopyWarning class if available + SettingWithCopyWarning = get_setting_with_copy_warning() from ray.data._internal.tensor_extensions.pandas import TensorDtype # Try to convert any tensor extension columns to ndarray columns. @@ -348,6 +342,7 @@ def _cast_tensor_columns_to_ndarrays(df: "pd.DataFrame") -> "pd.DataFrame": # https://stackoverflow.com/a/74193599 with warnings.catch_warnings(): warnings.simplefilter("ignore", category=FutureWarning) - warnings.simplefilter("ignore", category=SettingWithCopyWarning) + if SettingWithCopyWarning is not None: + warnings.simplefilter("ignore", category=SettingWithCopyWarning) df[col_name] = list(col.to_numpy()) return df diff --git a/python/ray/data/util/data_batch_conversion.py b/python/ray/data/util/data_batch_conversion.py index ce09694b78c8..b36eb5cfad85 100644 --- a/python/ray/data/util/data_batch_conversion.py +++ b/python/ray/data/util/data_batch_conversion.py @@ -6,6 +6,7 @@ from ray.air.data_batch_type import DataBatchType from ray.data.constants import TENSOR_COLUMN_NAME +from ray.data.util.expression_utils import get_setting_with_copy_warning from ray.util.annotations import DeveloperAPI if TYPE_CHECKING: @@ -219,12 +220,8 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra """ Cast all NumPy ndarray columns in df to our tensor extension type, TensorArray. """ - pd = _lazy_import_pandas() - try: - SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning - except AttributeError: - # SettingWithCopyWarning was moved to pd.errors in Pandas 1.5.0. 
- SettingWithCopyWarning = pd.errors.SettingWithCopyWarning + # Get the SettingWithCopyWarning class if available + SettingWithCopyWarning = get_setting_with_copy_warning() from ray.data._internal.tensor_extensions.pandas import ( TensorArray, @@ -247,7 +244,8 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra # https://stackoverflow.com/a/74193599 with warnings.catch_warnings(): warnings.simplefilter("ignore", category=FutureWarning) - warnings.simplefilter("ignore", category=SettingWithCopyWarning) + if SettingWithCopyWarning is not None: + warnings.simplefilter("ignore", category=SettingWithCopyWarning) df[col_name] = TensorArray(col) except Exception as e: raise ValueError( @@ -262,12 +260,8 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra def _cast_tensor_columns_to_ndarrays(df: "pd.DataFrame") -> "pd.DataFrame": """Cast all tensor extension columns in df to NumPy ndarrays.""" - pd = _lazy_import_pandas() - try: - SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning - except AttributeError: - # SettingWithCopyWarning was moved to pd.errors in Pandas 1.5.0. - SettingWithCopyWarning = pd.errors.SettingWithCopyWarning + # Get the SettingWithCopyWarning class if available + SettingWithCopyWarning = get_setting_with_copy_warning() from ray.data._internal.tensor_extensions.pandas import TensorDtype # Try to convert any tensor extension columns to ndarray columns. 
@@ -282,6 +276,7 @@ def _cast_tensor_columns_to_ndarrays(df: "pd.DataFrame") -> "pd.DataFrame": # https://stackoverflow.com/a/74193599 with warnings.catch_warnings(): warnings.simplefilter("ignore", category=FutureWarning) - warnings.simplefilter("ignore", category=SettingWithCopyWarning) + if SettingWithCopyWarning is not None: + warnings.simplefilter("ignore", category=SettingWithCopyWarning) df[col_name] = list(col.to_numpy()) return df diff --git a/python/ray/data/util/expression_utils.py b/python/ray/data/util/expression_utils.py index 08a70a65cbec..53aeb385a23d 100644 --- a/python/ray/data/util/expression_utils.py +++ b/python/ray/data/util/expression_utils.py @@ -2,10 +2,36 @@ from typing import TYPE_CHECKING, Any, Callable, List, Optional +from ray.util.annotations import DeveloperAPI + if TYPE_CHECKING: from ray.data.expressions import Expr +@DeveloperAPI +def get_setting_with_copy_warning() -> Optional[type]: + """Get the SettingWithCopyWarning class from pandas, if available. + + Pandas has moved/renamed this warning across versions, and pandas 3.x may not + expose it at all. This function handles the version differences gracefully + using hasattr checks instead of try-except blocks. + + Returns: + The SettingWithCopyWarning class if found, None otherwise. 
+ """ + import pandas as pd + + # Probe with hasattr rather than try/except: a missing attribute is an expected case here, not an error + if hasattr(pd.core.common, "SettingWithCopyWarning"): + return pd.core.common.SettingWithCopyWarning + elif hasattr(pd.errors, "SettingWithCopyWarning"): + return pd.errors.SettingWithCopyWarning + else: + # Warning not available in this pandas version + return None + + +@DeveloperAPI def create_callable_class_udf_init_fn( exprs: List["Expr"], ) -> Optional[Callable[[], None]]: diff --git a/python/requirements.txt b/python/requirements.txt index 26d8acef1332..78f8c4c4882d 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -46,13 +46,12 @@ aiohttp_cors dm_tree uvicorn prometheus_client>=0.7.1 -pandas +pandas>=1.3,<3 tensorboardX aiohttp>=3.13.3 starlette typer fsspec -pandas>=1.3 pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,!=2.10.*,!=2.11.*,<3 # Serve users can use pydantic<2 py-spy>=0.2.0; python_version < '3.12' py-spy>=0.4.0; python_version >= '3.12' diff --git a/python/setup.py b/python/setup.py index a8c8b23e0e57..2fbf35f590e4 100644 --- a/python/setup.py +++ b/python/setup.py @@ -218,6 +218,7 @@ def get_packages(self): # also update the matching section of requirements/requirements.txt # in this directory if setup_spec.type == SetupType.RAY: + # Ray Data now supports pandas 3.x through graceful version checking pandas_dep = "pandas >= 1.3" numpy_dep = "numpy >= 1.20" pyarrow_deps = [ @@ -270,7 +271,7 @@ def get_packages(self): "watchfiles", ], "tune": [ - "pandas", + pandas_dep, # TODO: Remove pydantic dependency from tune once tune doesn't import train pydantic_dep, "tensorboardX>=1.9",