From 2155526ab30f0e37b7c3ace200464d6fff9e84fd Mon Sep 17 00:00:00 2001 From: daiping8 Date: Thu, 22 Jan 2026 15:42:18 +0800 Subject: [PATCH 1/5] [Dependencies] Update pandas version constraints in requirements and setup files - Updated pandas dependency in requirements.txt to specify version range: >=1.3,<3. - Adjusted setup.py to reflect the same version constraints for Ray Data, so that pandas 3.x is not installed until it is fully supported. - Modified python/ray/air/util/data_batch_conversion.py and python/ray/data/util/data_batch_conversion.py to tolerate SettingWithCopyWarning being moved or missing across pandas versions. This change is aimed at maintaining compatibility with upcoming pandas releases while avoiding breaking changes in the codebase. Change-Id: I64b4b464ed63350839c365a81e65f0d6e4b0f53f Signed-off-by: daiping8 --- python/ray/air/util/data_batch_conversion.py | 28 +++++++++++++------ python/ray/data/util/data_batch_conversion.py | 28 +++++++++++++------ python/requirements.txt | 4 +-- python/setup.py | 7 +++-- 4 files changed, 47 insertions(+), 20 deletions(-) diff --git a/python/ray/air/util/data_batch_conversion.py b/python/ray/air/util/data_batch_conversion.py index e1e5e31f305d..83628ea4ada5 100644 --- a/python/ray/air/util/data_batch_conversion.py +++ b/python/ray/air/util/data_batch_conversion.py @@ -286,11 +286,16 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra Cast all NumPy ndarray columns in df to our tensor extension type, TensorArray. """ pd = _lazy_import_pandas() + # Pandas has moved/renamed this warning across versions, and pandas 3.x may not + # expose it at all. If we can't find it, just don't attempt to filter it. + SettingWithCopyWarning = None try: SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning - except AttributeError: - # SettingWithCopyWarning was moved to pd.errors in Pandas 1.5.0. 
- SettingWithCopyWarning = pd.errors.SettingWithCopyWarning + except Exception: + try: + SettingWithCopyWarning = pd.errors.SettingWithCopyWarning + except Exception: + SettingWithCopyWarning = None from ray.data._internal.tensor_extensions.pandas import ( TensorArray, @@ -313,7 +318,10 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra # https://stackoverflow.com/a/74193599 with warnings.catch_warnings(): warnings.simplefilter("ignore", category=FutureWarning) - warnings.simplefilter("ignore", category=SettingWithCopyWarning) + if SettingWithCopyWarning is not None: + warnings.simplefilter( + "ignore", category=SettingWithCopyWarning + ) df[col_name] = TensorArray(col) except Exception as e: raise ValueError( @@ -329,11 +337,14 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra def _cast_tensor_columns_to_ndarrays(df: "pd.DataFrame") -> "pd.DataFrame": """Cast all tensor extension columns in df to NumPy ndarrays.""" pd = _lazy_import_pandas() + SettingWithCopyWarning = None try: SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning - except AttributeError: - # SettingWithCopyWarning was moved to pd.errors in Pandas 1.5.0. - SettingWithCopyWarning = pd.errors.SettingWithCopyWarning + except Exception: + try: + SettingWithCopyWarning = pd.errors.SettingWithCopyWarning + except Exception: + SettingWithCopyWarning = None from ray.data._internal.tensor_extensions.pandas import TensorDtype # Try to convert any tensor extension columns to ndarray columns. 
@@ -348,6 +359,7 @@ def _cast_tensor_columns_to_ndarrays(df: "pd.DataFrame") -> "pd.DataFrame": # https://stackoverflow.com/a/74193599 with warnings.catch_warnings(): warnings.simplefilter("ignore", category=FutureWarning) - warnings.simplefilter("ignore", category=SettingWithCopyWarning) + if SettingWithCopyWarning is not None: + warnings.simplefilter("ignore", category=SettingWithCopyWarning) df[col_name] = list(col.to_numpy()) return df diff --git a/python/ray/data/util/data_batch_conversion.py b/python/ray/data/util/data_batch_conversion.py index 25a356eb1329..fb490e86181f 100644 --- a/python/ray/data/util/data_batch_conversion.py +++ b/python/ray/data/util/data_batch_conversion.py @@ -221,11 +221,16 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra Cast all NumPy ndarray columns in df to our tensor extension type, TensorArray. """ pd = _lazy_import_pandas() + # Pandas has moved/renamed this warning across versions, and pandas 3.x may not + # expose it at all. If we can't find it, just don't attempt to filter it. + SettingWithCopyWarning = None try: SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning - except AttributeError: - # SettingWithCopyWarning was moved to pd.errors in Pandas 1.5.0. 
- SettingWithCopyWarning = pd.errors.SettingWithCopyWarning + except Exception: + try: + SettingWithCopyWarning = pd.errors.SettingWithCopyWarning + except Exception: + SettingWithCopyWarning = None from ray.data._internal.tensor_extensions.pandas import ( TensorArray, @@ -248,7 +253,10 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra # https://stackoverflow.com/a/74193599 with warnings.catch_warnings(): warnings.simplefilter("ignore", category=FutureWarning) - warnings.simplefilter("ignore", category=SettingWithCopyWarning) + if SettingWithCopyWarning is not None: + warnings.simplefilter( + "ignore", category=SettingWithCopyWarning + ) df[col_name] = TensorArray(col) except Exception as e: raise ValueError( @@ -264,11 +272,14 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra def _cast_tensor_columns_to_ndarrays(df: "pd.DataFrame") -> "pd.DataFrame": """Cast all tensor extension columns in df to NumPy ndarrays.""" pd = _lazy_import_pandas() + SettingWithCopyWarning = None try: SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning - except AttributeError: - # SettingWithCopyWarning was moved to pd.errors in Pandas 1.5.0. - SettingWithCopyWarning = pd.errors.SettingWithCopyWarning + except Exception: + try: + SettingWithCopyWarning = pd.errors.SettingWithCopyWarning + except Exception: + SettingWithCopyWarning = None from ray.data._internal.tensor_extensions.pandas import TensorDtype # Try to convert any tensor extension columns to ndarray columns. 
@@ -283,6 +294,7 @@ def _cast_tensor_columns_to_ndarrays(df: "pd.DataFrame") -> "pd.DataFrame": # https://stackoverflow.com/a/74193599 with warnings.catch_warnings(): warnings.simplefilter("ignore", category=FutureWarning) - warnings.simplefilter("ignore", category=SettingWithCopyWarning) + if SettingWithCopyWarning is not None: + warnings.simplefilter("ignore", category=SettingWithCopyWarning) df[col_name] = list(col.to_numpy()) return df diff --git a/python/requirements.txt b/python/requirements.txt index 26d8acef1332..12e855be525c 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -46,13 +46,13 @@ aiohttp_cors dm_tree uvicorn prometheus_client>=0.7.1 -pandas +pandas>=1.3,<3 tensorboardX aiohttp>=3.13.3 starlette typer fsspec -pandas>=1.3 +pandas>=1.3,<3 pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,!=2.10.*,!=2.11.*,<3 # Serve users can use pydantic<2 py-spy>=0.2.0; python_version < '3.12' py-spy>=0.4.0; python_version >= '3.12' diff --git a/python/setup.py b/python/setup.py index 76d146eed8f5..28a3fdf87ed0 100644 --- a/python/setup.py +++ b/python/setup.py @@ -218,7 +218,10 @@ def get_packages(self): # also update the matching section of requirements/requirements.txt # in this directory if setup_spec.type == SetupType.RAY: - pandas_dep = "pandas >= 1.3" + # Ray Data currently relies on pandas APIs that are not available in pandas 3.x + # (e.g. SettingWithCopyWarning). Keep an upper bound until full pandas 3 support + # is added. 
+ pandas_dep = "pandas >= 1.3, < 3" numpy_dep = "numpy >= 1.20" pyarrow_deps = [ "pyarrow >= 9.0.0", @@ -270,7 +273,7 @@ def get_packages(self): "watchfiles", ], "tune": [ - "pandas", + pandas_dep, # TODO: Remove pydantic dependency from tune once tune doesn't import train pydantic_dep, "tensorboardX>=1.9", From 365cab832753a1e139e08f9f9aa87a6d39adf936 Mon Sep 17 00:00:00 2001 From: daiping8 Date: Thu, 22 Jan 2026 17:03:35 +0800 Subject: [PATCH 2/5] 1 Change-Id: Ibcbc6efba323adbf906cb844f9ea5d33d4c4ed30 Signed-off-by: daiping8 --- python/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/python/requirements.txt b/python/requirements.txt index 12e855be525c..78f8c4c4882d 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -52,7 +52,6 @@ aiohttp>=3.13.3 starlette typer fsspec -pandas>=1.3,<3 pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,!=2.10.*,!=2.11.*,<3 # Serve users can use pydantic<2 py-spy>=0.2.0; python_version < '3.12' py-spy>=0.4.0; python_version >= '3.12' From cb3abe5e57d65991023daff0af2627ab60cd877b Mon Sep 17 00:00:00 2001 From: daiping8 Date: Thu, 22 Jan 2026 18:31:34 +0800 Subject: [PATCH 3/5] 1 Change-Id: I952be3671074ae3778f2964dbd583170b7508b9c Signed-off-by: daiping8 --- python/ray/air/util/data_batch_conversion.py | 4 +--- python/ray/data/util/data_batch_conversion.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/python/ray/air/util/data_batch_conversion.py b/python/ray/air/util/data_batch_conversion.py index 83628ea4ada5..0563efa36133 100644 --- a/python/ray/air/util/data_batch_conversion.py +++ b/python/ray/air/util/data_batch_conversion.py @@ -319,9 +319,7 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra with warnings.catch_warnings(): warnings.simplefilter("ignore", category=FutureWarning) if SettingWithCopyWarning is not None: - warnings.simplefilter( - "ignore", category=SettingWithCopyWarning - ) + 
warnings.simplefilter("ignore", category=SettingWithCopyWarning) df[col_name] = TensorArray(col) except Exception as e: raise ValueError( diff --git a/python/ray/data/util/data_batch_conversion.py b/python/ray/data/util/data_batch_conversion.py index fb490e86181f..bce85fce3bce 100644 --- a/python/ray/data/util/data_batch_conversion.py +++ b/python/ray/data/util/data_batch_conversion.py @@ -254,9 +254,7 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra with warnings.catch_warnings(): warnings.simplefilter("ignore", category=FutureWarning) if SettingWithCopyWarning is not None: - warnings.simplefilter( - "ignore", category=SettingWithCopyWarning - ) + warnings.simplefilter("ignore", category=SettingWithCopyWarning) df[col_name] = TensorArray(col) except Exception as e: raise ValueError( From e96ae6976d4e9a6f9ac34fab44e3662b47d09035 Mon Sep 17 00:00:00 2001 From: daiping8 Date: Sat, 7 Feb 2026 13:15:45 +0800 Subject: [PATCH 4/5] [Dependencies] Update pandas dependency handling and improve SettingWithCopyWarning retrieval Signed-off-by: daiping8 --- python/ray/air/util/data_batch_conversion.py | 23 ++++--------------- python/ray/data/util/data_batch_conversion.py | 23 ++++--------------- python/ray/data/util/expression_utils.py | 22 ++++++++++++++++++ python/setup.py | 6 ++--- 4 files changed, 34 insertions(+), 40 deletions(-) diff --git a/python/ray/air/util/data_batch_conversion.py b/python/ray/air/util/data_batch_conversion.py index 0563efa36133..a7b565364cf1 100644 --- a/python/ray/air/util/data_batch_conversion.py +++ b/python/ray/air/util/data_batch_conversion.py @@ -6,6 +6,7 @@ from ray.air.constants import TENSOR_COLUMN_NAME from ray.air.data_batch_type import DataBatchType +from ray.data.util.expression_utils import get_setting_with_copy_warning from ray.util.annotations import Deprecated, DeveloperAPI if TYPE_CHECKING: @@ -286,16 +287,8 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra Cast 
all NumPy ndarray columns in df to our tensor extension type, TensorArray. """ pd = _lazy_import_pandas() - # Pandas has moved/renamed this warning across versions, and pandas 3.x may not - # expose it at all. If we can't find it, just don't attempt to filter it. - SettingWithCopyWarning = None - try: - SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning - except Exception: - try: - SettingWithCopyWarning = pd.errors.SettingWithCopyWarning - except Exception: - SettingWithCopyWarning = None + # Get the SettingWithCopyWarning class if available + SettingWithCopyWarning = get_setting_with_copy_warning() from ray.data._internal.tensor_extensions.pandas import ( TensorArray, @@ -335,14 +328,8 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra def _cast_tensor_columns_to_ndarrays(df: "pd.DataFrame") -> "pd.DataFrame": """Cast all tensor extension columns in df to NumPy ndarrays.""" pd = _lazy_import_pandas() - SettingWithCopyWarning = None - try: - SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning - except Exception: - try: - SettingWithCopyWarning = pd.errors.SettingWithCopyWarning - except Exception: - SettingWithCopyWarning = None + # Get the SettingWithCopyWarning class if available + SettingWithCopyWarning = get_setting_with_copy_warning() from ray.data._internal.tensor_extensions.pandas import TensorDtype # Try to convert any tensor extension columns to ndarray columns. 
diff --git a/python/ray/data/util/data_batch_conversion.py b/python/ray/data/util/data_batch_conversion.py index bce85fce3bce..3b83a4d8870e 100644 --- a/python/ray/data/util/data_batch_conversion.py +++ b/python/ray/data/util/data_batch_conversion.py @@ -6,6 +6,7 @@ from ray.air.data_batch_type import DataBatchType from ray.data.constants import TENSOR_COLUMN_NAME +from ray.data.util.expression_utils import get_setting_with_copy_warning from ray.util.annotations import DeveloperAPI if TYPE_CHECKING: @@ -221,16 +222,8 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra Cast all NumPy ndarray columns in df to our tensor extension type, TensorArray. """ pd = _lazy_import_pandas() - # Pandas has moved/renamed this warning across versions, and pandas 3.x may not - # expose it at all. If we can't find it, just don't attempt to filter it. - SettingWithCopyWarning = None - try: - SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning - except Exception: - try: - SettingWithCopyWarning = pd.errors.SettingWithCopyWarning - except Exception: - SettingWithCopyWarning = None + # Get the SettingWithCopyWarning class if available + SettingWithCopyWarning = get_setting_with_copy_warning() from ray.data._internal.tensor_extensions.pandas import ( TensorArray, @@ -270,14 +263,8 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra def _cast_tensor_columns_to_ndarrays(df: "pd.DataFrame") -> "pd.DataFrame": """Cast all tensor extension columns in df to NumPy ndarrays.""" pd = _lazy_import_pandas() - SettingWithCopyWarning = None - try: - SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning - except Exception: - try: - SettingWithCopyWarning = pd.errors.SettingWithCopyWarning - except Exception: - SettingWithCopyWarning = None + # Get the SettingWithCopyWarning class if available + SettingWithCopyWarning = get_setting_with_copy_warning() from ray.data._internal.tensor_extensions.pandas import TensorDtype # 
Try to convert any tensor extension columns to ndarray columns. diff --git a/python/ray/data/util/expression_utils.py b/python/ray/data/util/expression_utils.py index 08a70a65cbec..878b7e5e855c 100644 --- a/python/ray/data/util/expression_utils.py +++ b/python/ray/data/util/expression_utils.py @@ -6,6 +6,28 @@ from ray.data.expressions import Expr +def get_setting_with_copy_warning() -> Optional[type]: + """Get the SettingWithCopyWarning class from pandas, if available. + + Pandas has moved/renamed this warning across versions, and pandas 3.x may not + expose it at all. This function handles the version differences gracefully + using hasattr checks instead of try-except blocks. + + Returns: + The SettingWithCopyWarning class if found, None otherwise. + """ + import pandas as pd + + # Probe both historical locations with hasattr instead of try/except + if hasattr(pd.core.common, "SettingWithCopyWarning"): + return pd.core.common.SettingWithCopyWarning + elif hasattr(pd.errors, "SettingWithCopyWarning"): + return pd.errors.SettingWithCopyWarning + else: + # Warning not available in this pandas version + return None + + def create_callable_class_udf_init_fn( exprs: List["Expr"], ) -> Optional[Callable[[], None]]: diff --git a/python/setup.py b/python/setup.py index 28a3fdf87ed0..83d6b69a2c33 100644 --- a/python/setup.py +++ b/python/setup.py @@ -218,10 +218,8 @@ def get_packages(self): # also update the matching section of requirements/requirements.txt # in this directory if setup_spec.type == SetupType.RAY: - # Ray Data currently relies on pandas APIs that are not available in pandas 3.x - # (e.g. SettingWithCopyWarning). Keep an upper bound until full pandas 3 support - # is added. 
- pandas_dep = "pandas >= 1.3, < 3" + # Ray Data now supports pandas 3.x through graceful version checking + pandas_dep = "pandas >= 1.3" numpy_dep = "numpy >= 1.20" pyarrow_deps = [ "pyarrow >= 9.0.0", From 93949e1a945ea7190d8d3ce7d364814e9f7a2f52 Mon Sep 17 00:00:00 2001 From: daiping8 Date: Sat, 7 Feb 2026 14:11:28 +0800 Subject: [PATCH 5/5] [Refactor] Remove lazy import of pandas in data batch conversion functions Signed-off-by: daiping8 --- python/ray/air/util/data_batch_conversion.py | 2 -- python/ray/data/util/data_batch_conversion.py | 2 -- python/ray/data/util/expression_utils.py | 4 ++++ 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/ray/air/util/data_batch_conversion.py b/python/ray/air/util/data_batch_conversion.py index a7b565364cf1..145486df7654 100644 --- a/python/ray/air/util/data_batch_conversion.py +++ b/python/ray/air/util/data_batch_conversion.py @@ -286,7 +286,6 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra """ Cast all NumPy ndarray columns in df to our tensor extension type, TensorArray. 
""" - pd = _lazy_import_pandas() # Get the SettingWithCopyWarning class if available SettingWithCopyWarning = get_setting_with_copy_warning() @@ -327,7 +326,6 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra def _cast_tensor_columns_to_ndarrays(df: "pd.DataFrame") -> "pd.DataFrame": """Cast all tensor extension columns in df to NumPy ndarrays.""" - pd = _lazy_import_pandas() # Get the SettingWithCopyWarning class if available SettingWithCopyWarning = get_setting_with_copy_warning() from ray.data._internal.tensor_extensions.pandas import TensorDtype diff --git a/python/ray/data/util/data_batch_conversion.py b/python/ray/data/util/data_batch_conversion.py index 3b83a4d8870e..f308a921dab7 100644 --- a/python/ray/data/util/data_batch_conversion.py +++ b/python/ray/data/util/data_batch_conversion.py @@ -221,7 +221,6 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra """ Cast all NumPy ndarray columns in df to our tensor extension type, TensorArray. 
""" - pd = _lazy_import_pandas() # Get the SettingWithCopyWarning class if available SettingWithCopyWarning = get_setting_with_copy_warning() @@ -262,7 +261,6 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra def _cast_tensor_columns_to_ndarrays(df: "pd.DataFrame") -> "pd.DataFrame": """Cast all tensor extension columns in df to NumPy ndarrays.""" - pd = _lazy_import_pandas() # Get the SettingWithCopyWarning class if available SettingWithCopyWarning = get_setting_with_copy_warning() from ray.data._internal.tensor_extensions.pandas import TensorDtype diff --git a/python/ray/data/util/expression_utils.py b/python/ray/data/util/expression_utils.py index 878b7e5e855c..53aeb385a23d 100644 --- a/python/ray/data/util/expression_utils.py +++ b/python/ray/data/util/expression_utils.py @@ -2,10 +2,13 @@ from typing import TYPE_CHECKING, Any, Callable, List, Optional +from ray.util.annotations import DeveloperAPI + if TYPE_CHECKING: from ray.data.expressions import Expr +@DeveloperAPI def get_setting_with_copy_warning() -> Optional[type]: """Get the SettingWithCopyWarning class from pandas, if available. @@ -28,6 +31,7 @@ def get_setting_with_copy_warning() -> Optional[type]: return None +@DeveloperAPI def create_callable_class_udf_init_fn( exprs: List["Expr"], ) -> Optional[Callable[[], None]]: