Use legacy Keras (tf_keras, TF_USE_LEGACY_KERAS=1) with TensorFlow

pang-wu · pang-wu · commit a86d51dd482e · 2026-03-12T09:48:43.000-07:00
diff --git a/.github/workflows/pypi_release.yml b/.github/workflows/pypi_release.yml
@@ -66,7 +66,7 @@ jobs:
         pip install "numpy<1.24" "click<8.3.0"
         pip install "pydantic<2.0"
         pip install torch --index-url https://download.pytorch.org/whl/cpu
-        pip install pyarrow "ray[train,default]==${{ env.RAY_VERSION }}" tqdm pytest tensorflow==2.13.1 tabulate grpcio-tools wget
+        pip install pyarrow "ray[train,default]==${{ env.RAY_VERSION }}" tqdm pytest tensorflow==2.16.1 tf_keras tabulate grpcio-tools wget
         pip install "xgboost_ray[default]<=0.1.13"
         pip install "xgboost<=2.0.3"
         pip install torchmetrics
diff --git a/.github/workflows/ray_nightly_test.yml b/.github/workflows/ray_nightly_test.yml
@@ -90,7 +90,7 @@ jobs:
               pip install "ray[train,default] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp311-cp311-manylinux2014_x86_64.whl"
               ;;
           esac
-          pip install pyarrow tqdm pytest "tensorflow>=2.16.1,<2.19" tabulate grpcio-tools wget
+          pip install pyarrow tqdm pytest tabulate grpcio-tools wget
           pip install "xgboost_ray[default]<=0.1.13"
           pip install torchmetrics
           HOROVOD_WITH_GLOO=1
@@ -107,7 +107,7 @@ jobs:
         run: |
           pip install pyspark==${{ matrix.spark-version }}
           ./build.sh
-          pip install dist/raydp-*.whl
+          pip install "$(ls dist/raydp-*.whl)[tensorflow]"
       - name: Lint
         run: |
           pip install pylint==3.2.7
diff --git a/.github/workflows/raydp.yml b/.github/workflows/raydp.yml
@@ -82,7 +82,7 @@ jobs:
           else
             pip install torch
           fi
-          pip install pyarrow "ray[train,default]==${{ matrix.ray-version }}" tqdm pytest "tensorflow>=2.16.1,<2.19" tabulate grpcio-tools wget
+          pip install pyarrow "ray[train,default]==${{ matrix.ray-version }}" tqdm pytest tabulate grpcio-tools wget
           pip install "xgboost_ray[default]<=0.1.13"
           pip install "xgboost<=2.0.3"
           pip install torchmetrics
@@ -97,7 +97,7 @@ jobs:
         run: |
           pip install pyspark==${{ matrix.spark-version }}
           ./build.sh
-          pip install dist/raydp-*.whl
+          pip install "$(ls dist/raydp-*.whl)[tensorflow]"
       - name: Lint
         run: |
           pip install pylint==3.2.7
diff --git a/examples/tensorflow_titanic.ipynb b/examples/tensorflow_titanic.ipynb
@@ -15,6 +15,7 @@
    "source": [
     "import ray\n",
     "import os\n",
+    "os.environ[\"TF_USE_LEGACY_KERAS\"] = \"1\"\n",
     "import re\n",
     "import pandas as pd, numpy as np\n",
     "\n",
diff --git a/python/raydp/tf/estimator.py b/python/raydp/tf/estimator.py
@@ -38,6 +38,7 @@
 from raydp.spark.interfaces import SparkEstimatorInterface, DF, OPTIONAL_DF
 from raydp import stop_spark
 
+
 class TFEstimator(EstimatorInterface, SparkEstimatorInterface):
     def __init__(self,
                  num_workers: int = 1,
@@ -175,30 +176,21 @@ def train_func(config):
             # Model building/compiling need to be within `strategy.scope()`.
             multi_worker_model = TFEstimator.build_and_compile_model(config)
 
-        # Disable auto-sharding since Ray already handles data distribution
-        # across workers. Without this, MultiWorkerMirroredStrategy tries to
-        # re-shard the dataset, producing PerReplica objects that Keras 3.x
-        # cannot convert back to tensors.
-        ds_options = tf.data.Options()
-        ds_options.experimental_distribute.auto_shard_policy = (
-            tf.data.experimental.AutoShardPolicy.OFF
-        )
-
         train_dataset = session.get_dataset_shard("train")
         train_tf_dataset = train_dataset.to_tf(
             feature_columns=config["feature_columns"],
             label_columns=config["label_columns"],
             batch_size=config["batch_size"],
             drop_last=config["drop_last"]
-        ).with_options(ds_options)
+        )
         if config["evaluate"]:
             eval_dataset = session.get_dataset_shard("evaluate")
             eval_tf_dataset = eval_dataset.to_tf(
                 feature_columns=config["feature_columns"],
                 label_columns=config["label_columns"],
                 batch_size=config["batch_size"],
                 drop_last=config["drop_last"]
-            ).with_options(ds_options)
+            )
         results = []
         callbacks = config["callbacks"]
         for _ in range(config["num_epochs"]):
diff --git a/python/setup.py b/python/setup.py
@@ -101,7 +101,6 @@ def run(self):
         "pyarrow >= 4.0.1",
         "ray >= 2.37.0",
         "pyspark >= 4.0.0",
-        "netifaces",
         "protobuf > 3.19.5"
     ]
 
@@ -132,6 +131,10 @@ def run(self):
             'build_proto_modules': CustomBuildPackageProtos,
         },
         install_requires=install_requires,
+        extras_require={
+            "tensorflow": ["tensorflow>=2.15.1,<2.16"],
+            "tensorflow-gpu": ["tensorflow[and-cuda]>=2.15.1,<2.16"],
+        },
         setup_requires=["grpcio-tools"],
         python_requires='>=3.10',
         classifiers=[