
Commit 2498104

align everything to use from_spark_recoverable

1 parent 3ef3b92 commit 2498104

File tree

11 files changed: +124 additions, -818 deletions


README.md

Lines changed: 9 additions & 16 deletions

@@ -153,32 +153,25 @@ Please refer to [NYC Taxi PyTorch Estimator](./examples/pytorch_nyctaxi.py) and
 
 ***Fault Tolerance***
 
-The ray dataset converted from spark dataframe like above is not fault-tolerant. This is because we implement it using `Ray.put` combined with spark `mapPartitions`. Objects created by `Ray.put` is not recoverable in Ray.
+RayDP now converts Spark DataFrames to Ray Datasets using a recoverable pipeline by default. This makes the resulting Ray Dataset resilient to Spark executor loss (the Arrow IPC bytes are cached in Spark and fetched via Ray tasks with lineage).
+
+The recoverable conversion is also available directly via `raydp.spark.from_spark_recoverable`, and it persists (caches) the Spark DataFrame. You can provide the storage level through the `storage_level` keyword parameter.
 
-RayDP now supports converting data in a way such that the resulting ray dataset is fault-tolerant. This feature is currently *experimental*. Here is how to use it:
 ```python
 import ray
 import raydp
 
-# Fault tolerance requires cross language support:
-# https://docs.ray.io/en/latest/ray-core/cross-language.html
-# set job_config to trigger load-code-from-local
-ray.init(address="auto",
-         job_config=JobConfig(code_search_path=[os.getcwd()]))
-# set fault_tolerance_mode to True to enable the feature
-# this will connect pyspark driver to ray cluster
+ray.init(address="auto")
 spark = raydp.init_spark(app_name="RayDP Example",
                          num_executors=2,
                          executor_cores=2,
-                         executor_memory="4GB",
-                         fault_tolerance_mode=True)
-# df should be large enough so that result will be put into plasma
+                         executor_memory="4GB")
+
 df = spark.range(100000)
-# use this API instead of ray.data.from_spark
-ds = raydp.spark.from_spark_recoverable(df)
-# ds is now fault-tolerant.
+ds = raydp.spark.from_spark_recoverable(df)  # fault-tolerant
 ```
-Notice that `from_spark_recoverable` will persist the converted dataframe. You can provide the storage level through keyword parameter `storage_level`. In addition, this feature is not available in ray client mode. If you need to use ray client, please wrap your application in a ray actor, as described in the ray client chapter.
+
+Note: recoverable conversion is not available in Ray client mode. If you need to use Ray client, wrap your application in a Ray actor as described in the Ray client docs.
 
 
 ## Getting Involved
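A short usage sketch of the `storage_level` keyword that the updated README mentions. The README only says the cache level is configurable through this keyword; passing a `pyspark.StorageLevel` value here is an assumption for illustration, not something this commit confirms.

```python
import ray
import raydp
from pyspark import StorageLevel

ray.init(address="auto")
spark = raydp.init_spark(app_name="RayDP storage level example",
                         num_executors=2,
                         executor_cores=2,
                         executor_memory="4GB")

df = spark.range(100000)
# Assumption: storage_level accepts a pyspark StorageLevel; the README only
# states that the persistence level can be set via this keyword.
ds = raydp.spark.from_spark_recoverable(
    df, storage_level=StorageLevel.MEMORY_AND_DISK)
print(ds.count())
```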

core/raydp-main/src/main/scala/org/apache/spark/executor/RayDPExecutor.scala

Lines changed: 0 additions & 31 deletions

@@ -20,7 +20,6 @@ package org.apache.spark.executor
 import java.io.{ByteArrayOutputStream, File}
 import java.nio.channels.Channels
 import java.nio.file.Paths
-import java.util.concurrent.ConcurrentHashMap
 import java.util.concurrent.atomic.AtomicBoolean
 
 import scala.reflect.classTag
@@ -269,22 +268,6 @@ class RayDPExecutor(
     Ray.exitActor
   }
 
-  /**
-   * Pop (remove and return) a previously stored Arrow IPC stream by key.
-   *
-   * This method is intended to be called from a Python "owner/registry" actor via Ray
-   * cross-language actor calls. Since the Python actor is the caller, Ray will assign
-   * ownership of the returned object to that Python actor.
-   */
-  def popArrowIPC(batchKey: String): Array[Byte] = {
-    val bytes = RayDPExecutor.popArrowIPC(batchKey)
-    if (bytes == null) {
-      throw new RayDPException(
-        s"Missing Arrow IPC bytes for batchKey=$batchKey on executorId=$executorId.")
-    }
-    bytes
-  }
-
   def getBlockLocations(rddId: Int, numPartitions: Int): Array[String] = {
     val env = SparkEnv.get
     val blockIds = (0 until numPartitions).map(i =>
@@ -367,17 +350,3 @@ class RayDPExecutor(
     result
   }
 }
-
-object RayDPExecutor {
-  // Per-executor in-memory buffer for Arrow IPC streams produced by Spark tasks.
-  // Stored in the executor (Ray actor) process; entries are removed by popArrowIPC.
-  private val arrowIpcByKey = new ConcurrentHashMap[String, Array[Byte]]()
-
-  def putArrowIPC(batchKey: String, bytes: Array[Byte]): Unit = {
-    arrowIpcByKey.put(batchKey, bytes)
-  }
-
-  def popArrowIPC(batchKey: String): Array[Byte] = {
-    arrowIpcByKey.remove(batchKey)
-  }
-}
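The deleted `popArrowIPC` path leaned on a general Ray rule: the process that issues the `.remote()` call owns the returned object, which is why the Python owner actor, as the caller, would have owned the fetched Arrow bytes. A minimal Python sketch of that ownership rule, using toy actor names that are illustrative only and not RayDP code:

```python
import ray

@ray.remote
class Producer:
    # Stands in for the executor-side process that holds serialized bytes.
    def make_bytes(self) -> bytes:
        return b"arrow-ipc-stream-bytes"

@ray.remote
class Owner:
    # The actor that issues the .remote() call becomes the owner of the
    # returned object, which is the property the removed javadoc relied on.
    def fetch(self, producer) -> int:
        ref = producer.make_bytes.remote()  # Owner is the caller -> Owner owns ref
        data = ray.get(ref)
        return len(data)

ray.init()
producer = Producer.remote()
owner = Owner.remote()
print(ray.get(owner.fetch.remote(producer)))
```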

core/raydp-main/src/main/scala/org/apache/spark/sql/raydp/ObjectStoreWriter.scala

Lines changed: 1 addition & 137 deletions

@@ -20,162 +20,26 @@ package org.apache.spark.sql.raydp
 import com.intel.raydp.shims.SparkShimLoader
 import io.ray.api.{ActorHandle, ObjectRef, Ray}
 import io.ray.runtime.AbstractRayRuntime
-import java.io.ByteArrayOutputStream
 import java.util.{List, UUID}
 import java.util.concurrent.{ConcurrentHashMap, ConcurrentLinkedQueue}
 import java.util.function.{Function => JFunction}
-import org.apache.arrow.vector.VectorSchemaRoot
-import org.apache.arrow.vector.ipc.ArrowStreamWriter
 import org.apache.arrow.vector.types.pojo.Schema
 import scala.collection.JavaConverters._
 import scala.collection.mutable
-import scala.collection.mutable.ArrayBuffer
 
-import org.apache.spark.{RayDPException, SparkContext, SparkEnv}
+import org.apache.spark.{RayDPException, SparkContext}
 import org.apache.spark.deploy.raydp._
 import org.apache.spark.executor.RayDPExecutor
 import org.apache.spark.network.util.JavaUtils
 import org.apache.spark.raydp.{RayDPUtils, RayExecutorUtils}
 import org.apache.spark.sql.DataFrame
-import org.apache.spark.sql.execution.arrow.ArrowWriter
-import org.apache.spark.sql.execution.python.BatchIterator
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.util.ArrowUtils
 import org.apache.spark.storage.StorageLevel
-import org.apache.spark.util.Utils
-
-/**
- * A batch of record that has been wrote into Ray object store.
- * @param ownerAddress the owner address of the ray worker
- * @param objectId the ObjectId for the stored data
- * @param numRecords the number of records for the stored data
- */
-case class RecordBatch(
-    ownerAddress: Array[Byte],
-    objectId: Array[Byte],
-    numRecords: Int)
 
 class ObjectStoreWriter(@transient val df: DataFrame) extends Serializable {
 
   val uuid: UUID = ObjectStoreWriter.dfToId.getOrElseUpdate(df, UUID.randomUUID())
 
-  def writeToRay(
-      data: Array[Byte],
-      numRecords: Int,
-      ownerName: String): RecordBatch = {
-
-    // Single-owner implementation:
-    // - Spark executor JVM actor produces Arrow IPC bytes (data).
-    // - Bytes are buffered inside the executor actor process keyed by batchKey.
-    // - JVM returns (executorActorName, batchKey) to Python.
-    // - A Python single owner actor later calls executorActor.popArrowIPC(batchKey),
-    //   decodes into pyarrow.Table and becomes the Ray owner of the resulting Dataset blocks.
-    //
-    // We keep ownerName non-empty for API consistency and to avoid accidental usage without
-    // a dedicated owner actor on the Python side.
-    if (ownerName == null || ownerName.isEmpty) {
-      throw new RayDPException("ownerName must be set for Spark->Ray conversion.")
-    }
-
-    val executorId = SparkEnv.get.executorId
-    val executorActorName = s"raydp-executor-${executorId}"
-    val batchKey = UUID.randomUUID().toString
-
-    // Buffer bytes in the executor actor process. The Python owner actor will pop them via
-    // cross-language actor call later.
-    RayDPExecutor.putArrowIPC(batchKey, data)
-
-    // RecordBatch payload:
-    // - ownerAddress encodes the RayDPExecutor actor name (UTF-8)
-    // - objectId encodes the batch key (UTF-8)
-    RecordBatch(executorActorName.getBytes("UTF-8"), batchKey.getBytes("UTF-8"), numRecords)
-  }
-
-  /**
-   * Save the DataFrame to Ray object store with Apache Arrow format.
-   */
-  def save(useBatch: Boolean, ownerName: String): List[RecordBatch] = {
-    val conf = df.queryExecution.sparkSession.sessionState.conf
-    val timeZoneId = conf.getConf(SQLConf.SESSION_LOCAL_TIMEZONE)
-    var batchSize = conf.getConf(SQLConf.ARROW_EXECUTION_MAX_RECORDS_PER_BATCH)
-    if (!useBatch) {
-      batchSize = 0
-    }
-    val schema = df.schema
-
-    val objectIds = df.queryExecution.toRdd.mapPartitions{ iter =>
-      // DO NOT use iter.grouped(). See BatchIterator.
-      val batchIter = if (batchSize > 0) {
-        new BatchIterator(iter, batchSize)
-      } else {
-        Iterator(iter)
-      }
-
-      val arrowSchema = SparkShimLoader.getSparkShims.toArrowSchema(schema, timeZoneId)
-      val allocator = ArrowUtils.rootAllocator.newChildAllocator(
-        s"ray object store writer", 0, Long.MaxValue)
-      val root = VectorSchemaRoot.create(arrowSchema, allocator)
-      val results = new ArrayBuffer[RecordBatch]()
-
-      val byteOut = new ByteArrayOutputStream()
-      val arrowWriter = ArrowWriter.create(root)
-      var numRecords: Int = 0
-
-      Utils.tryWithSafeFinally {
-        while (batchIter.hasNext) {
-          // reset the state
-          numRecords = 0
-          byteOut.reset()
-          arrowWriter.reset()
-
-          // write out the schema meta data
-          val writer = new ArrowStreamWriter(root, null, byteOut)
-          writer.start()
-
-          // get the next record batch
-          val nextBatch = batchIter.next()
-
-          while (nextBatch.hasNext) {
-            numRecords += 1
-            arrowWriter.write(nextBatch.next())
-          }
-
-          // set the write record count
-          arrowWriter.finish()
-          // write out the record batch to the underlying out
-          writer.writeBatch()
-
-          // get the wrote ByteArray and save to Ray ObjectStore
-          val byteArray = byteOut.toByteArray
-          results += writeToRay(byteArray, numRecords, ownerName)
-          // end writes footer to the output stream and doesn't clean any resources.
-          // It could throw exception if the output stream is closed, so it should be
-          // in the try block.
-          writer.end()
-        }
-        arrowWriter.reset()
-        byteOut.close()
-      } {
-        // If we close root and allocator in TaskCompletionListener, there could be a race
-        // condition where the writer thread keeps writing to the VectorSchemaRoot while
-        // it's being closed by the TaskCompletion listener.
-        // Closing root and allocator here is cleaner because root and allocator is owned
-        // by the writer thread and is only visible to the writer thread.
-        //
-        // If the writer thread is interrupted by TaskCompletionListener, it should either
-        // (1) in the try block, in which case it will get an InterruptedException when
-        // performing io, and goes into the finally block or (2) in the finally block,
-        // in which case it will ignore the interruption and close the resources.
-
-        root.close()
-        allocator.close()
-      }
-
-      results.toIterator
-    }.collect()
-    objectIds.toSeq.asJava
-  }
-
   /**
    * For test.
   */
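The deleted `save`/`writeToRay` path serialized each Spark partition into an Arrow IPC stream (schema header plus record batches) before handing the bytes to the Python side, which the removed comment describes decoding into a `pyarrow.Table`. A minimal pyarrow round trip of that wire format, purely as an illustration and not RayDP code:

```python
import pyarrow as pa

# A tiny table standing in for one Spark partition's rows.
table = pa.table({"id": [1, 2, 3], "value": ["a", "b", "c"]})

# Serialize to Arrow IPC stream bytes, analogous to what the removed Scala
# writer emitted per batch via ArrowStreamWriter.
sink = pa.BufferOutputStream()
with pa.ipc.new_stream(sink, table.schema) as writer:
    writer.write_table(table)
ipc_bytes = sink.getvalue().to_pybytes()

# Decode on the consumer side back into a pyarrow.Table, as the removed
# comment described the Python owner actor doing.
decoded = pa.ipc.open_stream(pa.BufferReader(ipc_bytes)).read_all()
assert decoded.equals(table)
```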

python/raydp/context.py

Lines changed: 6 additions & 11 deletions

@@ -21,6 +21,7 @@
 from typing import Dict, List, Union, Optional
 
 import ray
+import ray.util.client as ray_client
 from pyspark.sql import SparkSession
 
 from ray.util.placement_group import PlacementGroup
@@ -62,6 +63,8 @@ class _SparkContext(ContextDecorator):
         please install the corresponding spark version first, set ENV SPARK_HOME,
         configure spark-env.sh HADOOP_CONF_DIR in spark conf, and copy hive-site.xml
         and hdfs-site.xml to ${SPARK_HOME}/ conf
+    :param fault_tolerant_mode: enable recoverable Spark->Ray conversion by default.
+        Not supported in Ray client mode.
     :param placement_group_strategy: RayDP will create a placement group according to the
         strategy and the configured resources for executors.
         If this parameter is specified, the next two
@@ -181,7 +184,7 @@ def init_spark(app_name: str,
               executor_cores: int,
               executor_memory: Union[str, int],
               enable_hive: bool = False,
-              fault_tolerant_mode = False,
+              fault_tolerant_mode = True,
               placement_group_strategy: Optional[str] = None,
               placement_group: Optional[PlacementGroup] = None,
               placement_group_bundle_indexes: Optional[List[int]] = None,
@@ -213,16 +216,8 @@ def init_spark(app_name: str,
         # ray has not initialized, init local
         ray.init()
 
-    if fault_tolerant_mode:
-        print(
-            '''
-            Caution: Fault-tolerant mode is now experimental!
-            This mode CANNOT be used in ray client mode.
-            Use raydp.spark.from_spark_recoverable instead of ray.data.from_spark
-            to make your data recoverable.
-            The spark dataframe converted this way will be cached.
-            '''
-        )
+    if fault_tolerant_mode and ray_client.ray.is_connected():
+        raise Exception("fault_tolerant_mode is not supported in Ray client mode.")
 
     with _spark_context_lock:
         global _global_spark_context
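A sketch of what this change means for callers, assuming only what the diff shows: `fault_tolerant_mode` now defaults to `True`, and `init_spark` raises when Ray client mode is detected.

```python
import ray
import raydp

# With the new default, a plain call is equivalent to fault_tolerant_mode=True,
# so the recoverable Spark->Ray conversion path is used without extra flags.
ray.init(address="auto")
spark = raydp.init_spark(app_name="RayDP Example",
                         num_executors=2,
                         executor_cores=2,
                         executor_memory="4GB")

# In Ray client mode (e.g. ray.init("ray://<head>:10001")) the same call is
# expected to raise, per the new check:
#   Exception: fault_tolerant_mode is not supported in Ray client mode.
# Opting out explicitly keeps client-mode workflows running:
# spark = raydp.init_spark(..., fault_tolerant_mode=False)
```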
