@@ -638,6 +638,111 @@ def _cuml_fit(df: CumlInputType, params: Dict[str, Any]) -> Dict[str, Any]:
638638 """
639639 raise NotImplementedError ()
640640
+    def _skip_stage_level_scheduling(self, spark_version: str, conf: SparkConf) -> bool:
+        """Check if stage-level scheduling is not needed;
+        return True to skip stage-level scheduling."""
+
+        if spark_version < "3.4.0":
+            self.logger.info(
+                "Stage-level scheduling in spark-rapids-ml requires spark version 3.4.0+"
+            )
+            return True
+
+        if "3.4.0" <= spark_version < "3.5.1" and not _is_standalone_or_localcluster(
+            conf
+        ):
+            self.logger.info(
+                "For Spark %s, Stage-level scheduling in spark-rapids-ml requires spark "
+                "standalone or local-cluster mode",
+                spark_version,
+            )
+            return True
+
+        executor_cores = conf.get("spark.executor.cores")
+        executor_gpus = conf.get("spark.executor.resource.gpu.amount")
+        if executor_cores is None or executor_gpus is None:
+            self.logger.info(
+                "Stage-level scheduling in spark-rapids-ml requires spark.executor.cores, "
+                "spark.executor.resource.gpu.amount to be set."
+            )
+            return True
+
+        if int(executor_cores) == 1:
+            # there will be only 1 task running at any time.
+            self.logger.info(
+                "Stage-level scheduling in spark-rapids-ml requires spark.executor.cores > 1"
+            )
+            return True
+
+        if int(executor_gpus) > 1:
+            # For spark.executor.resource.gpu.amount > 1, we assume the user knows how to
+            # configure Spark so that spark-rapids-ml runs successfully.
+            self.logger.info(
+                "Stage-level scheduling in spark-rapids-ml will not work "
+                "when spark.executor.resource.gpu.amount > 1"
+            )
+            return True
+
+        task_gpu_amount = conf.get("spark.task.resource.gpu.amount")
+
+        if task_gpu_amount is None:
+            # ETL tasks will not grab a gpu when spark.task.resource.gpu.amount is not set,
+            # but with stage-level scheduling we can make the training tasks grab the gpu.
+            return False
+
+        if float(task_gpu_amount) == float(executor_gpus):
+            # spark.executor.resource.gpu.amount == spark.task.resource.gpu.amount
+            # results in only 1 task running at a time, which may cause a perf issue.
+            return True
+
+        # We can enable stage-level scheduling
+        return False
+
+    def _try_stage_level_scheduling(self, rdd: RDD) -> RDD:
+        ss = _get_spark_session()
+        sc = ss.sparkContext
+
+        if _is_local(sc) or self._skip_stage_level_scheduling(ss.version, sc.getConf()):
+            return rdd
+
+        # executor_cores will not be None
+        executor_cores = ss.sparkContext.getConf().get("spark.executor.cores")
+        assert executor_cores is not None
+
+        from pyspark.resource.profile import ResourceProfileBuilder
+        from pyspark.resource.requests import TaskResourceRequests
+
+        # Each training task requires more cpu cores than half of the executor cores, which
+        # ensures that each training task is sent to a different executor.
+        #
+        # Please note that we can't set task_cores to a value smaller than half of the
+        # executor cores, because task_gpus alone can't ensure the tasks are sent to
+        # different executors, even with task_gpus=1.0.
+        #
+        # If spark-rapids is enabled, we don't allow other ETL tasks to run alongside the
+        # training task, to avoid OOM.
+        spark_plugins = ss.conf.get("spark.plugins", " ")
+        assert spark_plugins is not None
+        spark_rapids_sql_enabled = ss.conf.get("spark.rapids.sql.enabled", "true")
+        assert spark_rapids_sql_enabled is not None
+
+        task_cores = (
+            int(executor_cores)
+            if "com.nvidia.spark.SQLPlugin" in spark_plugins
+            and "true" == spark_rapids_sql_enabled.lower()
+            else (int(executor_cores) // 2) + 1
+        )
+        # task_gpus means how many slots per gpu address the task requires; it does not mean
+        # how many gpus the task would like to require, so it can be any value in (0, 0.5] or 1.
+        task_gpus = 1.0
+
+        treqs = TaskResourceRequests().cpus(task_cores).resource("gpu", task_gpus)
+        rp = ResourceProfileBuilder().require(treqs).build
+
+        self.logger.info(
+            f"Training tasks require the resource(cores={task_cores}, gpu={task_gpus})"
+        )
+
+        return rdd.withResources(rp)
+
     def _call_cuml_fit_func(
         self,
         dataset: DataFrame,
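
For context on the new `_try_stage_level_scheduling` above: it uses PySpark's stage-level scheduling API, i.e. build a `TaskResourceRequests`, wrap it in a `ResourceProfile`, and attach it to the RDD with `withResources` so only the training stage picks up the new requests. Below is a minimal, self-contained sketch of that pattern; the session setup and the 2-core / 1.0-gpu amounts are illustrative assumptions, not values taken from this diff, and actually running a job with a custom profile still requires a cluster that supports stage-level scheduling.

```python
from pyspark.sql import SparkSession
from pyspark.resource.profile import ResourceProfileBuilder
from pyspark.resource.requests import TaskResourceRequests

spark = SparkSession.builder.getOrCreate()

# Describe what each task in the next stage needs: here 2 CPU cores and a full
# GPU slot per task (both numbers are placeholders for this sketch).
treqs = TaskResourceRequests().cpus(2).resource("gpu", 1.0)

# ResourceProfileBuilder.build is a property, not a method, hence no parentheses.
rp = ResourceProfileBuilder().require(treqs).build

rdd = spark.sparkContext.parallelize(range(8), numSlices=2)

# Attach the profile; only the stages that compute this RDD use the new requests.
scheduled_rdd = rdd.withResources(rp)
print(scheduled_rdd.getResourceProfile())
```
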
@@ -911,6 +1016,8 @@ def _train_udf(pdf_iter: Iterator[pd.DataFrame]) -> pd.DataFrame:
             .mapPartitions(lambda x: x)
         )
 
+        pipelined_rdd = self._try_stage_level_scheduling(pipelined_rdd)
+
         return pipelined_rdd
 
     def _fit_array_order(self) -> _ArrayOrder:
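
Tying the two hunks together, the resource profile is only applied when `_skip_stage_level_scheduling` returns False, i.e. roughly under a configuration like the one sketched below. This is an illustrative setup, not one mandated by the diff; the master URL and the specific core/GPU-fraction numbers are assumptions.

```python
from pyspark.sql import SparkSession

# Illustrative config that passes every check in _skip_stage_level_scheduling:
# Spark 3.4.0+ standalone, more than 1 executor core, exactly 1 GPU per executor,
# and a fractional task GPU amount so ETL tasks can share the GPU.
spark = (
    SparkSession.builder.master("spark://master:7077")  # assumed standalone master URL
    .config("spark.executor.cores", "8")
    .config("spark.executor.resource.gpu.amount", "1")
    .config("spark.task.resource.gpu.amount", "0.125")
    .config("spark.plugins", "com.nvidia.spark.SQLPlugin")  # optional: spark-rapids ETL
    .getOrCreate()
)
```
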
@@ -1081,111 +1188,6 @@ def fitMultipleModels() -> List["_CumlModel"]:
         else:
             return super().fitMultiple(dataset, paramMaps)
 
-    def _skip_stage_level_scheduling(self, spark_version: str, conf: SparkConf) -> bool:
-        """Check if stage-level scheduling is not needed;
-        return True to skip stage-level scheduling."""
-
-        if spark_version < "3.4.0":
-            self.logger.info(
-                "Stage-level scheduling in spark-rapids-ml requires spark version 3.4.0+"
-            )
-            return True
-
-        if "3.4.0" <= spark_version < "3.5.1" and not _is_standalone_or_localcluster(
-            conf
-        ):
-            self.logger.info(
-                "For Spark %s, Stage-level scheduling in spark-rapids-ml requires spark "
-                "standalone or local-cluster mode",
-                spark_version,
-            )
-            return True
-
-        executor_cores = conf.get("spark.executor.cores")
-        executor_gpus = conf.get("spark.executor.resource.gpu.amount")
-        if executor_cores is None or executor_gpus is None:
-            self.logger.info(
-                "Stage-level scheduling in spark-rapids-ml requires spark.executor.cores, "
-                "spark.executor.resource.gpu.amount to be set."
-            )
-            return True
-
-        if int(executor_cores) == 1:
-            # there will be only 1 task running at any time.
-            self.logger.info(
-                "Stage-level scheduling in spark-rapids-ml requires spark.executor.cores > 1"
-            )
-            return True
-
-        if int(executor_gpus) > 1:
-            # For spark.executor.resource.gpu.amount > 1, we assume the user knows how to
-            # configure Spark so that spark-rapids-ml runs successfully.
-            self.logger.info(
-                "Stage-level scheduling in spark-rapids-ml will not work "
-                "when spark.executor.resource.gpu.amount > 1"
-            )
-            return True
-
-        task_gpu_amount = conf.get("spark.task.resource.gpu.amount")
-
-        if task_gpu_amount is None:
-            # ETL tasks will not grab a gpu when spark.task.resource.gpu.amount is not set,
-            # but with stage-level scheduling we can make the training tasks grab the gpu.
-            return False
-
-        if float(task_gpu_amount) == float(executor_gpus):
-            # spark.executor.resource.gpu.amount == spark.task.resource.gpu.amount
-            # results in only 1 task running at a time, which may cause a perf issue.
-            return True
-
-        # We can enable stage-level scheduling
-        return False
-
-    def _try_stage_level_scheduling(self, rdd: RDD) -> RDD:
-        ss = _get_spark_session()
-        sc = ss.sparkContext
-
-        if _is_local(sc) or self._skip_stage_level_scheduling(ss.version, sc.getConf()):
-            return rdd
-
-        # executor_cores will not be None
-        executor_cores = ss.sparkContext.getConf().get("spark.executor.cores")
-        assert executor_cores is not None
-
-        from pyspark.resource.profile import ResourceProfileBuilder
-        from pyspark.resource.requests import TaskResourceRequests
-
-        # Each training task requires more cpu cores than half of the executor cores, which
-        # ensures that each training task is sent to a different executor.
-        #
-        # Please note that we can't set task_cores to a value smaller than half of the
-        # executor cores, because task_gpus alone can't ensure the tasks are sent to
-        # different executors, even with task_gpus=1.0.
-        #
-        # If spark-rapids is enabled, we don't allow other ETL tasks to run alongside the
-        # training task, to avoid OOM.
-        spark_plugins = ss.conf.get("spark.plugins", " ")
-        assert spark_plugins is not None
-        spark_rapids_sql_enabled = ss.conf.get("spark.rapids.sql.enabled", "true")
-        assert spark_rapids_sql_enabled is not None
-
-        task_cores = (
-            int(executor_cores)
-            if "com.nvidia.spark.SQLPlugin" in spark_plugins
-            and "true" == spark_rapids_sql_enabled.lower()
-            else (int(executor_cores) // 2) + 1
-        )
-        # task_gpus means how many slots per gpu address the task requires; it does not mean
-        # how many gpus the task would like to require, so it can be any value in (0, 0.5] or 1.
-        task_gpus = 1.0
-
-        treqs = TaskResourceRequests().cpus(task_cores).resource("gpu", task_gpus)
-        rp = ResourceProfileBuilder().require(treqs).build
-
-        self.logger.info(
-            f"Training tasks require the resource(cores={task_cores}, gpu={task_gpus})"
-        )
-
-        return rdd.withResources(rp)
-
     def _fit_internal(
         self, dataset: DataFrame, paramMaps: Optional[Sequence["ParamMap"]]
     ) -> List["_CumlModel"]:
@@ -1196,8 +1198,6 @@ def _fit_internal(
             paramMaps=paramMaps,
         )
 
-        pipelined_rdd = self._try_stage_level_scheduling(pipelined_rdd)
-
         self.logger.info(
             f"Training spark-rapids-ml with {self.num_workers} worker(s) ..."
         )