[Data] Concurrency cap backpressure with tuning (Disabled) (#59519)

srinathk10 · web-flow · commit 0de211850589 · 2025-12-17T15:48:52.000-08:00
EWMA_ALPHA: Update EWMA_ALPHA from 0.2 to 0.1. This makes the level adjustment favor limiting concurrency by being more sensitive to downstream queuing. K_DEV: Update K_DEV from 2.0 to 1.0. This makes the stddev term favor limiting concurrency by being more sensitive to downstream queuing. Cherry-pick of #59392.
diff --git a/python/ray/data/_internal/execution/backpressure_policy/concurrency_cap_backpressure_policy.py b/python/ray/data/_internal/execution/backpressure_policy/concurrency_cap_backpressure_policy.py
@@ -41,18 +41,18 @@ class ConcurrencyCapBackpressurePolicy(BackpressurePolicy):
     """
 
     # Smoothing factor for the asymmetric EWMA (slow fall, faster rise).
-    EWMA_ALPHA = env_float("RAY_DATA_CONCURRENCY_CAP_EWMA_ALPHA", 0.2)
+    EWMA_ALPHA = env_float("RAY_DATA_CONCURRENCY_CAP_EWMA_ALPHA", 0.1)
     EWMA_ALPHA_UP = 1.0 - (1.0 - EWMA_ALPHA) ** 2  # fast rise
     # Deadband width in units of the EWMA absolute deviation estimate.
-    K_DEV = env_float("RAY_DATA_CONCURRENCY_CAP_K_DEV", 2.0)
+    K_DEV = env_float("RAY_DATA_CONCURRENCY_CAP_K_DEV", 1.0)
     # Factor to back off when the queue is too large.
     BACKOFF_FACTOR = env_float("RAY_DATA_CONCURRENCY_CAP_BACKOFF_FACTOR", 1)
     # Factor to ramp up when the queue is too small.
     RAMPUP_FACTOR = env_float("RAY_DATA_CONCURRENCY_CAP_RAMPUP_FACTOR", 1)
     # Threshold for per-Op object store budget (available) vs total
     # (available / total) ratio to enable dynamic output queue size backpressure.
-    OBJECT_STORE_BUDGET_RATIO = env_float(
-        "RAY_DATA_CONCURRENCY_CAP_OBJECT_STORE_BUDGET_RATIO", 0.1
+    AVAILABLE_OBJECT_STORE_BUDGET_THRESHOLD = env_float(
+        "RAY_DATA_CONCURRENCY_CAP_AVAILABLE_OBJECT_STORE_BUDGET_THRESHOLD", 0.1
     )
 
     def __init__(self, *args, **kwargs):
@@ -93,7 +93,7 @@ def __init__(self, *args, **kwargs):
             dynamic_output_queue_size_backpressure_configs = (
                 f", EWMA_ALPHA={self.EWMA_ALPHA}, K_DEV={self.K_DEV}, "
                 f"BACKOFF_FACTOR={self.BACKOFF_FACTOR}, RAMPUP_FACTOR={self.RAMPUP_FACTOR}, "
-                f"OBJECT_STORE_BUDGET_RATIO={self.OBJECT_STORE_BUDGET_RATIO}"
+                f"AVAILABLE_OBJECT_STORE_BUDGET_THRESHOLD={self.AVAILABLE_OBJECT_STORE_BUDGET_THRESHOLD}"
             )
         logger.debug(
             f"ConcurrencyCapBackpressurePolicy caps: {self._concurrency_caps}, "
@@ -141,10 +141,16 @@ def can_add_input(self, op: "PhysicalOperator") -> bool:
         """Return whether `op` may accept another input now."""
         num_tasks_running = op.metrics.num_tasks_running
 
-        # If not a MapOperator or feature disabled, just enforce configured cap.
+        # Skip dynamic backpressure if:
+        # - Not a MapOperator
+        # - Op is not eligible for backpressure
+        # - Dynamic backpressure based on output queue size is disabled
+        # - Downstream is a materializing op which requires full materialization
         if (
             not isinstance(op, MapOperator)
+            or not self._resource_manager.is_op_eligible(op)
             or not self.enable_dynamic_output_queue_size_backpressure
+            or self._resource_manager.has_materializing_downstream_op(op)
         ):
             return num_tasks_running < self._concurrency_caps[op]
 
@@ -156,20 +162,17 @@ def can_add_input(self, op: "PhysicalOperator") -> bool:
             total_mem = op_usage.object_store_memory + op_budget.object_store_memory
             if total_mem == 0 or (
                 op_budget.object_store_memory / total_mem
-                > self.OBJECT_STORE_BUDGET_RATIO
+                > self.AVAILABLE_OBJECT_STORE_BUDGET_THRESHOLD
             ):
                 # If the objectstore budget (available) to total
                 # ratio is above threshold (10%), skip dynamic output queue size
                 # backpressure, but still enforce the configured cap.
                 return num_tasks_running < self._concurrency_caps[op]
 
         # Current total queued bytes (this op + downstream)
-        current_queue_size_bytes = (
-            self._resource_manager.get_op_internal_object_store_usage(op)
-            + self._resource_manager.get_op_outputs_object_store_usage_with_downstream(
-                op
-            )
-        )
+        current_queue_size_bytes = self._resource_manager.get_mem_op_internal(
+            op
+        ) + self._resource_manager.get_op_outputs_object_store_usage_with_downstream(op)
 
         # Update EWMA state (level & dev) and compute effective cap. Note that
         # we don't update the EWMA state if the objectstore budget (available) vs total
diff --git a/python/ray/data/_internal/execution/resource_manager.py b/python/ray/data/_internal/execution/resource_manager.py
@@ -135,8 +135,8 @@ def _warn_about_object_store_memory_if_needed(self):
             ):
                 logger.warning(
                     f"{WARN_PREFIX} Ray's object store is configured to use only "
-                    f"{object_store_fraction:.1%} of available memory ({object_store_memory/GiB:.1f}GiB "
-                    f"out of {total_memory/GiB:.1f}GiB total). For optimal Ray Data performance, "
+                    f"{object_store_fraction:.1%} of available memory ({object_store_memory / GiB:.1f}GiB "
+                    f"out of {total_memory / GiB:.1f}GiB total). For optimal Ray Data performance, "
                     f"we recommend setting the object store to at least 50% of available memory. "
                     f"You can do this by setting the 'object_store_memory' parameter when calling "
                     f"ray.init() or by setting the RAY_DEFAULT_OBJECT_STORE_MEMORY_PROPORTION environment variable."
@@ -268,6 +268,14 @@ def get_op_usage(self, op: PhysicalOperator) -> ExecutionResources:
         """Return the resource usage of the given operator at the current time."""
         return self._op_usages[op]
 
+    def get_mem_op_internal(self, op: PhysicalOperator) -> int:
+        """Return the memory usage of the internal buffers of the given operator."""
+        return self._mem_op_internal[op]
+
+    def get_mem_op_outputs(self, op: PhysicalOperator) -> int:
+        """Return the memory usage of the outputs of the given operator."""
+        return self._mem_op_outputs[op]
+
     def get_op_usage_str(self, op: PhysicalOperator, *, verbose: bool) -> str:
         """Return a human-readable string representation of the resource usage of
         the given operator."""
@@ -286,8 +294,8 @@ def get_op_usage_str(self, op: PhysicalOperator, *, verbose: bool) -> str:
 
         if verbose:
             usage_str += (
-                f" (in={memory_string(self._mem_op_internal[op])},"
-                f"out={memory_string(self._mem_op_outputs[op])})"
+                f" (in={memory_string(self.get_mem_op_internal(op))},"
+                f"out={memory_string(self.get_mem_op_outputs(op))})"
             )
             if self._op_resource_allocator is not None:
                 allocation = self._op_resource_allocator.get_allocation(op)
@@ -394,9 +402,12 @@ def get_op_outputs_object_store_usage_with_downstream(
         )
         return op_outputs_usage
 
-    def get_op_internal_object_store_usage(self, op: PhysicalOperator) -> int:
-        """Get the internal object store memory usage of the given operator"""
-        return self._mem_op_internal[op]
+    def has_materializing_downstream_op(self, op: PhysicalOperator) -> bool:
+        """Check if the operator has a downstream materializing operator."""
+        return any(
+            isinstance(next_op, MATERIALIZING_OPERATORS)
+            for next_op in op.output_dependencies
+        )
 
 
 def _get_first_pending_shuffle_op(topology: "Topology") -> int:
@@ -832,9 +843,7 @@ def update_budgets(
             op_mem_usage = 0
             # Add the memory usage of the operator itself,
             # excluding `_reserved_for_op_outputs`.
-            op_mem_usage += self._resource_manager.get_op_internal_object_store_usage(
-                op
-            )
+            op_mem_usage += self._resource_manager.get_mem_op_internal(op)
             # Add the portion of op outputs usage that has
             # exceeded `_reserved_for_op_outputs`.
             op_outputs_usage = self._resource_manager.get_op_outputs_object_store_usage_with_downstream(
diff --git a/python/ray/data/context.py b/python/ray/data/context.py
@@ -251,6 +251,7 @@ class ShuffleStrategy(str, enum.Enum):
 )
 
 
+# Dynamic output queue size backpressure disabled by default.
 DEFAULT_ENABLE_DYNAMIC_OUTPUT_QUEUE_SIZE_BACKPRESSURE: bool = env_bool(
     "RAY_DATA_ENABLE_DYNAMIC_OUTPUT_QUEUE_SIZE_BACKPRESSURE", False
 )
diff --git a/python/ray/data/tests/test_backpressure_policies.py b/python/ray/data/tests/test_backpressure_policies.py
@@ -60,10 +60,16 @@ def test_basic(self):
             map_op_no_concurrency: MagicMock(),
         }
 
+        mock_resource_manager = MagicMock()
+        # Return None to skip dynamic output queue size backpressure check
+        mock_resource_manager.get_op_usage.return_value = None
+        mock_resource_manager.get_budget.return_value = None
+        mock_resource_manager.is_op_eligible.return_value = False
+
         policy = ConcurrencyCapBackpressurePolicy(
             DataContext.get_current(),
             topology,
-            MagicMock(),
+            mock_resource_manager,
         )
 
         self.assertEqual(policy._concurrency_caps[map_op], concurrency)
@@ -177,6 +183,67 @@ def test_can_add_input_with_non_map_operator(self):
         # InputDataBuffer has infinite concurrency cap, so should always allow
         self.assertTrue(policy.can_add_input(input_op))
 
+    def test_can_add_input_with_ineligible_op(self):
+        """Test can_add_input when op is not eligible for backpressure."""
+        input_op = InputDataBuffer(DataContext.get_current(), input_data=[MagicMock()])
+        map_op = TaskPoolMapOperator(
+            map_transformer=MagicMock(),
+            data_context=DataContext.get_current(),
+            input_op=input_op,
+            max_concurrency=5,
+        )
+        map_op.metrics.num_tasks_running = 3
+
+        topology = {map_op: MagicMock(), input_op: MagicMock()}
+
+        mock_resource_manager = MagicMock()
+        # Op is not eligible for backpressure
+        mock_resource_manager.is_op_eligible.return_value = False
+
+        policy = ConcurrencyCapBackpressurePolicy(
+            DataContext.get_current(),
+            topology,
+            mock_resource_manager,
+        )
+        policy.enable_dynamic_output_queue_size_backpressure = True
+
+        # Should skip dynamic backpressure and use basic cap check
+        self.assertTrue(policy.can_add_input(map_op))  # 3 < 5
+
+        map_op.metrics.num_tasks_running = 5
+        self.assertFalse(policy.can_add_input(map_op))  # 5 >= 5
+
+    def test_can_add_input_with_materializing_downstream_op(self):
+        """Test can_add_input when downstream op is a materializing operator."""
+        input_op = InputDataBuffer(DataContext.get_current(), input_data=[MagicMock()])
+        map_op = TaskPoolMapOperator(
+            map_transformer=MagicMock(),
+            data_context=DataContext.get_current(),
+            input_op=input_op,
+            max_concurrency=5,
+        )
+        map_op.metrics.num_tasks_running = 3
+
+        topology = {map_op: MagicMock(), input_op: MagicMock()}
+
+        mock_resource_manager = MagicMock()
+        mock_resource_manager.is_op_eligible.return_value = True
+        mock_resource_manager.has_materializing_downstream_op.return_value = True
+
+        policy = ConcurrencyCapBackpressurePolicy(
+            DataContext.get_current(),
+            topology,
+            mock_resource_manager,
+        )
+        policy.enable_dynamic_output_queue_size_backpressure = True
+
+        # Should skip dynamic backpressure and use basic cap check
+        # to avoid starving materializing operators
+        self.assertTrue(policy.can_add_input(map_op))  # 3 < 5
+
+        map_op.metrics.num_tasks_running = 5
+        self.assertFalse(policy.can_add_input(map_op))  # 5 >= 5
+
     def test_can_add_input_with_object_store_memory_usage_ratio_above_threshold(self):
         """Test can_add_input when object store memory usage ratio is above threshold."""
         input_op = InputDataBuffer(DataContext.get_current(), input_data=[MagicMock()])
@@ -193,8 +260,10 @@ def test_can_add_input_with_object_store_memory_usage_ratio_above_threshold(self
         mock_resource_manager = MagicMock()
 
         # Mock object store memory usage ratio above threshold
-        # Ratio = budget / (usage + budget) > OBJECT_STORE_BUDGET_RATIO
-        threshold = ConcurrencyCapBackpressurePolicy.OBJECT_STORE_BUDGET_RATIO
+        # Ratio = budget / (usage + budget) > AVAILABLE_OBJECT_STORE_BUDGET_THRESHOLD
+        threshold = (
+            ConcurrencyCapBackpressurePolicy.AVAILABLE_OBJECT_STORE_BUDGET_THRESHOLD
+        )
         mock_usage = MagicMock()
         mock_usage.object_store_memory = 1000  # usage
         mock_budget = MagicMock()
@@ -207,6 +276,8 @@ def test_can_add_input_with_object_store_memory_usage_ratio_above_threshold(self
 
         mock_resource_manager.get_op_usage.return_value = mock_usage
         mock_resource_manager.get_budget.return_value = mock_budget
+        mock_resource_manager.is_op_eligible.return_value = True
+        mock_resource_manager.has_materializing_downstream_op.return_value = False
 
         policy = ConcurrencyCapBackpressurePolicy(
             DataContext.get_current(),
@@ -249,8 +320,10 @@ def test_can_add_input_with_object_store_memory_usage_ratio_below_threshold(self
         mock_resource_manager = MagicMock()
 
         # Mock object store memory usage ratio below threshold
-        # Ratio = budget / (usage + budget) < OBJECT_STORE_BUDGET_RATIO
-        threshold = ConcurrencyCapBackpressurePolicy.OBJECT_STORE_BUDGET_RATIO
+        # Ratio = budget / (usage + budget) < AVAILABLE_OBJECT_STORE_BUDGET_THRESHOLD
+        threshold = (
+            ConcurrencyCapBackpressurePolicy.AVAILABLE_OBJECT_STORE_BUDGET_THRESHOLD
+        )
         mock_usage = MagicMock()
         mock_usage.object_store_memory = 1000  # usage
         mock_budget = MagicMock()
@@ -263,9 +336,11 @@ def test_can_add_input_with_object_store_memory_usage_ratio_below_threshold(self
 
         mock_resource_manager.get_op_usage.return_value = mock_usage
         mock_resource_manager.get_budget.return_value = mock_budget
+        mock_resource_manager.is_op_eligible.return_value = True
+        mock_resource_manager.has_materializing_downstream_op.return_value = False
 
         # Mock queue size methods
-        mock_resource_manager.get_op_internal_object_store_usage.return_value = 100
+        mock_resource_manager.get_mem_op_internal.return_value = 100
         mock_resource_manager.get_op_outputs_object_store_usage_with_downstream.return_value = (
             200
         )
@@ -286,9 +361,10 @@ def test_can_add_input_with_object_store_memory_usage_ratio_below_threshold(self
         policy._q_level_dev[map_op] = initial_dev
 
         result = policy.can_add_input(map_op)
-        # With queue size 300, initial level=200, dev=50, bounds=[100, 300]
-        # Queue size 300 is at the upper bound, so should hold.
-        # running=3 < effective_cap=3 should be False
+        # With queue size 300, initial level=200, dev=50, bounds=[150, 250]
+        # Queue size 300 is above the upper bound, so should backoff.
+        # running=3, backoff by 1 -> effective_cap=2
+        # running=3 < effective_cap=2 should be False
         self.assertFalse(result)
         # EWMA state should be updated when ratio < threshold
         # Level should move toward 300 (queue size)
@@ -310,7 +386,9 @@ def test_can_add_input_effective_cap_calculation(self):
         topology = {map_op: MagicMock(), input_op: MagicMock()}
 
         mock_resource_manager = MagicMock()
-        threshold = ConcurrencyCapBackpressurePolicy.OBJECT_STORE_BUDGET_RATIO
+        threshold = (
+            ConcurrencyCapBackpressurePolicy.AVAILABLE_OBJECT_STORE_BUDGET_THRESHOLD
+        )
         mock_usage = MagicMock()
         mock_usage.object_store_memory = 1000
         mock_budget = MagicMock()
@@ -323,6 +401,8 @@ def test_can_add_input_effective_cap_calculation(self):
 
         mock_resource_manager.get_op_usage.return_value = mock_usage
         mock_resource_manager.get_budget.return_value = mock_budget
+        mock_resource_manager.is_op_eligible.return_value = True
+        mock_resource_manager.has_materializing_downstream_op.return_value = False
 
         policy = ConcurrencyCapBackpressurePolicy(
             DataContext.get_current(),
@@ -369,9 +449,7 @@ def test_can_add_input_effective_cap_calculation(self):
             description,
         ) in test_cases:
             with self.subTest(description=description):
-                mock_resource_manager.get_op_internal_object_store_usage.return_value = (
-                    internal_usage
-                )
+                mock_resource_manager.get_mem_op_internal.return_value = internal_usage
                 mock_resource_manager.get_op_outputs_object_store_usage_with_downstream.return_value = (
                     downstream_usage
                 )

Original file line number	Diff line number	Diff line change
`@@ -251,6 +251,7 @@ class ShuffleStrategy(str, enum.Enum):`
`251`	`251`	`)`
`252`	`252`
`253`	`253`
	`254`	`+# Dynamic output queue size backpressure disabled by default.`
`254`	`255`	`DEFAULT_ENABLE_DYNAMIC_OUTPUT_QUEUE_SIZE_BACKPRESSURE: bool = env_bool(`
`255`	`256`	`"RAY_DATA_ENABLE_DYNAMIC_OUTPUT_QUEUE_SIZE_BACKPRESSURE", False`
`256`	`257`	`)`