Skip to content

Commit 3c9e438

Browse files
authored
fix(iris): log perf, scheduling fixes, holder task device constraints (marin-community#3369)
Log performance: batch log fetching replaces per-line streaming, substring filters run server-side, and the default fetch size is reduced for faster UI response. Scheduling performance: cap non-coscheduled tasks per job per cycle (max_tasks_per_job_per_cycle=4) to bound scheduler CPU time and prevent GIL starvation of the heartbeat thread. Reservation holder constraint fix: holder jobs were created without device-type/variant constraints because they bypass the service layer's _inject_resource_constraints(). When the reservation entry had explicit constraints (e.g. region) or no constraints at all, device constraints were missing entirely. The holder could land on any worker (e.g. a v6e-4 when it needs v5p-64), the reservation would never be satisfied, and the parent job would sit pending forever. Now constraints_from_resources() + merge_constraints() auto-inject device constraints on holder creation, matching what _worker_matches_reservation_entry already does for claim matching. Preference pass fix: holder tasks have job_id `parent/:reservation:`, but claims are keyed by the parent's wire ID. The preference pass now resolves holder tasks through their parent so they route to claimed workers after worker death + requeue. Tests: 3 new tests covering device constraint injection, wrong-device-type scheduling rejection, and preference pass parent resolution for holders.
1 parent 7bbcbb8 commit 3c9e438

33 files changed

+1213
-529
lines changed

lib/iris/src/iris/actor/server.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ def serve_background(self, port: int | None = None) -> int:
225225
host=self._host,
226226
port=self._actual_port,
227227
log_level="error",
228+
log_config=None,
228229
timeout_keep_alive=120,
229230
)
230231
self._server = uvicorn.Server(config)

lib/iris/src/iris/client/client.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -817,7 +817,7 @@ def fetch_task_logs(
817817
include_children: bool = False,
818818
start: Timestamp | None = None,
819819
max_lines: int = 0,
820-
regex: str | None = None,
820+
substring: str | None = None,
821821
attempt_id: int = -1,
822822
min_level: str = "",
823823
) -> list[TaskLogEntry]:
@@ -828,7 +828,7 @@ def fetch_task_logs(
828828
include_children: Include logs from child jobs (job ID only)
829829
start: Only return logs after this timestamp (None = from beginning)
830830
max_lines: Maximum number of log lines to return (0 = unlimited)
831-
regex: Regex filter for log content
831+
substring: Substring filter for log content
832832
attempt_id: Filter to specific attempt (-1 = all attempts)
833833
min_level: Minimum log level filter (DEBUG/INFO/WARNING/ERROR/CRITICAL)
834834
@@ -840,7 +840,7 @@ def fetch_task_logs(
840840
include_children=include_children,
841841
since_ms=start.epoch_ms() if start else 0,
842842
max_total_lines=max_lines,
843-
regex=regex,
843+
substring=substring,
844844
attempt_id=attempt_id,
845845
min_level=min_level,
846846
)

lib/iris/src/iris/cluster/client/protocol.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def fetch_task_logs(
104104
include_children: bool = False,
105105
since_ms: int = 0,
106106
max_total_lines: int = 0,
107-
regex: str | None = None,
107+
substring: str | None = None,
108108
attempt_id: int = -1,
109109
cursor: int = 0,
110110
min_level: str = "",

lib/iris/src/iris/cluster/client/remote_client.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ def fetch_task_logs(
340340
include_children: bool = False,
341341
since_ms: int = 0,
342342
max_total_lines: int = 0,
343-
regex: str | None = None,
343+
substring: str | None = None,
344344
attempt_id: int = -1,
345345
cursor: int = 0,
346346
min_level: str = "",
@@ -352,7 +352,7 @@ def fetch_task_logs(
352352
include_children: Include logs from child jobs (job ID only)
353353
since_ms: Only return logs after this timestamp (exclusive)
354354
max_total_lines: Maximum total lines (0 = default 10000)
355-
regex: Regex filter for log content
355+
substring: Substring filter for log content
356356
attempt_id: Filter to specific attempt (-1 = all attempts)
357357
cursor: Autoincrement id cursor for incremental polling
358358
min_level: Minimum log level filter (DEBUG/INFO/WARNING/ERROR/CRITICAL)
@@ -362,7 +362,7 @@ def fetch_task_logs(
362362
include_children=include_children,
363363
since_ms=since_ms,
364364
max_total_lines=max_total_lines,
365-
regex=regex or "",
365+
substring=substring or "",
366366
attempt_id=attempt_id,
367367
cursor=cursor,
368368
min_level=min_level,

lib/iris/src/iris/cluster/controller/controller.py

Lines changed: 72 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,18 @@
1717
import uvicorn
1818

1919
from iris.chaos import chaos
20+
from iris.cluster.constraints import (
21+
AttributeValue,
22+
Constraint,
23+
PlacementRequirements,
24+
WellKnownAttribute,
25+
constraints_from_resources,
26+
evaluate_constraint,
27+
extract_placement_requirements,
28+
merge_constraints,
29+
)
2030
from iris.cluster.controller.autoscaler import Autoscaler, DemandEntry
2131
from iris.cluster.controller.dashboard import ControllerDashboard
22-
from iris.cluster.log_store import LogStoreHandler, PROCESS_LOG_KEY
2332
from iris.cluster.controller.events import TaskAssignedEvent, TaskStateChangedEvent
2433
from iris.cluster.controller.scheduler import (
2534
JobRequirements,
@@ -28,40 +37,32 @@
2837
WorkerSnapshot,
2938
)
3039
from iris.cluster.controller.service import ControllerServiceImpl
40+
from iris.cluster.controller.snapshot import (
41+
SnapshotResult,
42+
create_snapshot,
43+
read_latest_snapshot,
44+
restore_scaling_group,
45+
restore_snapshot,
46+
restore_tracked_workers,
47+
write_snapshot,
48+
)
3149
from iris.cluster.controller.state import (
3250
HEARTBEAT_FAILURE_THRESHOLD,
51+
RESERVATION_HOLDER_JOB_NAME,
3352
ControllerJob,
3453
ControllerState,
3554
ControllerTask,
3655
ControllerWorker,
3756
HeartbeatSnapshot,
3857
ReservationClaim,
3958
)
40-
from iris.cluster.constraints import (
41-
AttributeValue,
42-
Constraint,
43-
PlacementRequirements,
44-
WellKnownAttribute,
45-
constraints_from_resources,
46-
evaluate_constraint,
47-
extract_placement_requirements,
48-
merge_constraints,
49-
)
59+
from iris.cluster.log_store import PROCESS_LOG_KEY, LogStoreHandler
5060
from iris.cluster.types import (
5161
JobName,
5262
VmWorkerStatus,
5363
VmWorkerStatusMap,
5464
WorkerId,
5565
)
56-
from iris.cluster.controller.snapshot import (
57-
SnapshotResult,
58-
create_snapshot,
59-
read_latest_snapshot,
60-
restore_scaling_group,
61-
restore_snapshot,
62-
restore_tracked_workers,
63-
write_snapshot,
64-
)
6566
from iris.logging import slow_log
6667
from iris.managed_thread import ManagedThread, ThreadContainer, get_thread_container
6768
from iris.rpc import cluster_pb2, snapshot_pb2
@@ -145,13 +146,15 @@ def compute_demand_entries(
145146
# Also track which jobs have reservations so we can apply taint injection.
146147
jobs: dict[JobName, JobRequirements] = {}
147148
has_reservation: set[JobName] = set()
149+
has_direct_reservation: set[JobName] = set()
148150
for task in all_schedulable:
149151
if task.job_id not in jobs:
150152
job = state.get_job(task.job_id)
151153
if job:
152154
jobs[task.job_id] = job_requirements_from_job(job)
153155
if job.request.HasField("reservation"):
154156
has_reservation.add(task.job_id)
157+
has_direct_reservation.add(task.job_id)
155158
elif _find_reservation_ancestor(state, task.job_id) is not None:
156159
has_reservation.add(task.job_id)
157160

@@ -163,7 +166,7 @@ def compute_demand_entries(
163166
task_ids = [t.task_id for t in all_schedulable]
164167
claims = reservation_claims or {}
165168
dry_run_workers = _inject_reservation_taints(workers, claims)
166-
dry_run_jobs = _inject_taint_constraints(jobs, has_reservation)
169+
dry_run_jobs = _inject_taint_constraints(jobs, has_reservation, has_direct_reservation)
167170

168171
context = scheduler.create_scheduling_context(
169172
dry_run_workers,
@@ -289,24 +292,42 @@ def _inject_reservation_taints(
289292
def _inject_taint_constraints(
290293
jobs: dict[JobName, JobRequirements],
291294
has_reservation: set[JobName],
295+
has_direct_reservation: set[JobName] | None = None,
292296
) -> dict[JobName, JobRequirements]:
293-
"""Add NOT_EXISTS reservation-job constraint to non-reservation jobs.
294-
295-
This prevents normal jobs from being scheduled onto claimed workers.
296-
Reservation jobs are left unchanged — they can use both claimed and
297-
unclaimed workers (the reservation is a floor, not a ceiling).
297+
"""Add reservation taint constraints to jobs.
298+
299+
Three-way logic:
300+
- Direct reservation jobs (has_direct_reservation): get an EQ constraint
301+
forcing them onto their claimed workers only.
302+
- Descendants of reservation jobs (has_reservation minus direct): no
303+
constraint — they can use both claimed and unclaimed workers.
304+
- Non-reservation jobs: get a NOT_EXISTS constraint blocking them from
305+
claimed workers.
298306
"""
299307
if not has_reservation and not jobs:
300308
return jobs
301309

310+
if has_direct_reservation is None:
311+
has_direct_reservation = set()
312+
302313
taint_constraint = cluster_pb2.Constraint(
303314
key=RESERVATION_TAINT_KEY,
304315
op=cluster_pb2.CONSTRAINT_OP_NOT_EXISTS,
305316
)
306317

307318
modified: dict[JobName, JobRequirements] = {}
308319
for job_id, req in jobs.items():
309-
if job_id in has_reservation:
320+
if job_id in has_direct_reservation:
321+
eq_constraint = cluster_pb2.Constraint(
322+
key=RESERVATION_TAINT_KEY,
323+
op=cluster_pb2.CONSTRAINT_OP_EQ,
324+
value=cluster_pb2.AttributeValue(string_value=job_id.to_wire()),
325+
)
326+
modified[job_id] = replace(
327+
req,
328+
constraints=[*list(req.constraints), eq_constraint],
329+
)
330+
elif job_id in has_reservation:
310331
modified[job_id] = req
311332
else:
312333
modified[job_id] = replace(
@@ -417,7 +438,14 @@ def _preference_pass(
417438
continue
418439

419440
job_wire = job_id.to_wire()
420-
for wid in claimed_by_job.get(job_wire, ()):
441+
# Holder jobs are children of the reservation job — look up claims
442+
# under the parent's wire ID.
443+
claim_key = job_wire
444+
if RESERVATION_HOLDER_JOB_NAME in job_wire:
445+
parent = job_id.parent
446+
if parent is not None:
447+
claim_key = parent.to_wire()
448+
for wid in claimed_by_job.get(claim_key, ()):
421449
if context.assignment_counts.get(wid, 0) >= context.max_assignments_per_worker:
422450
continue
423451
capacity = context.capacities.get(wid)
@@ -494,6 +522,12 @@ class ControllerConfig:
494522
max_dispatch_parallelism: int = 32
495523
"""Maximum number of concurrent RPC dispatch operations."""
496524

525+
max_tasks_per_job_per_cycle: int = 4
526+
"""Maximum tasks from a single non-coscheduled job to consider per scheduling
527+
cycle. Bounds CPU time in the scheduler when many tasks are pending, preventing
528+
GIL starvation of the heartbeat thread. Coscheduled jobs are exempt (they need
529+
all tasks for atomic assignment). Set to 0 for unlimited."""
530+
497531
heartbeat_failure_threshold: int = HEARTBEAT_FAILURE_THRESHOLD
498532
"""Consecutive heartbeat failures before marking worker as dead."""
499533

@@ -656,6 +690,7 @@ def start(self) -> None:
656690
host=self._config.host,
657691
port=self._config.port,
658692
log_level="warning",
693+
log_config=None,
659694
timeout_keep_alive=120,
660695
)
661696
self._server = uvicorn.Server(server_config)
@@ -847,9 +882,13 @@ def _run_scheduling(self) -> None:
847882

848883
# Handle timeouts and reservation gates before scheduling.
849884
# Holder tasks participate in scheduling like normal tasks.
885+
# Cap non-coscheduled tasks per job to bound scheduling CPU time.
850886
schedulable_task_ids: list[JobName] = []
851887
jobs: dict[JobName, JobRequirements] = {}
852888
has_reservation: set[JobName] = set()
889+
has_direct_reservation: set[JobName] = set()
890+
tasks_per_job: dict[JobName, int] = defaultdict(int)
891+
cap = self._config.max_tasks_per_job_per_cycle
853892
for task in pending_tasks:
854893
if not task.can_be_scheduled():
855894
continue
@@ -863,11 +902,15 @@ def _run_scheduling(self) -> None:
863902
# Holder tasks are always schedulable (they ARE the reservation).
864903
if not job.is_reservation_holder and not self._is_reservation_satisfied(job):
865904
continue
905+
if cap > 0 and not job.is_coscheduled and tasks_per_job[task.job_id] >= cap:
906+
continue
907+
tasks_per_job[task.job_id] += 1
866908
schedulable_task_ids.append(task.task_id)
867909
if task.job_id not in jobs:
868910
jobs[task.job_id] = job_requirements_from_job(job)
869911
if job.request.HasField("reservation"):
870912
has_reservation.add(task.job_id)
913+
has_direct_reservation.add(task.job_id)
871914
elif _find_reservation_ancestor(self._state, task.job_id) is not None:
872915
has_reservation.add(task.job_id)
873916

@@ -877,7 +920,7 @@ def _run_scheduling(self) -> None:
877920
# Inject reservation taints: claimed workers get a taint attribute,
878921
# non-reservation jobs get a NOT_EXISTS constraint for it.
879922
modified_workers = _inject_reservation_taints(workers, self._reservation_claims)
880-
jobs = _inject_taint_constraints(jobs, has_reservation)
923+
jobs = _inject_taint_constraints(jobs, has_reservation, has_direct_reservation)
881924

882925
with slow_log(logger, "snapshot_building_counts", threshold_ms=50):
883926
building_counts = self._state.snapshot_building_counts()

lib/iris/src/iris/cluster/controller/scaling_group.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,10 +173,19 @@ def _lifecycle_to_vm_state(lifecycle: SliceLifecycleState) -> vm_pb2.VmState:
173173
}[lifecycle]
174174

175175

176-
def slice_state_to_proto(state: SliceState) -> vm_pb2.SliceInfo:
176+
def slice_state_to_proto(state: SliceState, idle_threshold: Duration | None = None) -> vm_pb2.SliceInfo:
177177
"""Convert a SliceState to a SliceInfo proto for RPC APIs."""
178178
created_at = state.handle.created_at
179179
vm_state = _lifecycle_to_vm_state(state.lifecycle)
180+
181+
is_idle = False
182+
if idle_threshold is not None and state.lifecycle == SliceLifecycleState.READY:
183+
if state.last_active.epoch_ms() == 0:
184+
is_idle = True
185+
else:
186+
idle_duration = Duration.from_ms(Timestamp.now().epoch_ms() - state.last_active.epoch_ms())
187+
is_idle = idle_duration >= idle_threshold
188+
180189
return vm_pb2.SliceInfo(
181190
slice_id=state.handle.slice_id,
182191
scale_group=state.handle.scale_group,
@@ -192,6 +201,8 @@ def slice_state_to_proto(state: SliceState) -> vm_pb2.SliceInfo:
192201
for i, addr in enumerate(state.vm_addresses)
193202
],
194203
error_message=state.error_message,
204+
last_active=state.last_active.to_proto(),
205+
idle=is_idle,
195206
)
196207

197208

@@ -997,7 +1008,8 @@ def to_status(self) -> vm_pb2.ScaleGroupStatus:
9971008
availability_reason=availability.reason,
9981009
blocked_until=blocked_until.to_proto(),
9991010
scale_up_cooldown_until=cooldown_until.to_proto(),
1000-
slices=[slice_state_to_proto(state) for state in snapshot],
1011+
slices=[slice_state_to_proto(state, idle_threshold=self._idle_threshold) for state in snapshot],
1012+
idle_threshold_ms=self._idle_threshold.to_ms(),
10011013
)
10021014
for state_name, count in counts.items():
10031015
status.slice_state_counts[state_name] = count

0 commit comments

Comments (0)