Skip to content

Commit 3c9dc87

Browse files
committed
fix: wait for TASK_STATE_RUNNING before exec_in_container
The test assumed task state matches job state, but tasks can be BUILDING while the job reports RUNNING (workspace bundle install). Add wait_for_task_state helper and use it.
1 parent 948615e commit 3c9dc87

2 files changed

Lines changed: 22 additions & 4 deletions

File tree

tests/integration/iris/cluster.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,25 @@ def wait_for_state(
107107
time.sleep(poll_interval)
108108
raise TimeoutError(f"Job {job.job_id} did not reach state {state} in {timeout}s (current: {status.state})")
109109

110+
def wait_for_task_state(
    self,
    job: Job,
    state: int,
    task_index: int = 0,
    timeout: float = 60.0,
    poll_interval: float = 0.5,
) -> cluster_pb2.TaskStatus:
    """Poll a single task of ``job`` until it reports ``state``.

    Job state and task state can differ, so callers that need a task in a
    particular state should wait on the task rather than on the job.

    Args:
        job: Job whose task is polled; ``job.job_id`` is used in the error text.
        state: Task state value to wait for (compared with ``task.state``).
        task_index: Index of the task within the job.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between polls.

    Returns:
        The task status from the first poll whose state equals ``state``.

    Raises:
        TimeoutError: If the task has not reached ``state`` by the deadline.
    """
    deadline = time.monotonic() + timeout
    while True:
        # Always poll at least once, so a task that is already in the target
        # state is returned even when timeout is zero, and so the error below
        # reports the most recently observed state.
        task = self.task_status(job, task_index)
        if task.state == state:
            return task
        if time.monotonic() >= deadline:
            break
        time.sleep(poll_interval)
    raise TimeoutError(
        f"Task {task_index} of {job.job_id} did not reach state {state} in {timeout}s (current: {task.state})"
    )
128+
110129
@contextmanager
111130
def launched_job(self, fn, name: str, *args, **kwargs):
112131
"""Submit a job and guarantee it's killed on exit."""

tests/integration/iris/test_iris_integration.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -258,10 +258,9 @@ def _is_running():
258258
def test_exec_in_container(integration_cluster):
259259
"""Exec a command in a running task's container."""
260260
job = integration_cluster.submit(sleep, "itest-exec", 120)
261-
integration_cluster.wait_for_state(job, cluster_pb2.JOB_STATE_RUNNING, timeout=integration_cluster.job_timeout)
262-
263-
task = integration_cluster.task_status(job, task_index=0)
264-
assert task.state == cluster_pb2.TASK_STATE_RUNNING, f"Task stuck in {cluster_pb2.TaskState.Name(task.state)}"
261+
task = integration_cluster.wait_for_task_state(
262+
job, cluster_pb2.TASK_STATE_RUNNING, timeout=integration_cluster.job_timeout
263+
)
265264
task_id = task.task_id
266265

267266
request = cluster_pb2.Controller.ExecInContainerRequest(

0 commit comments

Comments
 (0)