@@ -532,8 +532,19 @@ def test_sync_survives_node_list_failure(provider, k8s):
532532# ---------------------------------------------------------------------------
533533
534534
535+ def _collect_resources_once (provider ) -> None :
536+ """Drive one synchronous resource-collection pass.
537+
538+ Call after provider.reconcile(): reconcile() registers the running-pod set
539+ with the background collector, and this invokes the collector's
540+ _collect_once() directly instead of sleeping until its poll thread fires.
541+ NOTE(review): the poll thread presumably keeps running; confirm no overlap. """
542+ assert provider ._resource_collector is not None , "reconcile should have started the collector"
543+ provider ._resource_collector ._collect_once ()
544+
545+
535546def test_resource_stats_from_kubectl_top (provider , k8s , task_stats_table ):
536- """Running pods emit IrisTaskStat rows via the background ResourceCollector."""
547+ """Running pods emit IrisTaskStat rows via the ResourceCollector."""
537548
538549 task_id = JobName .from_wire ("/job/0" )
539550 attempt_id = 0
@@ -543,12 +554,9 @@ def test_resource_stats_from_kubectl_top(provider, k8s, task_stats_table):
543554 populate_pod (k8s , pod_name , "Running" )
544555 k8s .set_top_pod (pod_name , PodResourceUsage (cpu_millicores = 500 , memory_bytes = 1024 * 1024 * 1024 ))
545556
546- batch = make_batch (running_tasks = [entry ])
547- # First sync registers the pod with the ResourceCollector.
548- provider .reconcile (batch )
549- # Wait for background collector to fetch and write.
550- time .sleep (2 )
551- # No more sync needed — the row has already been written to the table.
557+ # reconcile registers the pod; then collect once.
558+ provider .reconcile (make_batch (running_tasks = [entry ]))
559+ _collect_resources_once (provider )
552560
553561 rows = [row for batch_rows in task_stats_table .writes for row in batch_rows ]
554562 assert rows , "ResourceCollector did not write any IrisTaskStat rows"
@@ -562,7 +570,7 @@ def test_resource_stats_from_kubectl_top(provider, k8s, task_stats_table):
562570
563571
564572def test_resource_stats_skipped_when_metrics_unavailable (provider , k8s , task_stats_table ):
565- """No IrisTaskStat row is written when kubectl top returns None ."""
573+ """No IrisTaskStat row is written when a pod has no metrics sample ."""
566574 task_id = JobName .from_wire ("/job/0" )
567575 attempt_id = 0
568576 pod_name = _pod_name (task_id , attempt_id )
@@ -571,28 +579,24 @@ def test_resource_stats_skipped_when_metrics_unavailable(provider, k8s, task_sta
571579 populate_pod (k8s , pod_name , "Running" )
572580 k8s .set_top_pod (pod_name , None )
573581
574- batch = make_batch (running_tasks = [entry ])
575- provider .reconcile (batch )
576- time .sleep (2 )
582+ provider .reconcile (make_batch (running_tasks = [entry ]))
583+ _collect_resources_once (provider )
577584
578585 assert task_stats_table .writes == []
579586
580587
581588def test_resource_stats_skipped_when_top_pods_raises (provider , k8s , task_stats_table ):
582- """No IrisTaskStat row is written when the bulk metrics query raises ."""
589+ """A raising bulk metrics query is swallowed; no IrisTaskStat row is written ."""
583590 task_id = JobName .from_wire ("/job/0" )
584591 attempt_id = 0
585592 pod_name = _pod_name (task_id , attempt_id )
586593 entry = RunningTaskEntry (task_id = task_id , attempt_id = attempt_id )
587594
588595 populate_pod (k8s , pod_name , "Running" )
589- # Persistent: the background collector retries on its own cadence, so a
590- # one-shot failure would be consumed and later polls would succeed.
591- k8s .inject_failure ("top_pods" , RuntimeError ("metrics-server unavailable" ), persistent = True )
596+ k8s .inject_persistent_failure ("top_pods" , RuntimeError ("metrics-server unavailable" ))  # persistent: presumably the background poll thread may also call top_pods, so a one-shot failure could be consumed before the explicit pass below (TODO confirm)
592597
593- batch = make_batch (running_tasks = [entry ])
594- provider .reconcile (batch )
595- time .sleep (2 )
598+ provider .reconcile (make_batch (running_tasks = [entry ]))
599+ _collect_resources_once (provider )
596600
597601 assert task_stats_table .writes == []
598602
@@ -606,9 +610,8 @@ def test_resource_stats_skipped_for_non_running_pods(provider, k8s, task_stats_t
606610
607611 populate_pod (k8s , pod_name , "Succeeded" )
608612
609- batch = make_batch (running_tasks = [entry ])
610- provider .reconcile (batch )
611- time .sleep (2 )
613+ provider .reconcile (make_batch (running_tasks = [entry ]))
614+ _collect_resources_once (provider )
612615
613616 assert task_stats_table .writes == []
614617
0 commit comments