@@ -17,8 +17,7 @@
 from kubernetes import client, watch
 from reana_commons.config import REANA_RUNTIME_KUBERNETES_NAMESPACE
 from reana_commons.k8s.api_client import current_k8s_corev1_api_client
-from reana_db.database import Session
-from reana_db.models import Job, JobStatus
+from reana_db.models import JobStatus
 
 from reana_job_controller.config import (
     COMPUTE_BACKENDS,
@@ -32,10 +31,10 @@
     C4P_SSH_TIMEOUT,
     C4P_SSH_BANNER_TIMEOUT,
     C4P_SSH_AUTH_TIMEOUT,
+    USE_KUEUE,
 )
 
 from reana_job_controller.job_db import JOB_DB, store_job_logs, update_job_status
-from reana_job_controller.kubernetes_job_manager import KubernetesJobManager
 from reana_job_controller.utils import (
     SSHClient,
     singleton,
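For context, `USE_KUEUE` is presumably a boolean feature flag defined in `reana_job_controller/config.py` and read from the environment, following the usual REANA config pattern. A minimal sketch of how such a flag could be defined; the environment variable name and default value here are assumptions, not the actual REANA code:

```python
# Hypothetical sketch of the USE_KUEUE flag in reana_job_controller/config.py.
# The env variable name and default are assumptions for illustration.
import os
from distutils.util import strtobool

USE_KUEUE = bool(strtobool(os.getenv("USE_KUEUE", "False")))
"""Whether to watch Kubernetes Jobs (Kueue/MultiKueue) instead of Pods."""
```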
@@ -115,7 +114,7 @@ def get_backend_job_id(self, job_pod):
         """
         return job_pod.metadata.labels["job-name"]
 
-    def should_process_job(self, job_pod) -> bool:
+    def should_process_job_pod(self, job_pod) -> bool:
        """Decide whether the job should be processed or not.
 
         Each job is processed only once, when it reaches a final state (either `failed` or `finished`).
@@ -141,6 +140,27 @@ def should_process_job(self, job_pod) -> bool:
 
         return is_job_in_remaining_jobs and is_job_completed
 
+    def should_process_job(self, job) -> bool:
+        """Decide whether the job should be processed or not.
+
+        Each job is processed only once, when it reaches a final state (either `failed` or `finished`).
+
+        :param job: Compute backend job object (Kubernetes V1Job,
+            https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1Job.md)
+        """
+        remaining_jobs = self._get_remaining_jobs(
+            statuses_to_skip=[
+                JobStatus.finished.name,
+                JobStatus.failed.name,
+                JobStatus.stopped.name,
+            ]
+        )
+
+        is_job_in_remaining_jobs = job.metadata.name in remaining_jobs
+        is_job_completed = job.status.succeeded and not job.status.active
+
+        return is_job_in_remaining_jobs and is_job_completed
+
     @staticmethod
     def _get_job_container_statuses(job_pod):
         return (job_pod.status.container_statuses or []) + (
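The new `should_process_job` variant applies the same "process once, on a final state" rule to `V1Job` objects instead of pods: the job counts as completed when `status.succeeded` is set and no pods are still `active`. A minimal sketch of that predicate against hand-built kubernetes-client models, illustrative only (the job name is a placeholder):

```python
from kubernetes import client

# Build a V1Job the way the watch stream would deliver it.
job = client.V1Job(
    metadata=client.V1ObjectMeta(name="reana-run-job-123"),
    status=client.V1JobStatus(succeeded=1, active=None, failed=None),
)

# Same completion check as in should_process_job: at least one pod
# succeeded and none are still running.
is_job_completed = bool(job.status.succeeded and not job.status.active)
assert is_job_completed
```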
@@ -235,46 +255,99 @@ def watch_jobs(self, job_db, app=None):
 
         :param job_db: Dictionary which contains all current jobs.
         """
-        while True:
-            logging.info("Starting a new stream request to watch Jobs")
-            try:
-                w = watch.Watch()
-                for event in w.stream(
-                    current_k8s_corev1_api_client.list_namespaced_pod,
-                    namespace=REANA_RUNTIME_KUBERNETES_NAMESPACE,
-                    label_selector=f"reana-run-job-workflow-uuid={self.workflow_uuid}",
-                ):
-                    logging.info("New Pod event received: {0}".format(event["type"]))
-                    job_pod = event["object"]
-
-                    # Each job is processed once, when reaching a final state
-                    # (either successfully or not)
-                    if self.should_process_job(job_pod):
-                        job_status = self.get_job_status(job_pod)
-                        backend_job_id = self.get_backend_job_id(job_pod)
-                        reana_job_id = self.get_reana_job_id(backend_job_id)
+        # If using MultiKueue, watch jobs instead of pods since worker pods could be
+        # running on a remote cluster that we can't directly monitor
+        if USE_KUEUE:
+            while True:
+                logging.info("Starting a new stream request to watch Jobs")
 
-                        logs = self.job_manager_cls.get_logs(
-                            backend_job_id, job_pod=job_pod
+                try:
+                    w = watch.Watch()
+                    for event in w.stream(
+                        client.BatchV1Api().list_namespaced_job,
+                        namespace=REANA_RUNTIME_KUBERNETES_NAMESPACE,
+                        label_selector=f"reana-run-job-workflow-uuid={self.workflow_uuid}",
+                    ):
+                        logging.info(f"New Job event received: {event['type']}")
+
+                        job = event["object"]
+                        job_id = job.metadata.name
+                        job_finished = job.status.succeeded and not job.status.active and not job.status.failed
+                        job_status = (
+                            JobStatus.finished.name
+                            if job_finished
+                            else (
+                                JobStatus.failed.name
+                                if job.status.failed
+                                else JobStatus.running.name
+                            )
                         )
 
-                        if job_status == JobStatus.failed.name:
-                            self.log_disruption(
-                                event["object"].status.conditions, backend_job_id
+                        if self.should_process_job(job):
+                            reana_job_id = self.get_reana_job_id(job_id)
+
+                            if job_status == JobStatus.failed.name:
+                                self.log_disruption(
+                                    event["object"].status.conditions, job_id
+                                )
+
+                            # TODO: fetch logs from pod on remote worker when MultiKueue supports this
+                            # logs = self.job_manager_cls.get_logs(job_id)
+                            # store_job_logs(reana_job_id, logs)
+
+                            update_job_status(
+                                reana_job_id,
+                                job_status,
                             )
 
-                        store_job_logs(reana_job_id, logs)
-                        update_job_status(reana_job_id, job_status)
+                            if JobStatus.should_cleanup_job(job_status):
+                                self.clean_job(job_id)
 
-                        if JobStatus.should_cleanup_job(job_status):
-                            self.clean_job(backend_job_id)
-            except client.rest.ApiException as e:
-                logging.exception(
-                    f"Error from Kubernetes API while watching jobs pods: {e}"
-                )
-            except Exception as e:
-                logging.error(traceback.format_exc())
-                logging.error("Unexpected error: {}".format(e))
+                except client.rest.ApiException as e:
+                    logging.exception(f"Error from Kubernetes API while watching jobs: {e}")
+                except Exception as e:
+                    logging.error(traceback.format_exc())
+                    logging.error("Unexpected error: {}".format(e))
+        else:
+            while True:
+                try:
+                    w = watch.Watch()
+                    for event in w.stream(
+                        current_k8s_corev1_api_client.list_namespaced_pod,
+                        namespace=REANA_RUNTIME_KUBERNETES_NAMESPACE,
+                        label_selector=f"reana-run-job-workflow-uuid={self.workflow_uuid}",
+                    ):
+                        logging.info("New Pod event received: {0}".format(event["type"]))
+                        job_pod = event["object"]
+
+                        # Each job is processed once, when reaching a final state
+                        # (either successfully or not)
+                        if self.should_process_job_pod(job_pod):
+                            job_status = self.get_job_status(job_pod)
+                            backend_job_id = self.get_backend_job_id(job_pod)
+                            reana_job_id = self.get_reana_job_id(backend_job_id)
+
+                            logs = self.job_manager_cls.get_logs(
+                                backend_job_id, job_pod=job_pod
+                            )
+
+                            if job_status == JobStatus.failed.name:
+                                self.log_disruption(
+                                    event["object"].status.conditions, backend_job_id
+                                )
+
+                            store_job_logs(reana_job_id, logs)
+                            update_job_status(reana_job_id, job_status)
+
+                            if JobStatus.should_cleanup_job(job_status):
+                                self.clean_job(backend_job_id)
+                except client.rest.ApiException as e:
+                    logging.exception(
+                        f"Error from Kubernetes API while watching jobs pods: {e}"
+                    )
+                except Exception as e:
+                    logging.error(traceback.format_exc())
+                    logging.error("Unexpected error: {}".format(e))
 
     def log_disruption(self, conditions, backend_job_id):
         """Log disruption message from Kubernetes event conditions.
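For reference, the Jobs watch added in the Kueue branch uses the standard kubernetes-client watch API. A self-contained sketch of the same pattern outside the class, under the assumption of a local kubeconfig; the namespace and workflow UUID are placeholder values:

```python
from kubernetes import client, config, watch

# Illustrative standalone version of the Jobs watch loop; the
# namespace and workflow UUID below are placeholders.
config.load_kube_config()

w = watch.Watch()
for event in w.stream(
    client.BatchV1Api().list_namespaced_job,
    namespace="default",
    label_selector="reana-run-job-workflow-uuid=00000000-0000-0000-0000-000000000000",
):
    job = event["object"]
    # A V1Job is finished once it has succeeded pods and none active or failed.
    if job.status.succeeded and not job.status.active and not job.status.failed:
        print(f"Job {job.metadata.name} finished")
        w.stop()
```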