Skip to content

Commit bbaef73

Browse files
rjpower, github-actions[bot], and claude
authored
[iris] Remove ORM query builder, replace with raw SQL (#4181)
Delete Table, Column, Predicate, SelectExpr, Order, Join, JoinedQuery and all 16 module-level table constants from db.py. Replace ~100 callsites across 8 production files and 12 test files with raw SQL + decode_rows/decode_one helpers. Every query is now visible as plain SQL at its callsite. Net -556 lines. Follows #4179 which fixed the immediate scheduling perf issue. --------- Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com> Co-authored-by: Russell Power <rjpower@users.noreply.github.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 4820a5b commit bbaef73

19 files changed

Lines changed: 430 additions & 987 deletions

lib/iris/scripts/benchmark_db_queries.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,10 @@
3535
)
3636
from iris.cluster.controller.db import (
3737
ACTIVE_TASK_STATES,
38-
JOBS,
3938
ControllerDB,
4039
EndpointQuery,
40+
Job,
41+
decode_rows,
4142
healthy_active_workers_with_attributes,
4243
running_tasks_by_worker,
4344
tasks_for_job_with_attempts,
@@ -121,8 +122,11 @@ def benchmark_scheduling(db: ControllerDB, iterations: int) -> list[tuple[str, f
121122
)
122123

123124
def _reservation_jobs_old():
125+
placeholders = ",".join("?" for _ in reservable_states)
124126
with db.snapshot() as snapshot:
125-
all_jobs = snapshot.select(JOBS, where=JOBS.c.state.in_(list(reservable_states)))
127+
all_jobs = decode_rows(
128+
Job, snapshot.fetchall(f"SELECT * FROM jobs WHERE state IN ({placeholders})", reservable_states)
129+
)
126130
return [j for j in all_jobs if j.request.HasField("reservation")]
127131

128132
p50, p95 = bench("reservation_jobs (old: full scan)", _reservation_jobs_old, iterations=iterations)
@@ -145,8 +149,15 @@ def benchmark_dashboard(db: ControllerDB, iterations: int) -> list[tuple[str, fl
145149
results: list[tuple[str, float, float]] = []
146150

147151
def _bench_jobs_in_states(db):
152+
placeholders = ",".join("?" for _ in USER_JOB_STATES)
148153
with db.read_snapshot() as q:
149-
return q.select(JOBS, where=JOBS.c.state.in_(list(USER_JOB_STATES)) & (JOBS.c.depth == 1))
154+
return decode_rows(
155+
Job,
156+
q.fetchall(
157+
f"SELECT * FROM jobs WHERE state IN ({placeholders}) AND depth = 1",
158+
(*USER_JOB_STATES,),
159+
),
160+
)
150161

151162
p50, p95 = bench("jobs_in_states (top-level)", lambda: _bench_jobs_in_states(db), iterations=iterations)
152163
results.append(("jobs_in_states (top-level)", p50, p95))

lib/iris/src/iris/cluster/controller/actor_proxy.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from starlette.requests import Request
2121
from starlette.responses import JSONResponse, Response
2222

23-
from iris.cluster.controller.db import ControllerDB, EndpointQuery, endpoint_query_predicate
23+
from iris.cluster.controller.db import ControllerDB, Endpoint, EndpointQuery, decode_rows, endpoint_query_sql
2424

2525
logger = logging.getLogger(__name__)
2626

@@ -99,11 +99,9 @@ async def handle(self, request: Request) -> Response:
9999
def _resolve_endpoint(self, name: str) -> str | None:
100100
"""Resolve an endpoint name to an address via the controller DB."""
101101
query = EndpointQuery(exact_name=name)
102-
joins, where = endpoint_query_predicate(query)
103-
from iris.cluster.controller.service import ENDPOINTS
104-
102+
sql, params = endpoint_query_sql(query)
105103
with self._db.read_snapshot() as q:
106-
endpoints = q.select(ENDPOINTS, where=where, joins=joins)
104+
endpoints = decode_rows(Endpoint, q.fetchall(sql, tuple(params)))
107105
if not endpoints:
108106
return None
109107
return endpoints[0].address

lib/iris/src/iris/cluster/controller/auth.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
import jwt
1919

20-
from iris.cluster.controller.db import API_KEYS, ApiKey, ControllerDB
20+
from iris.cluster.controller.db import ApiKey, ControllerDB, decode_one, decode_rows
2121
from iris.rpc import config_pb2
2222
from iris.rpc.auth import (
2323
GcpAccessTokenVerifier,
@@ -59,9 +59,10 @@ def create_api_key(
5959

6060
def lookup_api_key_by_hash(db: ControllerDB, key_hash: str) -> ApiKey | None:
6161
"""Find an API key by its SHA-256 hash."""
62-
table = dataclasses.replace(API_KEYS, sql_name=db.api_keys_table)
6362
with db.snapshot() as q:
64-
return q.one(table, where=table.c.key_hash == key_hash)
63+
return decode_one(
64+
ApiKey, q.fetchall(f"SELECT * FROM {db.api_keys_table} WHERE key_hash = ? LIMIT 1", (key_hash,))
65+
)
6566

6667

6768
def touch_api_key(db: ControllerDB, key_id: str, now: Timestamp) -> None:
@@ -84,11 +85,10 @@ def revoke_api_key(db: ControllerDB, key_id: str, now: Timestamp) -> bool:
8485

8586
def list_api_keys(db: ControllerDB, user_id: str | None = None) -> list[ApiKey]:
8687
"""List API keys, optionally filtered by user."""
87-
table = dataclasses.replace(API_KEYS, sql_name=db.api_keys_table)
8888
with db.snapshot() as q:
8989
if user_id:
90-
return q.select(table, where=table.c.user_id == user_id)
91-
return q.select(table)
90+
return decode_rows(ApiKey, q.fetchall(f"SELECT * FROM {db.api_keys_table} WHERE user_id = ?", (user_id,)))
91+
return decode_rows(ApiKey, q.fetchall(f"SELECT * FROM {db.api_keys_table}"))
9292

9393

9494
def revoke_login_keys_for_user(db: ControllerDB, user_id: str, now: Timestamp) -> list[str]:

lib/iris/src/iris/cluster/controller/autoscaler.py

Lines changed: 23 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
soft_constraint_score,
4848
split_hard_soft,
4949
)
50-
from iris.cluster.controller.db import SCALING_GROUPS, SLICES, TRACKED_WORKERS, ControllerDB
50+
from iris.cluster.controller.db import ControllerDB, _decode_json_list, _decode_timestamp_ms
5151
from iris.cluster.types import WorkerStatusMap
5252
from iris.cluster.controller.scaling_group import (
5353
GroupAvailability,
@@ -1260,38 +1260,30 @@ def restore_from_db(self, db: ControllerDB, platform: WorkerInfraProvider) -> No
12601260
tracked workers. Call at startup before loops begin.
12611261
"""
12621262
with db.snapshot() as snapshot:
1263-
scaling_rows = snapshot.select(
1264-
SCALING_GROUPS,
1265-
columns=(
1266-
SCALING_GROUPS.c.name,
1267-
SCALING_GROUPS.c.consecutive_failures,
1268-
SCALING_GROUPS.c.backoff_until_ms,
1269-
SCALING_GROUPS.c.last_scale_up_ms,
1270-
SCALING_GROUPS.c.last_scale_down_ms,
1271-
SCALING_GROUPS.c.quota_exceeded_until_ms,
1272-
SCALING_GROUPS.c.quota_reason,
1273-
),
1263+
scaling_rows = snapshot.raw(
1264+
"SELECT name, consecutive_failures, backoff_until_ms, last_scale_up_ms, "
1265+
"last_scale_down_ms, quota_exceeded_until_ms, quota_reason "
1266+
"FROM scaling_groups",
1267+
decoders={
1268+
"consecutive_failures": int,
1269+
"backoff_until_ms": _decode_timestamp_ms,
1270+
"last_scale_up_ms": _decode_timestamp_ms,
1271+
"last_scale_down_ms": _decode_timestamp_ms,
1272+
"quota_exceeded_until_ms": _decode_timestamp_ms,
1273+
},
12741274
)
1275-
slice_rows = snapshot.select(
1276-
SLICES,
1277-
columns=(
1278-
SLICES.c.slice_id,
1279-
SLICES.c.scale_group,
1280-
SLICES.c.lifecycle,
1281-
SLICES.c.worker_ids,
1282-
SLICES.c.created_at_ms,
1283-
SLICES.c.last_active_ms,
1284-
SLICES.c.error_message,
1285-
),
1275+
slice_rows = snapshot.raw(
1276+
"SELECT slice_id, scale_group, lifecycle, worker_ids, "
1277+
"created_at_ms, last_active_ms, error_message "
1278+
"FROM slices",
1279+
decoders={
1280+
"worker_ids": _decode_json_list,
1281+
"created_at_ms": _decode_timestamp_ms,
1282+
"last_active_ms": _decode_timestamp_ms,
1283+
},
12861284
)
1287-
tracked_rows = snapshot.select(
1288-
TRACKED_WORKERS,
1289-
columns=(
1290-
TRACKED_WORKERS.c.worker_id,
1291-
TRACKED_WORKERS.c.slice_id,
1292-
TRACKED_WORKERS.c.scale_group,
1293-
TRACKED_WORKERS.c.internal_address,
1294-
),
1285+
tracked_rows = snapshot.raw(
1286+
"SELECT worker_id, slice_id, scale_group, internal_address FROM tracked_workers",
12951287
)
12961288

12971289
# Build GroupSnapshot objects from DB rows

lib/iris/src/iris/cluster/controller/checkpoint.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
import fsspec.core
3434
import zstandard
3535

36-
from iris.cluster.controller.db import JOBS, TASKS, WORKERS, ControllerDB
36+
from iris.cluster.controller.db import ControllerDB
3737
from iris.time_utils import Duration, Timestamp
3838

3939
logger = logging.getLogger(__name__)
@@ -158,9 +158,9 @@ def write_checkpoint(
158158
tmp_zst2.unlink(missing_ok=True)
159159

160160
with db.snapshot() as snapshot:
161-
job_count = snapshot.count(JOBS)
162-
task_count = snapshot.count(TASKS)
163-
worker_count = snapshot.count(WORKERS)
161+
job_count = snapshot.fetchone("SELECT COUNT(*) FROM jobs")[0] # type: ignore[index]
162+
task_count = snapshot.fetchone("SELECT COUNT(*) FROM tasks")[0] # type: ignore[index]
163+
worker_count = snapshot.fetchone("SELECT COUNT(*) FROM workers")[0] # type: ignore[index]
164164
result = CheckpointResult(
165165
created_at=created_at,
166166
job_count=job_count,

lib/iris/src/iris/cluster/controller/controller.py

Lines changed: 51 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,14 @@
3232
write_checkpoint,
3333
)
3434
from iris.cluster.controller.db import (
35-
ATTEMPTS,
36-
JOBS,
37-
RESERVATION_CLAIMS,
38-
TASKS,
39-
WORKERS,
35+
Attempt,
4036
ControllerDB,
41-
Join,
4237
Job,
4338
Task,
4439
Worker,
4540
_decode_row,
4641
_tasks_with_attempts,
42+
decode_rows,
4743
healthy_active_workers_with_attributes,
4844
insert_task_profile,
4945
running_tasks_by_worker,
@@ -260,13 +256,9 @@ def compute_demand_entries(
260256
def _read_reservation_claims(db: ControllerDB) -> dict[WorkerId, ReservationClaim]:
261257
"""Read reservation claims from the canonical DB table."""
262258
with db.snapshot() as snapshot:
263-
rows = snapshot.select(
264-
RESERVATION_CLAIMS,
265-
columns=(
266-
RESERVATION_CLAIMS.c.worker_id,
267-
RESERVATION_CLAIMS.c.job_id,
268-
RESERVATION_CLAIMS.c.entry_idx,
269-
),
259+
rows = snapshot.raw(
260+
"SELECT rc.worker_id, rc.job_id, rc.entry_idx FROM reservation_claims rc",
261+
decoders={"worker_id": WorkerId},
270262
)
271263
return {
272264
row.worker_id: ReservationClaim(
@@ -280,8 +272,12 @@ def _read_reservation_claims(db: ControllerDB) -> dict[WorkerId, ReservationClai
280272
def _jobs_by_id(queries: ControllerDB, job_ids: set[JobName]) -> dict[JobName, Job]:
281273
if not job_ids:
282274
return {}
275+
wires = [job_id.to_wire() for job_id in job_ids]
276+
placeholders = ",".join("?" for _ in wires)
283277
with queries.snapshot() as snapshot:
284-
jobs = snapshot.select(JOBS, where=JOBS.c.job_id.in_([job_id.to_wire() for job_id in job_ids]))
278+
jobs = decode_rows(
279+
Job, snapshot.fetchall(f"SELECT * FROM jobs j WHERE j.job_id IN ({placeholders})", tuple(wires))
280+
)
285281
return {job.job_id: job for job in jobs}
286282

287283

@@ -302,16 +298,14 @@ def _jobs_with_reservations(queries: ControllerDB, states: tuple[int, ...]) -> l
302298

303299
def _schedulable_tasks(queries: ControllerDB) -> list[Task]:
304300
# Only PENDING tasks can pass can_be_scheduled(); no need to fetch ASSIGNED/BUILDING/RUNNING.
305-
SCHEDULABLE_STATES = (cluster_pb2.TASK_STATE_PENDING,)
306301
with queries.snapshot() as snapshot:
307-
tasks = snapshot.select(
308-
TASKS,
309-
where=TASKS.c.state.in_(list(SCHEDULABLE_STATES)),
310-
order_by=(
311-
TASKS.c.priority_neg_depth.asc(),
312-
TASKS.c.priority_root_submitted_ms.asc(),
313-
TASKS.c.submitted_at_ms.asc(),
314-
TASKS.c.task_id.asc(),
302+
tasks = decode_rows(
303+
Task,
304+
snapshot.fetchall(
305+
"SELECT * FROM tasks t WHERE t.state = ? "
306+
"ORDER BY t.priority_neg_depth ASC, t.priority_root_submitted_ms ASC, "
307+
"t.submitted_at_ms ASC, t.task_id ASC",
308+
(cluster_pb2.TASK_STATE_PENDING,),
315309
),
316310
)
317311
return [task for task in tasks if task.can_be_scheduled()]
@@ -321,16 +315,22 @@ def _tasks_by_ids_with_attempts(queries: ControllerDB, task_ids: set[JobName]) -
321315
if not task_ids:
322316
return {}
323317
task_wires = [task_id.to_wire() for task_id in task_ids]
318+
placeholders = ",".join("?" for _ in task_wires)
324319
with queries.snapshot() as snapshot:
325-
tasks = snapshot.select(
326-
TASKS,
327-
where=TASKS.c.task_id.in_(task_wires),
328-
order_by=(TASKS.c.task_id.asc(),),
320+
tasks = decode_rows(
321+
Task,
322+
snapshot.fetchall(
323+
f"SELECT * FROM tasks t WHERE t.task_id IN ({placeholders}) ORDER BY t.task_id ASC",
324+
tuple(task_wires),
325+
),
329326
)
330-
attempts = snapshot.select(
331-
ATTEMPTS,
332-
where=ATTEMPTS.c.task_id.in_(task_wires),
333-
order_by=(ATTEMPTS.c.task_id.asc(), ATTEMPTS.c.attempt_id.asc()),
327+
attempts = decode_rows(
328+
Attempt,
329+
snapshot.fetchall(
330+
f"SELECT * FROM task_attempts a WHERE a.task_id IN ({placeholders}) "
331+
"ORDER BY a.task_id ASC, a.attempt_id ASC",
332+
tuple(task_wires),
333+
),
334334
)
335335
return {task.task_id: task for task in _tasks_with_attempts(tasks, attempts)}
336336

@@ -362,25 +362,27 @@ def _building_counts(queries: ControllerDB, workers: list[Worker]) -> dict[Worke
362362
def _workers_by_id(queries: ControllerDB, worker_ids: set[WorkerId]) -> dict[WorkerId, Worker]:
363363
if not worker_ids:
364364
return {}
365+
wires = [str(wid) for wid in worker_ids]
366+
placeholders = ",".join("?" for _ in wires)
365367
with queries.snapshot() as snapshot:
366-
workers = snapshot.select(
367-
WORKERS,
368-
where=WORKERS.c.worker_id.in_([str(worker_id) for worker_id in worker_ids]),
368+
workers = decode_rows(
369+
Worker, snapshot.fetchall(f"SELECT * FROM workers w WHERE w.worker_id IN ({placeholders})", tuple(wires))
369370
)
370371
return {worker.worker_id: worker for worker in workers}
371372

372373

373374
def _task_worker_mapping(queries: ControllerDB, task_ids: set[JobName]) -> dict[JobName, WorkerId]:
374375
if not task_ids:
375376
return {}
377+
task_wires = [task_id.to_wire() for task_id in task_ids]
378+
placeholders = ",".join("?" for _ in task_wires)
376379
with queries.snapshot() as snapshot:
377-
rows = snapshot.select(
378-
TASKS,
379-
columns=(TASKS.c.task_id, ATTEMPTS.c.worker_id),
380-
joins=(Join(table=ATTEMPTS, on=TASKS.c.task_id == ATTEMPTS.c.task_id),),
381-
where=TASKS.c.task_id.in_([task_id.to_wire() for task_id in task_ids])
382-
& (TASKS.c.current_attempt_id == ATTEMPTS.c.attempt_id)
383-
& ATTEMPTS.c.worker_id.not_null(),
380+
rows = snapshot.raw(
381+
f"SELECT t.task_id, a.worker_id FROM tasks t "
382+
f"JOIN task_attempts a ON t.task_id = a.task_id AND t.current_attempt_id = a.attempt_id "
383+
f"WHERE t.task_id IN ({placeholders}) AND a.worker_id IS NOT NULL",
384+
tuple(task_wires),
385+
decoders={"task_id": JobName.from_wire, "worker_id": WorkerId},
384386
)
385387
return {row.task_id: row.worker_id for row in rows}
386388

@@ -1178,12 +1180,8 @@ def _cleanup_stale_claims(self, claims: dict[WorkerId, ReservationClaim] | None
11781180
persisted = True
11791181
with self._db.snapshot() as snapshot:
11801182
active_worker_ids = {
1181-
row.worker_id
1182-
for row in snapshot.select(
1183-
WORKERS,
1184-
columns=(WORKERS.c.worker_id,),
1185-
where=WORKERS.c.active == 1,
1186-
)
1183+
WorkerId(str(row[0]))
1184+
for row in snapshot.fetchall("SELECT w.worker_id FROM workers w WHERE w.active = 1")
11871185
}
11881186
claimed_job_ids = {JobName.from_wire(claim.job_id) for claim in claims.values()}
11891187
claimed_jobs = list(_jobs_by_id(self._db, claimed_job_ids).values()) if claimed_job_ids else []
@@ -1224,7 +1222,6 @@ def _claim_workers_for_reservations(self, claims: dict[WorkerId, ReservationClai
12241222
)
12251223
reservation_jobs = _jobs_with_reservations(self._db, reservable_states)
12261224
for job in reservation_jobs:
1227-
12281225
job_wire = job.job_id.to_wire()
12291226
for idx, res_entry in enumerate(job.request.reservation.entries):
12301227
if (job_wire, idx) in claimed_entries:
@@ -1610,7 +1607,11 @@ def _sync_all_execution_units(self) -> None:
16101607
if _HEALTH_SUMMARY_INTERVAL.should_run():
16111608
workers = healthy_active_workers_with_attributes(self._db)
16121609
with self._db.snapshot() as snap:
1613-
active = snap.count(JOBS, where=JOBS.c.state == cluster_pb2.JOB_STATE_RUNNING)
1610+
active = snap.fetchone(
1611+
"SELECT COUNT(*) FROM jobs j WHERE j.state = ?", (cluster_pb2.JOB_STATE_RUNNING,)
1612+
)[
1613+
0
1614+
] # type: ignore[index]
16141615
pending = len(_schedulable_tasks(self._db))
16151616
logger.info(
16161617
"Controller status: %d workers (%d failed), %d active jobs, %d pending tasks",
@@ -1647,7 +1648,7 @@ def _build_worker_status_map(self) -> WorkerStatusMap:
16471648
"""Build a map of worker_id to worker status for autoscaler idle tracking."""
16481649
result: WorkerStatusMap = {}
16491650
with self._db.snapshot() as snapshot:
1650-
workers = snapshot.select(WORKERS, where=WORKERS.c.active == 1)
1651+
workers = decode_rows(Worker, snapshot.fetchall("SELECT * FROM workers w WHERE w.active = 1"))
16511652
running_by_worker = running_tasks_by_worker(self._db, {worker.worker_id for worker in workers})
16521653
for worker in workers:
16531654
result[worker.worker_id] = WorkerStatus(

0 commit comments

Comments (0)