diff --git a/app/desktop/desktop_server.py b/app/desktop/desktop_server.py
index b72c15704..9abe71c72 100644
--- a/app/desktop/desktop_server.py
+++ b/app/desktop/desktop_server.py
@@ -33,6 +33,8 @@
 from app.desktop.studio_server.eval_api import connect_evals_api
 from app.desktop.studio_server.finetune_api import connect_fine_tune_api
 from app.desktop.studio_server.import_api import connect_import_api
+from app.desktop.studio_server.jobs.api import connect_jobs_api
+from app.desktop.studio_server.jobs.registry import job_registry
 from app.desktop.studio_server.prompt_api import connect_prompt_api
 from app.desktop.studio_server.prompt_optimization_job_api import (
     connect_prompt_optimization_job_api,
@@ -111,6 +113,12 @@ async def lifespan(app: FastAPI):
         await _start_background_syncs()
         yield
     finally:
+        # End open SSE subscriptions so a UI holding the jobs stream open can't
+        # keep the worker alive (e.g. block a dev-server hot reload). Pure
+        # observer teardown — jobs keep running. Note uvicorn only reaches
+        # lifespan shutdown after its graceful-shutdown wait, so the dev server
+        # also sets timeout_graceful_shutdown to bound that wait.
+        job_registry.events.shutdown()
         try:
             await _stop_background_syncs()
         finally:
@@ -146,6 +154,7 @@ def make_app(tk_root: tk.Tk | None = None):
     connect_agent_api(app)
     connect_dev_tools(app)
     connect_chat_api(app)
+    connect_jobs_api(app)
     # Important: webhost must be last, it handles all other URLs
     connect_webhost(app)
     return app
diff --git a/app/desktop/dev_server.py b/app/desktop/dev_server.py
index 2b8804b9c..514ab013a 100644
--- a/app/desktop/dev_server.py
+++ b/app/desktop/dev_server.py
@@ -36,4 +36,10 @@
         reload=True,
         # Debounce when changing many files (changing branch)
         reload_delay=0.1,
+        # Bound the graceful-shutdown wait on reload. The UI holds the jobs SSE
+        # stream open; uvicorn waits for in-flight requests to finish BEFORE it
+        # runs lifespan shutdown (which closes the stream), so without a bound a
+        # reload would hang on the open SSE. After this many seconds uvicorn
+        # cancels the lingering request task instead.
+        timeout_graceful_shutdown=1,
     )
diff --git a/app/desktop/git_sync/middleware.py b/app/desktop/git_sync/middleware.py
index 36eba0e92..f4b234f06 100644
--- a/app/desktop/git_sync/middleware.py
+++ b/app/desktop/git_sync/middleware.py
@@ -357,7 +357,11 @@ def _resolve_endpoint(self, request: Request) -> Callable[..., Any] | None:
         return None
 
     def _get_manager_for_request(self, request: Request) -> GitSyncManager | None:
-        """Extract project_id from URL, resolve to path, return manager if auto-sync enabled."""
+        """Extract project_id from URL, resolve to path, return manager if auto-sync enabled.
+
+        Keep the project_id -> manager resolution below in sync with the request-free
+        copy in save_context.get_manager_for_project (used by background job workers).
+        """
         match = PROJECT_ID_PATTERN.match(request.url.path)
         if match is None:
             return None
diff --git a/app/desktop/git_sync/save_context.py b/app/desktop/git_sync/save_context.py
new file mode 100644
index 000000000..5ce24bedd
--- /dev/null
+++ b/app/desktop/git_sync/save_context.py
@@ -0,0 +1,66 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from kiln_ai.utils.git_sync_protocols import SaveContext
+
+from app.desktop.git_sync.config import get_git_sync_config, project_path_from_id
+from app.desktop.git_sync.git_sync_manager import GitSyncManager
+from app.desktop.git_sync.registry import GitSyncRegistry
+
+
+def get_manager_for_project(project_id: str) -> GitSyncManager | None:
+    """Return the shared GitSyncManager for a project, or None if auto-sync is off.
+
+    Does the same project_id -> manager lookup as
+    GitSyncMiddleware._get_manager_for_request, but takes the project_id
+    directly instead of parsing it out of a request URL. Keep the two in sync
+    when either one changes.
+
+    None is returned when any step comes up empty: the project id has no path,
+    the path has no git-sync config, the config's sync_mode is not "auto", or
+    the config has no clone_path. Exceptions raised by the lookups themselves
+    are not caught here.
+
+    The config is keyed by project_path and the manager by clone_path. Going
+    through GitSyncRegistry.get_or_create keeps a single manager per clone path
+    (with its executor and non-reentrant write lock) shared with the HTTP path.
+    """
+    project_path = project_path_from_id(project_id)
+    config = None if project_path is None else get_git_sync_config(project_path)
+    if config is None or config["sync_mode"] != "auto":
+        return None
+
+    clone_path = config.get("clone_path")
+    if clone_path is None:
+        return None
+
+    # Only reached for an auto-sync config that names a clone directory.
+    return GitSyncRegistry.get_or_create(
+        repo_path=Path(clone_path),
+        remote_name=config["remote_name"],
+        pat_token=config.get("pat_token"),
+        oauth_token=config.get("oauth_token"),
+        auth_mode=config["auth_mode"],
+    )
+
+
+def save_context_for_project(project_id: str, context: str) -> SaveContext | None:
+    """Build a SaveContext for callers that only have a project_id.
+
+    Returns None when get_manager_for_project finds no active auto-sync manager
+    (runners coalesce None to a no-op context). Otherwise the background sync
+    registered for the manager's repo_path, if any, is notified once at build
+    time, and a zero-argument factory is returned; each call to it returns
+    manager.atomic_write(context=context).
+
+    Project_id-based counterpart of build_save_context(request), for callers
+    such as background job workers.
+    """
+    manager = get_manager_for_project(project_id)
+    if manager is None:
+        return None
+
+    if (bg_sync := GitSyncRegistry.get_background_sync(manager.repo_path)) is not None:
+        bg_sync.notify_request()
+    return lambda: manager.atomic_write(context=context)
diff --git a/app/desktop/git_sync/test_save_context.py b/app/desktop/git_sync/test_save_context.py
new file mode 100644
index 000000000..a26d4590a
--- /dev/null
+++ b/app/desktop/git_sync/test_save_context.py
@@ -0,0 +1,219 @@
+from __future__ import annotations
+
+from contextlib import ExitStack, asynccontextmanager
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from app.desktop.git_sync.config import GitSyncProjectConfig
+from app.desktop.git_sync.save_context import (
+    get_manager_for_project,
+    save_context_for_project,
+)
+
+PROJECT_ID = "project_abc"
+PROJECT_PATH = "/tmp/test/project.kiln"
+CLONE_PATH = "/tmp/test/clone"
+
+
+def _auto_config(clone_path: str | None = CLONE_PATH) -> GitSyncProjectConfig:
+    return GitSyncProjectConfig(
+        sync_mode="auto",
+        auth_mode="system_keys",
+        remote_name="origin",
+        branch="main",
+        clone_path=clone_path,
+        git_url=None,
+        pat_token=None,
+        oauth_token=None,
+    )
+
+
+def _manual_config() -> GitSyncProjectConfig:
+    return GitSyncProjectConfig(
+        sync_mode="manual",
+        auth_mode="system_keys",
+        remote_name="origin",
+        branch="main",
+        clone_path=CLONE_PATH,
+        git_url=None,
+        pat_token=None,
+        oauth_token=None,
+    )
+
+
+class _FakeManager:
+    """Minimal AtomicWriteCapable stand-in that records atomic_write calls."""
+
+    def __init__(self, repo_path: Path = Path(CLONE_PATH)):
+        self.repo_path = repo_path
+        self.calls: list[str] = []
+        self.entered = False
+
+    @asynccontextmanager
+    async def atomic_write(self, context: str):
+        self.calls.append(context)
+        self.entered = True
+        yield
+
+
+def _patch_resolution(project_path, config, manager=None, bg_sync=None):
+    """Patch the config + registry calls used by the helper.
+
+    project_path_from_id and get_git_sync_config are looked up in the
+    save_context module namespace, so patch them there.
+    """
+    stack = ExitStack()
+    stack.enter_context(
+        patch(
+            "app.desktop.git_sync.save_context.project_path_from_id",
+            return_value=project_path,
+        )
+    )
+    stack.enter_context(
+        patch(
+            "app.desktop.git_sync.save_context.get_git_sync_config",
+            return_value=config,
+        )
+    )
+    stack.enter_context(
+        patch(
+            "app.desktop.git_sync.save_context.GitSyncRegistry.get_or_create",
+            return_value=manager,
+        )
+    )
+    stack.enter_context(
+        patch(
+            "app.desktop.git_sync.save_context.GitSyncRegistry.get_background_sync",
+            return_value=bg_sync,
+        )
+    )
+    return stack
+
+
+# -- None branches -----------------------------------------------------------
+
+
+def test_returns_none_when_no_project_path():
+    with _patch_resolution(project_path=None, config=None):
+        assert save_context_for_project(PROJECT_ID, context="ctx") is None
+        assert get_manager_for_project(PROJECT_ID) is None
+
+
+def test_returns_none_when_no_git_sync_config():
+    with _patch_resolution(project_path=PROJECT_PATH, config=None):
+        assert save_context_for_project(PROJECT_ID, context="ctx") is None
+        assert get_manager_for_project(PROJECT_ID) is None
+
+
+def test_returns_none_when_sync_mode_not_auto():
+    with _patch_resolution(project_path=PROJECT_PATH, config=_manual_config()):
+        assert save_context_for_project(PROJECT_ID, context="ctx") is None
+        assert get_manager_for_project(PROJECT_ID) is None
+
+
+def test_returns_none_when_clone_path_missing():
+    with _patch_resolution(
+        project_path=PROJECT_PATH, config=_auto_config(clone_path=None)
+    ):
+        assert save_context_for_project(PROJECT_ID, context="ctx") is None
+        assert get_manager_for_project(PROJECT_ID) is None
+
+
+# -- active branches ---------------------------------------------------------
+
+
+def test_get_manager_uses_registry_with_config_values():
+    manager = _FakeManager()
+    with (
+        patch(
+            "app.desktop.git_sync.save_context.project_path_from_id",
+            return_value=PROJECT_PATH,
+        ),
+        patch(
+            "app.desktop.git_sync.save_context.get_git_sync_config",
+            return_value=_auto_config(),
+        ),
+        patch(
+            "app.desktop.git_sync.save_context.GitSyncRegistry.get_or_create",
+            return_value=manager,
+        ) as mock_get_or_create,
+    ):
+        result = get_manager_for_project(PROJECT_ID)
+
+    assert result is manager
+    mock_get_or_create.assert_called_once_with(
+        repo_path=Path(CLONE_PATH),
+        remote_name="origin",
+        pat_token=None,
+        oauth_token=None,
+        auth_mode="system_keys",
+    )
+
+
+async def test_save_context_enters_atomic_write_with_label():
+    manager = _FakeManager()
+    with _patch_resolution(
+        project_path=PROJECT_PATH, config=_auto_config(), manager=manager
+    ):
+        save_context = save_context_for_project(PROJECT_ID, context="eval job e1/r1")
+
+    assert save_context is not None
+    assert manager.entered is False  # built lazily, not yet entered
+
+    async with save_context():
+        pass
+
+    assert manager.calls == ["eval job e1/r1"]
+
+
+def test_save_context_notifies_background_sync():
+    manager = _FakeManager()
+    bg_sync = MagicMock()
+    with _patch_resolution(
+        project_path=PROJECT_PATH,
+        config=_auto_config(),
+        manager=manager,
+        bg_sync=bg_sync,
+    ):
+        save_context = save_context_for_project(PROJECT_ID, context="ctx")
+
+    assert save_context is not None
+    bg_sync.notify_request.assert_called_once()
+
+
+def test_save_context_no_background_sync_is_fine():
+    manager = _FakeManager()
+    with _patch_resolution(
+        project_path=PROJECT_PATH,
+        config=_auto_config(),
+        manager=manager,
+        bg_sync=None,
+    ):
+        save_context = save_context_for_project(PROJECT_ID, context="ctx")
+
+    assert save_context is not None
+
+
+# -- error propagation -------------------------------------------------------
+
+
+def test_propagates_when_config_lookup_raises():
+    # A corrupt/raising config lookup must surface (failing the job) rather than
+    # be swallowed to None, which would silently skip commits for an auto-sync
+    # project — the very bug this resolver exists to prevent.
+    with (
+        patch(
+            "app.desktop.git_sync.save_context.project_path_from_id",
+            return_value=PROJECT_PATH,
+        ),
+        patch(
+            "app.desktop.git_sync.save_context.get_git_sync_config",
+            side_effect=RuntimeError("corrupt config"),
+        ),
+    ):
+        with pytest.raises(RuntimeError, match="corrupt config"):
+            get_manager_for_project(PROJECT_ID)
+        with pytest.raises(RuntimeError, match="corrupt config"):
+            save_context_for_project(PROJECT_ID, context="ctx")
diff --git a/app/desktop/studio_server/jobs/__init__.py b/app/desktop/studio_server/jobs/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/app/desktop/studio_server/jobs/api.py b/app/desktop/studio_server/jobs/api.py
new file mode 100644
index 000000000..554470940
--- /dev/null
+++ b/app/desktop/studio_server/jobs/api.py
@@ -0,0 +1,355 @@
+from __future__ import annotations
+
+import asyncio
+import json
+from datetime import datetime
+from typing import Annotated, Any, AsyncGenerator
+
+from fastapi import FastAPI, HTTPException, Path, Query, Response
+from kiln_server.cancellable_streaming_response import CancellableStreamingResponse
+from kiln_server.utils.agent_checks.policy import (
+    ALLOW_AGENT,
+    agent_policy_require_approval,
+)
+from pydantic import BaseModel, Field, ValidationError
+
+from . import error_log
+from .events import JobEvent
+from .models import BackgroundJobStatus, JobRecord
+from .registry import JobNotFoundError, JobOperationError, job_registry
+from .workers.noop import NoopJobWorker
+
+KEEPALIVE_SECONDS = 15.0
+
+_JOB_MUTATION_APPROVAL = agent_policy_require_approval(
+    "Allow agent to control background jobs (pause, resume, cancel, delete)?"
+)
+
+
+class CreateJobRequest(BaseModel):
+    """Request body for creating a job. Params are validated per job type."""
+
+    params: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Type-specific job parameters, validated against the type's params model.",
+    )
+    project_id: str | None = Field(
+        default=None,
+        description="Project to scope this job to (for filtering/visibility). "
+        "Falls back to the params' project_id when omitted.",
+    )
+    metadata: dict[str, Any] | None = Field(
+        default=None,
+        description="Free-form pass-through attribution, stored verbatim.",
+    )
+
+
+class CreateJobResponse(BaseModel):
+    """Response returned when a job is created."""
+
+    job_id: str = Field(description="The id of the newly created job.")
+    status: BackgroundJobStatus = Field(
+        description="The job's status immediately after creation."
+    )
+
+
+def _project_id_from_params(validated_params: BaseModel) -> str | None:
+    return getattr(validated_params, "project_id", None)
+
+
+def _format_sse(event: JobEvent) -> str:
+    """Serialize a bus event as one SSE frame (event line, data line, blank line)."""
+    payload = json.dumps(event.data, ensure_ascii=False)
+    return f"event: {event.event}\ndata: {payload}\n\n"
+
+
+async def _event_stream(
+    job_id: str | None,
+    type_name: str | None,
+    project_id: str | None,
+) -> AsyncGenerator[str, None]:
+    """Pure-observer SSE generator.
+
+    Subscribes to the registry event bus and forwards snapshot/job/deleted
+    events, injecting a keepalive comment between events. Closing this generator
+    (client disconnect, via CancellableStreamingResponse) only unsubscribes from
+    the bus — it never touches any job's supervising task. Jobs keep running.
+    """
+    # subscribe() runs the keepalive timer itself: after `timeout` idle seconds
+    # it yields a "ping" event, which is sent below as an SSE comment line.
+    subscription: AsyncGenerator[JobEvent, None] = job_registry.events.subscribe(
+        job_id=job_id,
+        type_name=type_name,
+        project_id=project_id,
+        timeout=KEEPALIVE_SECONDS,
+    )
+    try:
+        async for event in subscription:
+            if event.event == "ping":
+                yield ": ping\n\n"
+            else:
+                yield _format_sse(event)
+    finally:
+        await subscription.aclose()
+
+
+def connect_jobs_api(app: FastAPI) -> None:
+    # Register the workers this server exposes. register_type overwrites by
+    # type_name, so repeated calls (e.g. multiple make_app() in tests) are safe.
+    job_registry.register_type(NoopJobWorker)
+
+    @app.get(
+        "/api/jobs/events",
+        summary="Stream Job Events",
+        tags=["Jobs"],
+        openapi_extra=ALLOW_AGENT,
+    )
+    async def stream_job_events(
+        job_id: Annotated[
+            str | None, Query(description="Only stream events for this job id.")
+        ] = None,
+        type: Annotated[
+            str | None, Query(description="Only stream events for this job type.")
+        ] = None,
+        project_id: Annotated[
+            str | None, Query(description="Only stream events for this project id.")
+        ] = None,
+    ) -> CancellableStreamingResponse:
+        """Server-sent events for jobs. Emits an initial `snapshot`, then per-job
+        `job` and `deleted` events. A pure observer: disconnecting never stops a job."""
+        return CancellableStreamingResponse(
+            content=_event_stream(job_id, type, project_id),
+            media_type="text/event-stream",
+        )
+
+    @app.get(
+        "/api/jobs",
+        summary="List Jobs",
+        tags=["Jobs"],
+        openapi_extra=ALLOW_AGENT,
+    )
+    async def list_jobs(
+        status: Annotated[
+            BackgroundJobStatus | None, Query(description="Filter by job status.")
+        ] = None,
+        type: Annotated[str | None, Query(description="Filter by job type.")] = None,
+        project_id: Annotated[
+            str | None, Query(description="Filter by project id.")
+        ] = None,
+        since: Annotated[
+            datetime | None,
+            Query(description="Only jobs created at or after this ISO-8601 time."),
+        ] = None,
+        limit: Annotated[
+            int | None, Query(description="Maximum number of jobs to return.")
+        ] = None,
+    ) -> list[JobRecord]:
+        return job_registry.list_jobs(
+            status=status,
+            type_name=type,
+            project_id=project_id,
+            since=since,
+            limit=limit,
+        )
+
+    @app.post(
+        "/api/jobs/{type}",
+        summary="Create Job",
+        tags=["Jobs"],
+        status_code=201,
+        response_model=CreateJobResponse | JobRecord,
+        openapi_extra=ALLOW_AGENT,
+    )
+    async def create_job(
+        type: Annotated[str, Path(description="The registered job type to run.")],
+        request: CreateJobRequest,
+        wait: Annotated[
+            bool,
+            Query(
+                description="When true, block until the job reaches a terminal "
+                "state and return the full JobRecord instead of CreateJobResponse."
+            ),
+        ] = False,
+        timeout: Annotated[
+            float | None,
+            Query(
+                ge=0,
+                description="Seconds to wait when wait=true (504 on timeout). "
+                "Omit to wait indefinitely.",
+            ),
+        ] = None,
+    ) -> CreateJobResponse | JobRecord:
+        try:
+            worker = job_registry.worker_for(type)
+        except JobOperationError:
+            raise HTTPException(status_code=404, detail=f"Unknown job type: {type}")
+
+        try:
+            validated = worker.params_model.model_validate(request.params)
+        except ValidationError as exc:
+            raise HTTPException(status_code=422, detail=exc.errors())
+
+        job = await job_registry.create(
+            type_name=type,
+            params=validated,
+            project_id=request.project_id or _project_id_from_params(validated),
+            metadata=request.metadata,
+        )
+        if not wait:
+            return CreateJobResponse(job_id=job.id, status=job.status)
+        try:
+            return await job_registry.wait(job.id, timeout=timeout)
+        except asyncio.TimeoutError:
+            raise HTTPException(
+                status_code=504, detail="Job did not complete within the timeout."
+            )
+
+    @app.get(
+        "/api/jobs/{id}",
+        summary="Get Job",
+        tags=["Jobs"],
+        openapi_extra=ALLOW_AGENT,
+    )
+    async def get_job(
+        id: Annotated[str, Path(description="The job id.")],
+    ) -> JobRecord:
+        job = await job_registry.get(id)
+        if job is None:
+            raise HTTPException(status_code=404, detail=f"Job not found: {id}")
+        return job
+
+    @app.get(
+        "/api/jobs/{id}/result",
+        summary="Get Job Result",
+        tags=["Jobs"],
+        openapi_extra=ALLOW_AGENT,
+    )
+    async def get_job_result(
+        id: Annotated[str, Path(description="The job id.")],
+    ) -> dict[str, Any]:
+        job = await job_registry.get(id)
+        if job is None:
+            raise HTTPException(status_code=404, detail=f"Job not found: {id}")
+        if not job.status.is_terminal or job.result is None:
+            raise HTTPException(
+                status_code=404, detail="No result available for this job."
+            )
+        return job.result
+
+    @app.get(
+        "/api/jobs/{id}/wait",
+        summary="Wait For Job",
+        tags=["Jobs"],
+        openapi_extra=ALLOW_AGENT,
+    )
+    async def wait_for_job(
+        id: Annotated[str, Path(description="The job id.")],
+        timeout: Annotated[
+            float | None,
+            Query(
+                ge=0,
+                description="Seconds to wait before giving up (504 on timeout). "
+                "Omit to wait indefinitely.",
+            ),
+        ] = None,
+    ) -> JobRecord:
+        """Block until the job reaches a terminal state, then return its record.
+
+        A pure observer, like the SSE stream: if the client disconnects, uvicorn
+        cancels this handler coroutine, which cancels the wait() await and tears
+        down only the awaiter — the job's supervising task keeps running."""
+        try:
+            return await job_registry.wait(id, timeout=timeout)
+        except JobNotFoundError:
+            raise HTTPException(status_code=404, detail=f"Job not found: {id}")
+        except asyncio.TimeoutError:
+            raise HTTPException(
+                status_code=504, detail="Job did not complete within the timeout."
+            )
+
+    @app.get(
+        "/api/jobs/{id}/errors",
+        summary="Get Job Errors",
+        tags=["Jobs"],
+        openapi_extra=ALLOW_AGENT,
+    )
+    async def get_job_errors(
+        id: Annotated[str, Path(description="The job id.")],
+        run_id: Annotated[
+            str | None,
+            Query(description="Read the error log for a specific past run id."),
+        ] = None,
+    ) -> list[dict[str, Any]]:
+        # Always 200, never errors (functional_spec §5). A plain non-reconciling
+        # lookup of the current run_id — we don't recompute state for a
+        # best-effort diagnostic read.
+        resolved_run_id = run_id or job_registry.run_id_for(id)
+        if resolved_run_id is None:
+            return []
+        return error_log.read_errors(resolved_run_id)
+
+    @app.post(
+        "/api/jobs/{id}/pause",
+        summary="Pause Job",
+        tags=["Jobs"],
+        status_code=202,
+        openapi_extra=_JOB_MUTATION_APPROVAL,
+    )
+    async def pause_job(
+        id: Annotated[str, Path(description="The job id.")],
+    ) -> Response:
+        await _run_lifecycle(job_registry.pause, id)
+        return Response(status_code=202)
+
+    @app.post(
+        "/api/jobs/{id}/resume",
+        summary="Resume Job",
+        tags=["Jobs"],
+        status_code=202,
+        openapi_extra=_JOB_MUTATION_APPROVAL,
+    )
+    async def resume_job(
+        id: Annotated[str, Path(description="The job id.")],
+    ) -> Response:
+        await _run_lifecycle(job_registry.resume, id)
+        return Response(status_code=202)
+
+    @app.post(
+        "/api/jobs/{id}/cancel",
+        summary="Cancel Job",
+        tags=["Jobs"],
+        status_code=202,
+        openapi_extra=_JOB_MUTATION_APPROVAL,
+    )
+    async def cancel_job(
+        id: Annotated[str, Path(description="The job id.")],
+    ) -> Response:
+        await _run_lifecycle(job_registry.cancel, id)
+        return Response(status_code=202)
+
+    @app.delete(
+        "/api/jobs/{id}",
+        summary="Delete Job",
+        tags=["Jobs"],
+        status_code=204,
+        openapi_extra=_JOB_MUTATION_APPROVAL,
+    )
+    async def delete_job(
+        id: Annotated[str, Path(description="The job id.")],
+    ) -> Response:
+        await _run_lifecycle(job_registry.delete, id)
+        return Response(status_code=204)
+
+
+async def _run_lifecycle(operation, job_id: str) -> Any:
+    """Invoke a registry lifecycle op, mapping its exceptions to HTTP status.
+
+    JobNotFoundError -> 404, JobOperationError (invalid transition / unsupported
+    pause / delete in-flight) -> 409.
+    """
+    try:
+        return await operation(job_id)
+    except JobNotFoundError:
+        raise HTTPException(status_code=404, detail=f"Job not found: {job_id}")
+    except JobOperationError as exc:
+        raise HTTPException(status_code=409, detail=str(exc))
diff --git a/app/desktop/studio_server/jobs/error_log.py b/app/desktop/studio_server/jobs/error_log.py
new file mode 100644
index 000000000..6e8e23715
--- /dev/null
+++ b/app/desktop/studio_server/jobs/error_log.py
@@ -0,0 +1,84 @@
+from __future__ import annotations
+
+import json
+import tempfile
+from pathlib import Path
+from typing import Any
+
+ERROR_LOG_DIR_NAME = "kiln_jobs"
+
+
+def error_log_dir() -> Path:
+    """Return the directory (under the system temp dir) that holds the run logs."""
+    return Path(tempfile.gettempdir()) / ERROR_LOG_DIR_NAME
+
+
+def error_log_path(run_id: str) -> Path:
+    """Return the log file for ``run_id``, always directly inside error_log_dir().
+
+    ``run_id`` is not always server-generated (the job errors endpoint accepts it
+    as a query parameter), so it must be a bare file name. Values that would let
+    the joined path leave the log directory are rejected: "", "." and "..",
+    anything containing a path separator, and absolute or drive-qualified paths
+    (joining one of those onto a directory discards the directory).
+
+    Raises:
+        ValueError: if ``run_id`` is not a bare file name. append_error,
+            read_errors and delete_errors call this inside their try blocks, so
+            for them a bad id is just another swallowed failure.
+    """
+    if run_id in ("", ".", "..") or Path(run_id).name != run_id:
+        raise ValueError(f"Invalid run_id for error log: {run_id!r}")
+    return error_log_dir() / f"{run_id}.json"
+
+
+def append_error(run_id: str, entry: dict[str, Any]) -> None:
+    """Append a single error entry to this run's log (JSON Lines). Best-effort.
+
+    Creates the directory lazily. Any IO/serialization failure is swallowed —
+    the error log is a diagnostic convenience, never a guarantee.
+    """
+    try:
+        directory = error_log_dir()
+        directory.mkdir(parents=True, exist_ok=True)
+        line = json.dumps(entry, ensure_ascii=False)
+        with error_log_path(run_id).open("a", encoding="utf-8") as f:
+            f.write(line + "\n")
+    except Exception:
+        pass
+
+
+def read_errors(run_id: str) -> list[dict[str, Any]]:
+    """Read the error log for a run as a list of objects. Best-effort.
+
+    A missing or unreadable file returns [], as does a run_id that
+    error_log_path rejects. Individual unparsable lines are skipped rather than
+    failing the whole read. Never raises.
+    """
+    entries: list[dict[str, Any]] = []
+    try:
+        path = error_log_path(run_id)
+        if not path.exists():
+            return []
+        with path.open("r", encoding="utf-8") as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    parsed = json.loads(line)
+                except (ValueError, TypeError):
+                    continue
+                if isinstance(parsed, dict):
+                    entries.append(parsed)
+    except Exception:
+        return entries
+    return entries
+
+
+def delete_errors(run_id: str) -> None:
+    """Best-effort remove the error log file for a run. Swallows all errors."""
+    try:
+        error_log_path(run_id).unlink(missing_ok=True)
+    except Exception:
+        pass
diff --git a/app/desktop/studio_server/jobs/events.py b/app/desktop/studio_server/jobs/events.py
new file mode 100644
index 000000000..dacff3d48
--- /dev/null
+++ b/app/desktop/studio_server/jobs/events.py
@@ -0,0 +1,156 @@
+from __future__ import annotations
+
+import asyncio
+from typing import Any, AsyncGenerator, Callable, Literal
+
+from pydantic import BaseModel
+
+from .models import JobRecord
+
+
+class JobEvent(BaseModel):
+    """A single bus event. Per-job events carry the full record (idempotent snapshot)."""
+
+    event: Literal["snapshot", "job", "deleted", "ping"]
+    data: dict[str, Any]
+
+
+class _CloseSentinel:
+    """Pushed onto a subscriber's queue by ``shutdown()`` to end its stream
+    promptly (e.g. so a dev-server hot reload isn't blocked by open SSE
+    connections), distinct from a normal ``JobEvent``."""
+
+
+_CLOSE = _CloseSentinel()
+
+
+class _Subscriber:
+    """One bus subscription: an event queue plus optional filters (None = any)."""
+
+    def __init__(
+        self,
+        job_id: str | None,
+        type_name: str | None,
+        project_id: str | None,
+    ) -> None:
+        self.queue: asyncio.Queue[JobEvent | _CloseSentinel] = asyncio.Queue()
+        self.job_id, self.type_name, self.project_id = job_id, type_name, project_id
+
+    def matches(
+        self,
+        record_id: str | None,
+        record_type: str | None,
+        record_project_id: str | None,
+    ) -> bool:
+        # Every filter that is set must equal the record's value; unset ones pass.
+        filter_and_value = (
+            (self.job_id, record_id),
+            (self.type_name, record_type),
+            (self.project_id, record_project_id),
+        )
+        return all(f is None or f == v for f, v in filter_and_value)
+
+
+SnapshotProvider = Callable[[], list[JobRecord]]
+
+
+class JobEventBus:
+    """In-process async pub/sub bus feeding the SSE endpoint (Phase 2).
+
+    Subscribers receive an initial `snapshot` event, then per-job `job` events
+    and `deleted` tombstones, filtered by job_id / type / project_id.
+    """
+
+    def __init__(self, snapshot_provider: SnapshotProvider | None = None) -> None:
+        self._subscribers: set[_Subscriber] = set()
+        self._snapshot_provider = snapshot_provider
+        self._closed = False
+
+    def set_snapshot_provider(self, provider: SnapshotProvider) -> None:
+        self._snapshot_provider = provider
+
+    def _filtered_snapshot(self, subscriber: _Subscriber) -> list[JobRecord]:
+        if self._snapshot_provider is None:
+            return []
+        return [
+            record
+            for record in self._snapshot_provider()
+            if subscriber.matches(record.id, record.type, record.project_id)
+        ]
+
+    async def subscribe(
+        self,
+        job_id: str | None = None,
+        type_name: str | None = None,
+        project_id: str | None = None,
+        timeout: float | None = None,
+    ) -> AsyncGenerator[JobEvent, None]:
+        """Yield the initial snapshot then per-job events.
+
+        When ``timeout`` is set, a ``ping`` event is yielded after that many
+        seconds without a real event. The timeout MUST live here, inside the
+        generator: cancelling ``subscribe().__anext__()`` from the outside (e.g.
+        ``asyncio.wait_for``) throws CancelledError into the suspended generator,
+        runs its ``finally``, and finalizes it — so the very next ``__anext__``
+        would raise StopAsyncIteration and kill the stream after one ping.
+
+        The generator ends (returns) when ``shutdown()`` has been called: either
+        immediately if the bus is already closed, or as soon as the close
+        sentinel reaches the head of the queue.
+        """
+        if self._closed:
+            return
+        subscriber = _Subscriber(job_id, type_name, project_id)
+        self._subscribers.add(subscriber)
+        try:
+            snapshot = self._filtered_snapshot(subscriber)
+            yield JobEvent(
+                event="snapshot",
+                data={"jobs": [r.model_dump(mode="json") for r in snapshot]},
+            )
+            while True:
+                if timeout is None:
+                    item = await subscriber.queue.get()
+                else:
+                    try:
+                        item = await asyncio.wait_for(
+                            subscriber.queue.get(), timeout=timeout
+                        )
+                    except asyncio.TimeoutError:
+                        yield JobEvent(event="ping", data={})
+                        continue
+                if isinstance(item, _CloseSentinel):
+                    return
+                yield item
+        finally:
+            self._subscribers.discard(subscriber)
+
+    def shutdown(self) -> None:
+        """End every open subscription and reject new ones.
+
+        Pushes a close sentinel onto each subscriber's queue so its
+        ``subscribe()`` generator returns promptly. Used on server shutdown so a
+        long-lived SSE connection (the jobs stream the UI holds open) doesn't
+        keep the worker alive — e.g. blocking a dev-server hot reload. A pure
+        observer teardown: it never touches any job's supervising task.
+        """
+        self._closed = True
+        for subscriber in self._subscribers:
+            subscriber.queue.put_nowait(_CLOSE)
+
+    def publish_job(self, record: JobRecord) -> None:
+        event = JobEvent(event="job", data=record.model_dump(mode="json"))
+        for subscriber in self._subscribers:
+            if subscriber.matches(record.id, record.type, record.project_id):
+                subscriber.queue.put_nowait(event)
+
+    def publish_deleted(
+        self,
+        job_id: str,
+        type_name: str | None = None,
+        project_id: str | None = None,
+    ) -> None:
+        event = JobEvent(event="deleted", data={"id": job_id})
+        for subscriber in self._subscribers:
+            if subscriber.matches(job_id, type_name, project_id):
+                subscriber.queue.put_nowait(event)
diff --git a/app/desktop/studio_server/jobs/models.py b/app/desktop/studio_server/jobs/models.py
new file mode 100644
index 000000000..3d5404f64
--- /dev/null
+++ b/app/desktop/studio_server/jobs/models.py
@@ -0,0 +1,204 @@
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from enum import Enum
+from typing import (
+    Any,
+    Awaitable,
+    Callable,
+    ClassVar,
+    Generic,
+    TypeVar,
+)
+
+from pydantic import BaseModel, Field
+
+
+def _utc_now() -> datetime:
+    """Return the current moment as a timezone-aware UTC ``datetime``."""
+    return datetime.now(tz=timezone.utc)
+
+
+class BackgroundJobStatus(str, Enum):
+    """Lifecycle states of a background job; member values are plain strings."""
+
+    PENDING = "pending"
+    RUNNING = "running"
+    PAUSED = "paused"
+    SUCCEEDED = "succeeded"
+    FAILED = "failed"
+    CANCELLED = "cancelled"
+
+    @property
+    def is_terminal(self) -> bool:
+        """Whether this status is one of ``TERMINAL_STATUSES``."""
+        # Looked up at call time, so the constant can be defined after the class.
+        return self in TERMINAL_STATUSES
+
+
+# The statuses for which ``is_terminal`` is true.
+TERMINAL_STATUSES = frozenset(
+    (
+        BackgroundJobStatus.SUCCEEDED,
+        BackgroundJobStatus.FAILED,
+        BackgroundJobStatus.CANCELLED,
+    )
+)
+
+
+class JobProgress(BaseModel):
+    """Count-based progress for a job.
+
+    Processed = success + error; remaining = total - success - error. The error
+    field is a count only — the actual messages live in the per-run error log.
+    """
+
+    # Expected number of items; None until a total has been supplied.
+    total: int | None = None
+    # Number of items counted as successful.
+    success: int = 0
+    # Number of items counted as errored (a count only, no messages).
+    error: int = 0
+    # Optional free-form status text.
+    message: str | None = None
+    # Defaults to the current UTC time when the instance is constructed.
+    updated_at: datetime = Field(default_factory=_utc_now)
+
+
+class JobDerivedState(BaseModel):
+    """A worker's view of the operation's true state, read from source-of-truth entities.
+
+    Returned by ``JobWorker.compute_state()``. The count fields and message
+    mirror ``JobProgress``.
+    """
+
+    # Expected number of items; None when the worker cannot state a total.
+    total: int | None = None
+    # Number of items the worker found completed successfully.
+    success: int = 0
+    # Number of items the worker found in error.
+    error: int = 0
+    # True when the worker considers the whole operation finished.
+    is_complete: bool = False
+    # Optional free-form status text.
+    message: str | None = None
+
+
+class JobError(BaseModel):
+    """Small failure summary stamped on the record. Detail lives in the error log."""
+
+    # Short human-readable failure text.
+    error: str | None = None
+    # Optional structured extras; no schema is enforced here.
+    detail: dict[str, Any] | None = None
+
+
+class JobRecord(BaseModel):
+    """Ephemeral, in-memory bookkeeping for a single job. Never persisted to disk."""
+
+    # Job identifier.
+    id: str
+    # Name of the job type, used to look up the worker that runs it.
+    type: str
+    # Current lifecycle status.
+    status: BackgroundJobStatus
+    # Identifier of the current/most recent run; None until a run has started.
+    run_id: str | None = None
+    # Generic count-based progress snapshot.
+    progress: JobProgress = Field(default_factory=JobProgress)
+    # Typed, per-worker progress detail (validated against the worker's
+    # `progress_model`). The generic `progress` above is the universal counter;
+    # this carries the rich per-kind shape a worker needs the UI to render
+    # (e.g. RAG's four-phase breakdown). Kept as a dict on the wire so the core
+    # stays worker-agnostic; the frontend casts it to the worker's model.
+    progress_detail: dict[str, Any] | None = None
+    # Job parameters kept as a plain dict.
+    params: dict[str, Any] = Field(default_factory=dict)
+    # Job result as a plain dict; None when no result has been recorded.
+    result: dict[str, Any] | None = None
+    # Failure summary; None when no failure has been recorded.
+    error: JobError | None = None
+    # Free-form metadata dict; no schema is enforced here.
+    metadata: dict[str, Any] = Field(default_factory=dict)
+    # Optional owning project id.
+    project_id: str | None = None
+    # Whether this job may be paused.
+    supports_pause: bool = False
+    # Timestamps; the two defaults are the current UTC time at construction.
+    created_at: datetime = Field(default_factory=_utc_now)
+    updated_at: datetime = Field(default_factory=_utc_now)
+    started_at: datetime | None = None
+    ended_at: datetime | None = None
+
+
+# Signatures of the async callbacks handed to JobContext.
+# ReportProgress: receives one JobProgressUpdate (forward-referenced by name
+# because that class is defined below).
+ReportProgress = Callable[["JobProgressUpdate"], Awaitable[None]]
+# ReportProgressDetail: receives a worker-specific pydantic model instance.
+ReportProgressDetail = Callable[[BaseModel], Awaitable[None]]
+# ReportError: receives an error message and a dict of extra fields.
+ReportError = Callable[[str, dict[str, Any]], Awaitable[None]]
+
+
+class JobProgressUpdate(BaseModel):
+    """Payload built by ``JobContext.report_progress()`` for the progress callback."""
+
+    # Success count; the only required field.
+    success: int
+    # Error count.
+    error: int = 0
+    # Optional total; None when the caller did not supply one.
+    total: int | None = None
+    # Optional status text; None when the caller did not supply one.
+    message: str | None = None
+
+
+class JobContext:
+    """Handle given to a worker by JobRegistry for the duration of run().
+
+    Exposes the job_id and run_id of the current run and forwards the three
+    reporting calls — progress, typed progress detail, per-item errors — to
+    callbacks injected at construction time.
+    """
+
+    def __init__(
+        self,
+        job_id: str,
+        run_id: str,
+        report_progress: ReportProgress,
+        report_progress_detail: ReportProgressDetail,
+        report_error: ReportError,
+    ) -> None:
+        self.job_id, self.run_id = job_id, run_id
+        # Injected callbacks; the public coroutines below only forward to them.
+        self._report_progress = report_progress
+        self._report_progress_detail = report_progress_detail
+        self._report_error = report_error
+
+    async def report_progress(
+        self,
+        success: int,
+        error: int = 0,
+        total: int | None = None,
+        message: str | None = None,
+    ) -> None:
+        """Report count-based progress for the current run.
+
+        Packs the arguments into a ``JobProgressUpdate`` and awaits the
+        injected progress callback. A UI-smoothing signal only — the
+        authoritative progress comes from compute_state(). Cheap to call often.
+        """
+        update = JobProgressUpdate(
+            success=success, error=error, total=total, message=message
+        )
+        await self._report_progress(update)
+
+    async def report_progress_detail(self, detail: BaseModel) -> None:
+        """Report worker-specific, typed progress for the current run.
+
+        For rich per-kind progress the generic counter can't carry (e.g. RAG's
+        per-phase breakdown). `detail` must be an instance of the worker's
+        declared `progress_model`; the registry validates and serializes it.
+        A UI-smoothing signal only — authoritative progress comes from
+        compute_state(). Cheap to call often.
+        """
+        await self._report_progress_detail(detail)
+
+    async def report_error(self, error_message: str, **extra: Any) -> None:
+        """Record one non-fatal, per-item error for the current run.
+
+        The keyword arguments are passed to the error callback as a single
+        dict. Best-effort: a failed write is swallowed, never propagated. Does
+        not itself bump the progress error count — report that via
+        report_progress.
+        """
+        await self._report_error(error_message, extra)
+
+
+# Type parameters for JobWorker: the worker's params model and result model,
+# both constrained to pydantic BaseModel subclasses.
+TParams = TypeVar("TParams", bound=BaseModel)
+TResult = TypeVar("TResult", bound=BaseModel)
+
+
+class JobWorker(Generic[TParams, TResult]):
+    """Base class for job implementations, generic over params and result models.
+
+    Subclasses declare the class-level attributes below and override ``run()``
+    (the base implementation raises ``NotImplementedError``).
+    """
+
+    # Unique name of this job type.
+    type_name: ClassVar[str]
+    # Pydantic model describing the job's params.
+    params_model: ClassVar[type[BaseModel]]
+    # Pydantic model describing the job's result.
+    result_model: ClassVar[type[BaseModel]]
+    # Optional typed model for rich per-worker progress reported via
+    # JobContext.report_progress_detail(); stamped on JobRecord.progress_detail.
+    # Leave None for workers whose generic count progress is enough.
+    progress_model: ClassVar[type[BaseModel] | None] = None
+    # Whether jobs of this type may be paused; off by default.
+    supports_pause: ClassVar[bool] = False
+
+    async def compute_state(self, params: TParams) -> JobDerivedState | None:
+        """Read source-of-truth Kiln entities and return the operation's true state.
+
+        MUST be a pure read — no side effects, idempotent, safe to call any time.
+        Return None only when the worker has no backing entity to consult (e.g.
+        the NoopJob fixture); the registry then keeps the last believed snapshot.
+        Real workers must override this.
+        """
+        return None
+
+    async def run(self, params: TParams, ctx: JobContext) -> TResult:
+        """MUST be idempotent. Covers both first run and resume — the registry
+        calls run() again to resume a paused job; the worker re-orients via
+        compute_state(), not a handed-in checkpoint.
+
+        The base implementation raises NotImplementedError.
+        """
+        raise NotImplementedError
diff --git a/app/desktop/studio_server/jobs/registry.py b/app/desktop/studio_server/jobs/registry.py
new file mode 100644
index 000000000..00ddff357
--- /dev/null
+++ b/app/desktop/studio_server/jobs/registry.py
@@ -0,0 +1,539 @@
+from __future__ import annotations
+
+import asyncio
+import logging
+import os
+import secrets
+import traceback
+import uuid
+from datetime import datetime
+from typing import Any
+
+from pydantic import BaseModel
+
+from . import error_log
+from .events import JobEventBus
+from .models import (
+    BackgroundJobStatus,
+    JobContext,
+    JobDerivedState,
+    JobError,
+    JobProgress,
+    JobProgressUpdate,
+    JobRecord,
+    JobWorker,
+    _utc_now,
+)
+
+logger = logging.getLogger(__name__)
+
+# Concurrency limit used when neither an explicit value nor a valid
+# environment override is supplied (see _resolve_max_concurrent).
+DEFAULT_MAX_CONCURRENT = 10
+# Name of the environment variable that can override the limit.
+MAX_CONCURRENT_ENV_VAR = "KILN_JOBS_MAX_CONCURRENT"
+
+# Job ids are "j_" followed by _JOB_ID_LENGTH characters drawn from this
+# 32-symbol alphabet (lowercase a-z plus digits 2-7).
+_JOB_ID_ALPHABET = "abcdefghijklmnopqrstuvwxyz234567"
+_JOB_ID_LENGTH = 12
+
+
+class JobNotFoundError(Exception):
+    """Raised when a job id is not present in the registry."""
+
+
+class JobOperationError(Exception):
+    """Raised for invalid lifecycle operations (e.g. pause a non-running job).
+
+    Also raised by ``JobRegistry.worker_for`` for an unregistered job type.
+    Phase 2 maps these to 409 Conflict.
+    """
+
+
+def _new_job_id() -> str:
+    """Return a random job id: ``"j_"`` plus ``_JOB_ID_LENGTH`` alphabet characters.
+
+    Characters are drawn with ``secrets.choice``. Uniqueness against existing
+    jobs is not checked here.
+    """
+    chars = [secrets.choice(_JOB_ID_ALPHABET) for _ in range(_JOB_ID_LENGTH)]
+    return "j_" + "".join(chars)
+
+
+def _resolve_max_concurrent(explicit: int | None) -> int:
+    """Pick the concurrency limit for a registry.
+
+    Precedence: an explicit value is returned unchanged; otherwise a positive
+    integer from the ``KILN_JOBS_MAX_CONCURRENT`` environment variable;
+    otherwise ``DEFAULT_MAX_CONCURRENT``. An environment value that is set but
+    is not a positive integer is ignored with a warning, so a misconfiguration
+    shows up in the logs instead of silently falling back.
+    """
+    if explicit is not None:
+        return explicit
+    raw = os.environ.get(MAX_CONCURRENT_ENV_VAR)
+    if not raw:
+        # Unset or empty: nothing to report, use the default.
+        return DEFAULT_MAX_CONCURRENT
+    try:
+        value = int(raw)
+    except ValueError:
+        value = 0
+    if value > 0:
+        return value
+    logger.warning(
+        "Ignoring invalid %s=%r; using default of %d",
+        MAX_CONCURRENT_ENV_VAR,
+        raw,
+        DEFAULT_MAX_CONCURRENT,
+    )
+    return DEFAULT_MAX_CONCURRENT
+
+
+class JobRegistry:
+    """In-memory registry owning job lifecycle, concurrency, and reconciliation.
+
+    Singleton per process. The in-memory index is the only store — no disk
+    persistence of state. Supervising tasks are owned here and decoupled from any
+    HTTP connection.
+    """
+
+    def __init__(self, max_concurrent: int | None = None) -> None:
+        """Create an empty registry.
+
+        Args:
+            max_concurrent: Limit on simultaneously running jobs. When None the
+                limit is resolved by ``_resolve_max_concurrent`` (environment
+                override, then the module default).
+        """
+        self._max_concurrent = _resolve_max_concurrent(max_concurrent)
+        # Worker instances keyed by their type_name.
+        self._workers: dict[str, JobWorker] = {}
+        # All known job records keyed by job id.
+        self._jobs: dict[str, JobRecord] = {}
+        # Supervising task per launched job; an entry exists while its slot is held.
+        self._tasks: dict[str, asyncio.Task] = {}
+        # FIFO queue of job ids waiting for a free slot.
+        self._pending_ids: list[str] = []
+        # Job ids with a cancel / pause request in flight. _pause_intent decides
+        # paused-vs-cancelled when the task ends; _cancel_intent is only written
+        # and cleared in this class.
+        self._cancel_intent: set[str] = set()
+        self._pause_intent: set[str] = set()
+        # Job ids whose supervising task received a real (delivered-to-a-live-
+        # task) cancellation. Distinguishes "worker swallowed a cancel" from
+        # "worker finished before any cancel landed" when the worker returns
+        # normally — the former must transition to paused/cancelled, the latter
+        # must keep its succeeded result.
+        self._cancel_delivered: set[str] = set()
+        # Per-job completion events for awaiters (registry.wait). Created lazily
+        # by wait(); set by _emit() on the terminal transition; reclaimed in
+        # delete(). Bounded to one event per waited job, tracking the same
+        # lifecycle as the JobRecord. Shared across all awaiters of a job so one
+        # awaiter cancelling its wait() leaves the event (and the task) untouched.
+        self._completion_events: dict[str, asyncio.Event] = {}
+        # Number of slots currently held by launched jobs.
+        self._running_count = 0
+        # Event bus for subscribers; given _snapshot as its snapshot provider.
+        self.events = JobEventBus(snapshot_provider=self._snapshot)
+
+    # -- registration --------------------------------------------------------
+
+    def register_type(self, worker_cls: type[JobWorker]) -> None:
+        """Instantiate ``worker_cls`` and index the instance by its ``type_name``.
+
+        Registering another class with the same ``type_name`` replaces the
+        earlier instance.
+        """
+        self._workers[worker_cls.type_name] = worker_cls()
+
+    def worker_for(self, type_name: str) -> JobWorker:
+        """Return the worker registered under ``type_name``.
+
+        Raises:
+            JobOperationError: if no worker is registered for that name.
+        """
+        registered = self._workers.get(type_name)
+        if registered is not None:
+            return registered
+        raise JobOperationError(f"Unknown job type: {type_name}")
+
+    # -- snapshots / reads ---------------------------------------------------
+
+    def _snapshot(self) -> list[JobRecord]:
+        """Return a new list holding every job record currently in the registry."""
+        return [*self._jobs.values()]
+
+    def _require(self, job_id: str) -> JobRecord:
+        """Return the record for ``job_id``.
+
+        Raises:
+            JobNotFoundError: if the id is not in the registry.
+        """
+        found = self._jobs.get(job_id)
+        if found is not None:
+            return found
+        raise JobNotFoundError(job_id)
+
+    async def get(self, job_id: str) -> JobRecord | None:
+        """Return the record for ``job_id`` after reconciling it, or None.
+
+        A known job is first reconciled against its worker's derived state
+        (emitting an event if that changed anything); an unknown id yields None.
+        """
+        record = self._jobs.get(job_id)
+        if record is not None:
+            await self._reconcile(record, emit_on_change=True)
+        return record
+
+    def run_id_for(self, job_id: str) -> str | None:
+        """Return the job's current run_id, or None when the job is unknown.
+
+        A plain dictionary read with no reconciliation (used by the best-effort
+        errors endpoint).
+        """
+        record = self._jobs.get(job_id)
+        if record is None:
+            return None
+        return record.run_id
+
+    def list_jobs(
+        self,
+        status: BackgroundJobStatus | None = None,
+        type_name: str | None = None,
+        project_id: str | None = None,
+        since: datetime | None = None,
+        limit: int | None = None,
+    ) -> list[JobRecord]:
+        """Return job records matching every supplied filter, newest first.
+
+        Each filter is skipped when None. ``since`` keeps records whose
+        ``created_at`` is at or after it. Results are sorted by ``created_at``
+        descending and then cut to the first ``limit`` entries when a limit is
+        given. No reconciliation is performed.
+        """
+
+        def keep(rec: JobRecord) -> bool:
+            if status is not None and not (rec.status == status):
+                return False
+            if type_name is not None and not (rec.type == type_name):
+                return False
+            if project_id is not None and not (rec.project_id == project_id):
+                return False
+            if since is not None and not (rec.created_at >= since):
+                return False
+            return True
+
+        matched = sorted(
+            (rec for rec in self._jobs.values() if keep(rec)),
+            key=lambda rec: rec.created_at,
+            reverse=True,
+        )
+        return matched if limit is None else matched[:limit]
+
+    # -- create --------------------------------------------------------------
+
+    async def create(
+        self,
+        type_name: str,
+        params: dict[str, Any] | BaseModel,
+        project_id: str | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> JobRecord:
+        """Register a new pending job and try to dispatch it immediately.
+
+        Params are validated against the worker's ``params_model`` and stored
+        as their JSON-mode dump. The record is queued, an event is emitted, and
+        pending jobs are dispatched up to the concurrency limit — so the
+        returned record may already be RUNNING.
+
+        Raises:
+            JobOperationError: if ``type_name`` is not registered.
+        """
+        worker = self.worker_for(type_name)
+        clean_params = self._validate_params(worker, params)
+        record = JobRecord(
+            id=self._fresh_job_id(),
+            type=type_name,
+            status=BackgroundJobStatus.PENDING,
+            params=clean_params.model_dump(mode="json"),
+            metadata=metadata if metadata else {},
+            project_id=project_id,
+            supports_pause=worker.supports_pause,
+        )
+        self._jobs[record.id] = record
+        self._pending_ids.append(record.id)
+        self._emit(record)
+        self._dispatch_pending()
+        return record
+
+    def _fresh_job_id(self) -> str:
+        """Generate job ids until one is not already a key of ``_jobs``."""
+        while True:
+            candidate = _new_job_id()
+            if candidate not in self._jobs:
+                return candidate
+
+    def _validate_params(
+        self, worker: JobWorker, params: dict[str, Any] | BaseModel
+    ) -> BaseModel:
+        """Coerce ``params`` into an instance of the worker's ``params_model``.
+
+        An instance of that model is returned as-is; any other pydantic model
+        is dumped to a dict first; the dict is then validated by the model.
+        """
+        model = worker.params_model
+        if isinstance(params, model):
+            return params
+        raw = params.model_dump() if isinstance(params, BaseModel) else params
+        return model.model_validate(raw)
+
+    # -- dispatch / supervision ---------------------------------------------
+
+    def _dispatch_pending(self) -> None:
+        """Launch queued jobs in FIFO order while slots are free.
+
+        Queue entries whose record is gone or no longer PENDING are dropped
+        without consuming a slot.
+        """
+        while self._pending_ids and self._running_count < self._max_concurrent:
+            candidate = self._jobs.get(self._pending_ids.pop(0))
+            if (
+                candidate is not None
+                and candidate.status == BackgroundJobStatus.PENDING
+            ):
+                self._launch(candidate)
+
+    def _launch(self, job: JobRecord) -> None:
+        """Move ``job`` to RUNNING, take a slot, and start its supervising task.
+
+        A fresh uuid4 run_id is assigned per launch. The RUNNING event is
+        emitted before the task is created.
+        """
+        worker = self.worker_for(job.type)
+        job.run_id = str(uuid.uuid4())
+        job.status = BackgroundJobStatus.RUNNING
+        job.started_at = _utc_now()
+        self._touch(job)
+        self._running_count += 1
+        self._emit(job)
+        self._tasks[job.id] = asyncio.create_task(
+            self._supervise(job.id, worker, job.run_id)
+        )
+
+    async def _supervise(self, job_id: str, worker: JobWorker, run_id: str) -> None:
+        """Drive one launch of ``job_id`` to a final status, then free its slot.
+
+        Reconciles first (returning early if that already shows the job
+        succeeded), then awaits ``worker.run()`` and records the outcome:
+        succeeded, failed (any ``Exception``), or paused/cancelled
+        (``CancelledError``, re-raised after the status is recorded). The slot
+        taken in ``_launch`` is released on every exit path.
+        """
+        job = self._jobs.get(job_id)
+        try:
+            if job is None:
+                # Nothing to run, but still fall through to the finally so the
+                # slot counted by _launch is not leaked.
+                return
+            try:
+                # Done inside the try: if the stored params fail validation or
+                # the context cannot be built, the job is marked failed instead
+                # of being stranded in RUNNING with its slot held.
+                params = worker.params_model.model_validate(job.params)
+                ctx = self._build_context(job_id, run_id, worker)
+                await self._reconcile(job, emit_on_change=True)
+                if job.status == BackgroundJobStatus.SUCCEEDED:
+                    return
+                result = await worker.run(params, ctx)
+                # The cancellation transition is unconditional (functional_spec
+                # §2): a worker that catches CancelledError for cleanup and then
+                # returns normally — even one that calls task.uncancel() so it is
+                # never re-raised — must still land in paused/cancelled, not
+                # succeeded. The registry enforces this off its own delivery
+                # record rather than trusting the worker to re-raise. A worker
+                # that finished naturally before any cancel landed has no
+                # delivery recorded, so its result stands.
+                if job_id in self._cancel_delivered:
+                    self._finish_cancelled_or_paused(job)
+                else:
+                    self._finish_succeeded(job, result)
+            except asyncio.CancelledError:
+                self._finish_cancelled_or_paused(job)
+                raise
+            except Exception as exc:
+                self._finish_failed(job, run_id, exc)
+        finally:
+            self._release_slot(job_id)
+
+    def _build_context(self, job_id: str, run_id: str, worker: JobWorker) -> JobContext:
+        """Build the JobContext for one run, wiring its callbacks to this registry.
+
+        The progress callbacks are no-ops once the job is gone or has moved on
+        to a different run_id, so a stale run cannot overwrite newer state.
+        """
+
+        def live_job() -> JobRecord | None:
+            # The record, but only while it still belongs to this run.
+            record = self._jobs.get(job_id)
+            if record is None or record.run_id != run_id:
+                return None
+            return record
+
+        async def report_progress(update: JobProgressUpdate) -> None:
+            record = live_job()
+            if record is None:
+                return
+            previous = record.progress
+            # success/error are replaced; total/message are kept when omitted.
+            record.progress = JobProgress(
+                total=previous.total if update.total is None else update.total,
+                success=update.success,
+                error=update.error,
+                message=previous.message
+                if update.message is None
+                else update.message,
+            )
+            self._touch(record)
+            self._emit(record)
+
+        async def report_progress_detail(detail: BaseModel) -> None:
+            record = live_job()
+            if record is None:
+                return
+            # Enforce the worker's declared progress_model (when it has one) so
+            # the frontend can rely on the shape of progress_detail.
+            expected = worker.progress_model
+            if expected is not None and not isinstance(detail, expected):
+                raise TypeError(
+                    f"report_progress_detail expected {expected.__name__}, "
+                    f"got {type(detail).__name__}"
+                )
+            record.progress_detail = detail.model_dump(mode="json")
+            self._touch(record)
+            self._emit(record)
+
+        async def report_error(message: str, extra: dict[str, Any]) -> None:
+            entry = {"error_message": message, **extra}
+            error_log.append_error(run_id, entry)
+
+        return JobContext(
+            job_id, run_id, report_progress, report_progress_detail, report_error
+        )
+
+    def _finish_succeeded(self, job: JobRecord, result: BaseModel) -> None:
+        """Mark ``job`` SUCCEEDED, store the JSON-mode dump of ``result``, and emit."""
+        job.status = BackgroundJobStatus.SUCCEEDED
+        dumped = result.model_dump(mode="json")
+        job.result = dumped
+        job.ended_at = _utc_now()
+        self._touch(job)
+        self._emit(job)
+
+    def _finish_failed(self, job: JobRecord, run_id: str, exc: Exception) -> None:
+        """Mark ``job`` FAILED, log the exception to the run's error log, and emit.
+
+        The summary text is ``str(exc)``, falling back to the exception class
+        name when that is empty. The error-log entry also carries the formatted
+        traceback and ``fatal: True``.
+        """
+        job.status = BackgroundJobStatus.FAILED
+        summary = str(exc) or exc.__class__.__name__
+        job.error = JobError(error=summary)
+        job.ended_at = _utc_now()
+        self._touch(job)
+        trace_text = "".join(
+            traceback.format_exception(type(exc), exc, exc.__traceback__)
+        )
+        error_log.append_error(
+            run_id,
+            {"error_message": summary, "traceback": trace_text, "fatal": True},
+        )
+        self._emit(job)
+
+    def _finish_cancelled_or_paused(self, job: JobRecord) -> None:
+        """Record the end of a cancelled run as PAUSED or CANCELLED, then emit.
+
+        PAUSED when a pause request is in flight for the job; otherwise
+        CANCELLED, which also stamps ``ended_at``.
+        """
+        if job.id not in self._pause_intent:
+            job.status = BackgroundJobStatus.CANCELLED
+            job.ended_at = _utc_now()
+        else:
+            job.status = BackgroundJobStatus.PAUSED
+        self._touch(job)
+        self._emit(job)
+
+    # -- lifecycle controls --------------------------------------------------
+
+    async def pause(self, job_id: str) -> JobRecord:
+        """Pause a running job and return its record.
+
+        Cancels the supervising task with pause intent, then refreshes progress
+        from the worker's ``compute_state()`` and emits. A failure while
+        computing state is logged and ignored: the job is already paused at
+        that point, so the pause itself must not be reported as failed (same
+        fallback ``_reconcile`` uses).
+
+        Raises:
+            JobNotFoundError: if the job does not exist.
+            JobOperationError: if the job type does not support pause, or the
+                job is not RUNNING.
+        """
+        job = self._require(job_id)
+        if not job.supports_pause:
+            raise JobOperationError(f"Job type '{job.type}' does not support pause")
+        if job.status != BackgroundJobStatus.RUNNING:
+            raise JobOperationError(
+                f"Cannot pause a job in status '{job.status.value}'"
+            )
+        self._pause_intent.add(job_id)
+        await self._cancel_task(job_id)
+        # If run() completed naturally during the cancel await, the job is
+        # already terminal — leave that state intact rather than forcing paused.
+        if job.status.is_terminal:
+            return job
+        if job.status != BackgroundJobStatus.PAUSED:
+            job.status = BackgroundJobStatus.PAUSED
+            self._touch(job)
+        worker = self.worker_for(job.type)
+        try:
+            params = worker.params_model.model_validate(job.params)
+            derived = await worker.compute_state(params)
+        except Exception:
+            # Keep the last in-memory progress; the paused state still stands.
+            logger.exception("Failed to compute state for paused job %s", job.id)
+            derived = None
+        self._apply_derived(job, derived)
+        self._emit(job)
+        return job
+
+    async def resume(self, job_id: str) -> JobRecord:
+        """Resume a paused job and return its record.
+
+        Progress is refreshed from the worker's ``compute_state()`` first. If
+        that reports the work complete, the job goes straight to SUCCEEDED;
+        otherwise it is re-queued as PENDING and dispatch is attempted.
+
+        Raises:
+            JobNotFoundError: if the job does not exist.
+            JobOperationError: if the job is not PAUSED.
+        """
+        job = self._require(job_id)
+        if job.status != BackgroundJobStatus.PAUSED:
+            raise JobOperationError(
+                f"Cannot resume a job in status '{job.status.value}'"
+            )
+        worker = self.worker_for(job.type)
+        derived = await worker.compute_state(
+            worker.params_model.model_validate(job.params)
+        )
+        self._apply_derived(job, derived)
+        already_done = derived is not None and derived.is_complete
+        if already_done:
+            job.status = BackgroundJobStatus.SUCCEEDED
+            job.ended_at = _utc_now()
+            self._touch(job)
+            self._emit(job)
+            return job
+        job.status = BackgroundJobStatus.PENDING
+        self._touch(job)
+        self._pending_ids.append(job_id)
+        self._emit(job)
+        self._dispatch_pending()
+        return job
+
+    async def cancel(self, job_id: str) -> JobRecord:
+        """Cancel a pending, paused, or running job and return its record.
+
+        A running job has its supervising task cancelled and awaited; a pending
+        job is removed from the queue; a paused job has no live task. If the
+        job is not terminal afterwards it is marked CANCELLED and an event is
+        emitted. A job that reached another terminal status while the task was
+        being awaited keeps that status.
+
+        Raises:
+            JobNotFoundError: if the job does not exist.
+            JobOperationError: if the job is already terminal.
+        """
+        job = self._require(job_id)
+        if job.status.is_terminal:
+            raise JobOperationError(
+                f"Cannot cancel a job in status '{job.status.value}'"
+            )
+        if job.status == BackgroundJobStatus.RUNNING:
+            self._cancel_intent.add(job_id)
+            await self._cancel_task(job_id)
+        else:
+            # Pending or paused: no live task. Dropping the queue entry is a
+            # no-op for a paused job.
+            self._remove_pending(job_id)
+        if not job.status.is_terminal:
+            job.status = BackgroundJobStatus.CANCELLED
+            job.ended_at = _utc_now()
+            self._touch(job)
+            self._emit(job)
+        # Return the record we hold rather than re-reading _jobs: the job may
+        # have been deleted while the task was awaited, which would KeyError.
+        return job
+
+    async def delete(self, job_id: str) -> None:
+        """Remove a terminal job from the registry and announce the deletion.
+
+        Drops the record, any queue entry and any completion event, deletes the
+        run's error log when the job has a run_id, then publishes a ``deleted``
+        event.
+
+        Raises:
+            JobNotFoundError: if the job does not exist.
+            JobOperationError: if the job is not in a terminal status.
+        """
+        record = self._require(job_id)
+        if not record.status.is_terminal:
+            raise JobOperationError(
+                f"Cannot delete a job in status '{record.status.value}'"
+            )
+        self._jobs.pop(job_id, None)
+        self._remove_pending(job_id)
+        self._completion_events.pop(job_id, None)
+        run_id = record.run_id
+        if run_id is not None:
+            error_log.delete_errors(run_id)
+        self.events.publish_deleted(job_id, record.type, record.project_id)
+
+    async def _cancel_task(self, job_id: str) -> None:
+        """Cancel the job's supervising task, wait for it to end, free its slot.
+
+        Does nothing when no task is registered for ``job_id``. Exceptions from
+        the awaited task are swallowed here (CancelledError silently, others
+        with a debug log).
+
+        NOTE(review): the ``except asyncio.CancelledError`` also catches a
+        cancellation aimed at the caller of this method while it is awaiting —
+        confirm that swallowing it is intended.
+        """
+        task = self._tasks.get(job_id)
+        if task is None:
+            return
+        # cancel() returns True only if the request landed on a not-yet-done
+        # task — i.e. the cancellation is actually delivered to the worker. If
+        # it returns False the worker already finished naturally; we must not
+        # override that terminal result.
+        if task.cancel():
+            self._cancel_delivered.add(job_id)
+        try:
+            await task
+        except asyncio.CancelledError:
+            pass
+        except Exception:
+            # The worker raised while we awaited its cancellation. _supervise
+            # already routed this to the failed/terminal state and logged it;
+            # we only debug-log here so it isn't silently discarded.
+            logger.debug(
+                "Worker for job %s raised during cancel await", job_id, exc_info=True
+            )
+        # If the task was cancelled before its coroutine body ever ran, its own
+        # finally never executed, so reclaim the slot here. Idempotent: whoever
+        # pops job_id from _tasks first owns the single decrement.
+        self._release_slot(job_id)
+
+    def _release_slot(self, job_id: str) -> None:
+        """Give back the job's concurrency slot and dispatch the next pending job.
+
+        Idempotent: only the call that actually removes the job's entry from
+        ``_tasks`` clears the intent sets and decrements the running count.
+        """
+        task = self._tasks.pop(job_id, None)
+        if task is None:
+            return
+        for intents in (
+            self._cancel_intent,
+            self._pause_intent,
+            self._cancel_delivered,
+        ):
+            intents.discard(job_id)
+        self._running_count -= 1
+        self._dispatch_pending()
+
+    def _remove_pending(self, job_id: str) -> None:
+        """Drop the first queue entry for ``job_id``; do nothing if it is not queued."""
+        if job_id in self._pending_ids:
+            self._pending_ids.remove(job_id)
+
+    # -- reconciliation ------------------------------------------------------
+
+    async def _reconcile(self, job: JobRecord, emit_on_change: bool) -> bool:
+        """Bring ``job`` in line with the state its worker derives from source of truth.
+
+        Applies the derived progress and, when the worker reports the operation
+        complete and the job is not yet terminal, marks the job SUCCEEDED.
+
+        Args:
+            job: The record to update in place.
+            emit_on_change: Emit a job event when anything changed.
+
+        Returns:
+            True if the record changed; False if nothing changed, the job type
+            has no registered worker, ``compute_state`` raised, or it returned
+            None.
+        """
+        worker = self._workers.get(job.type)
+        if worker is None:
+            return False
+        params = worker.params_model.model_validate(job.params)
+        try:
+            derived = await worker.compute_state(params)
+        except Exception:
+            # compute_state may touch on-disk entities (project/task/eval) that
+            # could be deleted or transiently unavailable. A failure here must
+            # not 500 the GET /api/jobs/{id} read or crash _supervise's initial
+            # reconcile — fall back to the last known in-memory state.
+            logger.exception("Failed to compute state for job %s", job.id)
+            return False
+        if derived is None:
+            return False
+        changed = self._apply_derived(job, derived)
+        if derived.is_complete and not job.status.is_terminal:
+            job.status = BackgroundJobStatus.SUCCEEDED
+            job.ended_at = _utc_now()
+            self._touch(job)
+            changed = True
+        if changed and emit_on_change:
+            self._emit(job)
+        return changed
+
+    def _apply_derived(self, job: JobRecord, derived: JobDerivedState | None) -> bool:
+        """Replace the job's progress with the derived counts if they differ.
+
+        ``total`` and ``message`` fall back to the job's current values when
+        the derived state leaves them as None. The comparison ignores
+        ``updated_at``. Returns True when the progress was replaced, False when
+        ``derived`` is None or nothing differs.
+        """
+        if derived is None:
+            return False
+        current = job.progress
+        candidate = JobProgress(
+            total=current.total if derived.total is None else derived.total,
+            success=derived.success,
+            error=derived.error,
+            message=current.message if derived.message is None else derived.message,
+        )
+        ignored = {"updated_at"}
+        if current.model_dump(exclude=ignored) == candidate.model_dump(
+            exclude=ignored
+        ):
+            return False
+        job.progress = candidate
+        self._touch(job)
+        return True
+
+    # -- helpers -------------------------------------------------------------
+
+    def _touch(self, job: JobRecord) -> None:
+        """Stamp ``job.updated_at`` with the current UTC time."""
+        job.updated_at = _utc_now()
+
+    def _emit(self, job: JobRecord) -> None:
+        """Publish the job to the event bus and wake waiters if it is terminal.
+
+        A completion event is only set if one already exists for the job; none
+        is created here.
+        """
+        self.events.publish_job(job)
+        if not job.status.is_terminal:
+            return
+        waiter = self._completion_events.get(job.id)
+        if waiter is not None:
+            waiter.set()
+
+    # -- await completion ----------------------------------------------------
+
+    async def wait(self, job_id: str, timeout: float | None = None) -> JobRecord:
+        """Block until the job is terminal, then return its record.
+
+        A pure observer, like the SSE stream: cancelling this await (caller
+        drops off / client disconnects) tears down only the awaiter — the job's
+        supervising task is owned by the registry and keeps running. All
+        awaiters of a job share one Event. ``timeout=None`` waits indefinitely;
+        on timeout ``asyncio.wait_for`` raises ``asyncio.TimeoutError``, which
+        propagates to the caller.
+
+        Raises:
+            JobNotFoundError: if the job does not exist.
+        """
+        job = self._require(job_id)
+        # Make sure the shared event exists before looking at the status: there
+        # is no await between here and the check, and _emit only sets an event
+        # that already exists, so a terminal transition cannot be missed.
+        done = self._completion_events.get(job_id)
+        if done is None:
+            done = self._completion_events[job_id] = asyncio.Event()
+        if not job.status.is_terminal:
+            await asyncio.wait_for(done.wait(), timeout)
+        return job
+
+
+# Module-level registry instance, created at import time with the default
+# concurrency resolution (environment override, then DEFAULT_MAX_CONCURRENT).
+job_registry = JobRegistry()
diff --git a/app/desktop/studio_server/jobs/test_api.py b/app/desktop/studio_server/jobs/test_api.py
new file mode 100644
index 000000000..46e5b8053
--- /dev/null
+++ b/app/desktop/studio_server/jobs/test_api.py
@@ -0,0 +1,773 @@
+from __future__ import annotations
+
+import asyncio
+import json
+import uuid
+
+import httpx
+import pytest
+import pytest_asyncio
+from app.desktop.studio_server.jobs import api as jobs_api
+from app.desktop.studio_server.jobs import error_log
+from app.desktop.studio_server.jobs.api import connect_jobs_api
+from app.desktop.studio_server.jobs.models import (
+    BackgroundJobStatus,
+    JobDerivedState,
+    JobWorker,
+)
+from app.desktop.studio_server.jobs.registry import JobOperationError, JobRegistry
+from app.desktop.studio_server.jobs.workers.noop import NoopJobWorker
+from fastapi import FastAPI
+from pydantic import BaseModel
+
+
+async def _safe_cancel(registry: JobRegistry, job_id: str) -> None:
+    """Best-effort cleanup cancel; ignore a job that already reached terminal."""
+    try:
+        await registry.cancel(job_id)
+    except JobOperationError:
+        pass
+
+
+@pytest.fixture(autouse=True)
+def temp_error_log_dir(tmp_path, monkeypatch):
+    monkeypatch.setattr(
+        "app.desktop.studio_server.jobs.error_log.tempfile.gettempdir",
+        lambda: str(tmp_path),
+    )
+
+
+# -- supporting test workers -------------------------------------------------
+
+
+class _ProjectParams(BaseModel):
+    project_id: str
+    steps: int = 50
+    sleep_per_step_seconds: float = 0.05
+
+
+class _EmptyResult(BaseModel):
+    pass
+
+
+class ProjectScopedWorker(JobWorker[_ProjectParams, _EmptyResult]):
+    """A worker whose params carry a project_id, so the record gets one."""
+
+    type_name = "project_scoped"
+    params_model = _ProjectParams
+    result_model = _EmptyResult
+    supports_pause = True
+
+    async def run(self, params, ctx):
+        await asyncio.sleep(5)
+        return _EmptyResult()
+
+
+class _EmptyParams(BaseModel):
+    pass
+
+
+class ReconcileCompleteWorker(JobWorker[_EmptyParams, _EmptyResult]):
+    """compute_state flips to complete once `done` is set, so a GET reconciles
+    the running job straight to succeeded."""
+
+    type_name = "reconcile_complete"
+    params_model = _EmptyParams
+    result_model = _EmptyResult
+    supports_pause = True
+    done = False
+
+    async def compute_state(self, params):
+        complete = type(self).done
+        return JobDerivedState(
+            total=3, success=3 if complete else 1, error=0, is_complete=complete
+        )
+
+    async def run(self, params, ctx):
+        await asyncio.sleep(5)
+        return _EmptyResult()
+
+
+class NonPausableWorker(JobWorker[_EmptyParams, _EmptyResult]):
+    type_name = "nonpausable"
+    params_model = _EmptyParams
+    result_model = _EmptyResult
+    supports_pause = False
+
+    async def run(self, params, ctx):
+        await asyncio.sleep(5)
+        return _EmptyResult()
+
+
+# -- fixtures ----------------------------------------------------------------
+
+
+@pytest.fixture
+def registry(monkeypatch):
+    """Swap a fresh JobRegistry into the jobs API and register the test workers."""
+    fresh = JobRegistry(max_concurrent=10)
+    monkeypatch.setattr(jobs_api, "job_registry", fresh)
+    for worker_type in (
+        NoopJobWorker, ProjectScopedWorker, ReconcileCompleteWorker, NonPausableWorker
+    ):
+        fresh.register_type(worker_type)
+    return fresh
+
+
+@pytest.fixture
+def app(registry):
+    application = FastAPI()  # `registry` has already patched jobs_api.job_registry
+    connect_jobs_api(application)
+    return application
+
+
+@pytest_asyncio.fixture
+async def client(app):
+    """Yield an httpx.AsyncClient bound to the app via ASGITransport, so handlers
+    and the registry's background tasks share the test's event loop and
+    background jobs keep progressing while a test awaits."""
+    async with httpx.AsyncClient(
+        transport=httpx.ASGITransport(app=app), base_url="http://test"
+    ) as session:
+        yield session
+
+
+async def _wait_for_status(
+    registry: JobRegistry,
+    job_id: str,
+    target: BackgroundJobStatus | set[BackgroundJobStatus],
+    timeout: float = 3.0,
+) -> None:
+    """Poll every 10ms until the job's status is in `target`, else AssertionError."""
+    targets = {target} if isinstance(target, BackgroundJobStatus) else target
+    loop = asyncio.get_running_loop()
+    deadline = loop.time() + timeout
+    # Test the status before the clock so a last-moment transition still counts.
+    while (job := registry._jobs.get(job_id)) is None or job.status not in targets:
+        if loop.time() >= deadline:
+            actual = job.status if job else "missing"
+            raise AssertionError(f"Job {job_id} did not reach {targets}; was {actual}")
+        await asyncio.sleep(0.01)
+
+
+async def _create_noop(client, **params) -> str:
+    """POST a noop job (defaults overridable via `params`) and return its job_id."""
+    payload = {"steps": 50, "sleep_per_step_seconds": 0.05, **params}
+    response = await client.post("/api/jobs/noop", json={"params": payload})
+    assert response.status_code == 201, response.text
+    return response.json()["job_id"]
+
+
+# -- create ------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_create_returns_201_and_status(client):
+    resp = await client.post(
+        "/api/jobs/noop",
+        json={"params": {"steps": 3, "sleep_per_step_seconds": 0.01}},
+    )
+    assert resp.status_code == 201
+    body = resp.json()
+    assert body["job_id"].startswith("j_")
+    assert body["status"] in ("pending", "running")
+
+
+@pytest.mark.asyncio
+async def test_create_unknown_type_404(client):
+    resp = await client.post("/api/jobs/does_not_exist", json={"params": {}})
+    assert resp.status_code == 404
+    assert "Unknown job type" in resp.json()["detail"]
+
+
+@pytest.mark.asyncio
+async def test_create_invalid_params_422(client):
+    resp = await client.post("/api/jobs/noop", json={"params": {"steps": "not-an-int"}})
+    assert resp.status_code == 422
+
+
+@pytest.mark.asyncio
+async def test_create_stores_metadata_and_project_id(client, registry):
+    resp = await client.post(
+        "/api/jobs/project_scoped",
+        json={"params": {"project_id": "p_abc"}, "metadata": {"source": "test"}},
+    )
+    assert resp.status_code == 201
+    job_id = resp.json()["job_id"]
+    record = registry._jobs[job_id]
+    assert record.project_id == "p_abc"
+    assert record.metadata == {"source": "test"}
+    await registry.cancel(job_id)
+
+
+@pytest.mark.asyncio
+async def test_create_noop_has_null_project_id(client, registry):
+    job_id = await _create_noop(client)
+    assert registry._jobs[job_id].project_id is None
+    await registry.cancel(job_id)
+
+
+@pytest.mark.asyncio
+async def test_create_explicit_project_id_scopes_typeless_job(client, registry):
+    # A job whose params carry no project_id (noop) still gets scoped when the
+    # request body sets project_id explicitly — this is what the project-filtered
+    # jobs panel / SSE stream rely on to show such jobs.
+    resp = await client.post(
+        "/api/jobs/noop",
+        json={
+            "params": {"steps": 50, "sleep_per_step_seconds": 0.05},
+            "project_id": "p_explicit",
+        },
+    )
+    assert resp.status_code == 201
+    job_id = resp.json()["job_id"]
+    assert registry._jobs[job_id].project_id == "p_explicit"
+    rows = (await client.get("/api/jobs", params={"project_id": "p_explicit"})).json()
+    assert any(r["id"] == job_id for r in rows)
+    await registry.cancel(job_id)
+
+
+# -- list --------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_list_empty(client):
+    resp = await client.get("/api/jobs")
+    assert resp.status_code == 200
+    assert resp.json() == []
+
+
+@pytest.mark.asyncio
+async def test_list_returns_jobs_sorted_desc(client, registry):
+    first = await _create_noop(client)
+    second = await _create_noop(client)
+    resp = await client.get("/api/jobs")
+    assert resp.status_code == 200
+    ids = [r["id"] for r in resp.json()]
+    assert ids[0] == second
+    assert ids[1] == first
+    await registry.cancel(first)
+    await registry.cancel(second)
+
+
+@pytest.mark.asyncio
+async def test_list_filter_by_type(client, registry):
+    await _create_noop(client)
+    await client.post("/api/jobs/project_scoped", json={"params": {"project_id": "p1"}})
+    resp = await client.get("/api/jobs", params={"type": "project_scoped"})
+    assert resp.status_code == 200
+    rows = resp.json()
+    assert len(rows) == 1
+    assert rows[0]["type"] == "project_scoped"
+
+
+@pytest.mark.asyncio
+async def test_list_filter_by_status(client, registry):
+    job_id = await _create_noop(client, steps=2, sleep_per_step_seconds=0.01)
+    await _wait_for_status(registry, job_id, BackgroundJobStatus.SUCCEEDED)
+    resp = await client.get("/api/jobs", params={"status": "succeeded"})
+    assert [r["id"] for r in resp.json()] == [job_id]
+    resp = await client.get("/api/jobs", params={"status": "running"})
+    assert resp.json() == []
+
+
+@pytest.mark.asyncio
+async def test_list_filter_by_project_id(client):
+    await client.post(
+        "/api/jobs/project_scoped", json={"params": {"project_id": "p_one"}}
+    )
+    await client.post(
+        "/api/jobs/project_scoped", json={"params": {"project_id": "p_two"}}
+    )
+    resp = await client.get("/api/jobs", params={"project_id": "p_one"})
+    rows = resp.json()
+    assert len(rows) == 1
+    assert rows[0]["project_id"] == "p_one"
+
+
+@pytest.mark.asyncio
+async def test_list_limit(client):
+    for _ in range(3):
+        await _create_noop(client)
+    resp = await client.get("/api/jobs", params={"limit": 2})
+    assert len(resp.json()) == 2
+
+
+@pytest.mark.asyncio
+async def test_list_since_excludes_older(client, registry):
+    old_id = await _create_noop(client)
+    newer_id = await _create_noop(client)
+    cutoff = registry._jobs[newer_id].created_at.isoformat()
+    resp = await client.get("/api/jobs", params={"since": cutoff})
+    ids = [r["id"] for r in resp.json()]
+    assert newer_id in ids
+    assert old_id not in ids
+
+
+# -- get ---------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_get_returns_record(client, registry):
+    job_id = await _create_noop(client)
+    resp = await client.get(f"/api/jobs/{job_id}")
+    assert resp.status_code == 200
+    body = resp.json()
+    assert body["id"] == job_id
+    assert body["type"] == "noop"
+    assert "progress" in body
+    await registry.cancel(job_id)
+
+
+@pytest.mark.asyncio
+async def test_get_unknown_404(client):
+    resp = await client.get("/api/jobs/j_missing")
+    assert resp.status_code == 404
+
+
+@pytest.mark.asyncio
+async def test_get_reconciles_to_succeeded(client, registry, monkeypatch):
+    monkeypatch.setattr(ReconcileCompleteWorker, "done", False)
+    resp = await client.post("/api/jobs/reconcile_complete", json={"params": {}})
+    job_id = resp.json()["job_id"]
+    await _wait_for_status(registry, job_id, BackgroundJobStatus.RUNNING)
+    monkeypatch.setattr(ReconcileCompleteWorker, "done", True)  # undone at teardown
+    got = await client.get(f"/api/jobs/{job_id}")
+    assert got.status_code == 200
+    assert got.json()["status"] == "succeeded"
+    assert got.json()["progress"]["success"] == 3
+
+
+# -- result ------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_result_200_when_terminal(client, registry):
+    job_id = await _create_noop(client, steps=3, sleep_per_step_seconds=0.01)
+    await _wait_for_status(registry, job_id, BackgroundJobStatus.SUCCEEDED)
+    resp = await client.get(f"/api/jobs/{job_id}/result")
+    assert resp.status_code == 200
+    assert resp.json() == {"completed_steps": 3}
+
+
+@pytest.mark.asyncio
+async def test_result_404_when_not_terminal(client, registry):
+    job_id = await _create_noop(client)
+    await _wait_for_status(registry, job_id, BackgroundJobStatus.RUNNING)
+    resp = await client.get(f"/api/jobs/{job_id}/result")
+    assert resp.status_code == 404
+    await registry.cancel(job_id)
+
+
+@pytest.mark.asyncio
+async def test_result_404_unknown(client):
+    resp = await client.get("/api/jobs/j_missing/result")
+    assert resp.status_code == 404
+
+
+# -- errors ------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_errors_returns_array(client, registry):
+    resp = await client.post(
+        "/api/jobs/noop",
+        json={
+            "params": {
+                "steps": 4,
+                "sleep_per_step_seconds": 0.01,
+                "error_at_steps": [1, 3],
+            }
+        },
+    )
+    job_id = resp.json()["job_id"]
+    await _wait_for_status(registry, job_id, BackgroundJobStatus.SUCCEEDED)
+    resp = await client.get(f"/api/jobs/{job_id}/errors")
+    assert resp.status_code == 200
+    messages = [e["error_message"] for e in resp.json()]
+    assert "intentional error at step 1" in messages
+    assert "intentional error at step 3" in messages
+
+
+@pytest.mark.asyncio
+async def test_errors_empty_when_none(client, registry):
+    job_id = await _create_noop(client, steps=2, sleep_per_step_seconds=0.01)
+    await _wait_for_status(registry, job_id, BackgroundJobStatus.SUCCEEDED)
+    resp = await client.get(f"/api/jobs/{job_id}/errors")
+    assert resp.status_code == 200
+    assert resp.json() == []
+
+
+@pytest.mark.asyncio
+async def test_errors_unknown_job_returns_empty_200(client):
+    resp = await client.get("/api/jobs/j_missing/errors")
+    assert resp.status_code == 200
+    assert resp.json() == []
+
+
+@pytest.mark.asyncio
+async def test_errors_specific_run_id(client):
+    run_id = str(uuid.uuid4())
+    error_log.append_error(run_id, {"error_message": "from a past run"})
+    resp = await client.get("/api/jobs/j_missing/errors", params={"run_id": run_id})
+    assert resp.status_code == 200
+    assert resp.json() == [{"error_message": "from a past run"}]
+
+
+# -- pause / resume / cancel -------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_pause_then_resume(client, registry):
+    job_id = await _create_noop(client, steps=50, sleep_per_step_seconds=0.03)
+    await _wait_for_status(registry, job_id, BackgroundJobStatus.RUNNING)
+
+    resp = await client.post(f"/api/jobs/{job_id}/pause")
+    assert resp.status_code == 202
+    assert registry._jobs[job_id].status == BackgroundJobStatus.PAUSED
+
+    resp = await client.post(f"/api/jobs/{job_id}/resume")
+    assert resp.status_code == 202
+    assert registry._jobs[job_id].status in (
+        BackgroundJobStatus.PENDING,
+        BackgroundJobStatus.RUNNING,
+    )
+
+    await registry.cancel(job_id)
+
+
+@pytest.mark.asyncio
+async def test_pause_409_when_not_running(client, registry):
+    job_id = await _create_noop(client, steps=2, sleep_per_step_seconds=0.01)
+    await _wait_for_status(registry, job_id, BackgroundJobStatus.SUCCEEDED)
+    resp = await client.post(f"/api/jobs/{job_id}/pause")
+    assert resp.status_code == 409
+
+
+@pytest.mark.asyncio
+async def test_pause_409_when_unsupported(client, registry):
+    resp = await client.post("/api/jobs/nonpausable", json={"params": {}})
+    job_id = resp.json()["job_id"]
+    await _wait_for_status(registry, job_id, BackgroundJobStatus.RUNNING)
+    resp = await client.post(f"/api/jobs/{job_id}/pause")
+    assert resp.status_code == 409
+    await registry.cancel(job_id)
+
+
+@pytest.mark.asyncio
+async def test_pause_unknown_404(client):
+    resp = await client.post("/api/jobs/j_missing/pause")
+    assert resp.status_code == 404
+
+
+@pytest.mark.asyncio
+async def test_resume_409_when_not_paused(client, registry):
+    job_id = await _create_noop(client)
+    await _wait_for_status(registry, job_id, BackgroundJobStatus.RUNNING)
+    resp = await client.post(f"/api/jobs/{job_id}/resume")
+    assert resp.status_code == 409
+    await registry.cancel(job_id)
+
+
+@pytest.mark.asyncio
+async def test_cancel_202(client, registry):
+    job_id = await _create_noop(client)
+    await _wait_for_status(registry, job_id, BackgroundJobStatus.RUNNING)
+    resp = await client.post(f"/api/jobs/{job_id}/cancel")
+    assert resp.status_code == 202
+    assert registry._jobs[job_id].status == BackgroundJobStatus.CANCELLED
+
+
+@pytest.mark.asyncio
+async def test_cancel_409_when_terminal(client, registry):
+    job_id = await _create_noop(client, steps=2, sleep_per_step_seconds=0.01)
+    await _wait_for_status(registry, job_id, BackgroundJobStatus.SUCCEEDED)
+    resp = await client.post(f"/api/jobs/{job_id}/cancel")
+    assert resp.status_code == 409
+
+
+@pytest.mark.asyncio
+async def test_cancel_unknown_404(client):
+    resp = await client.post("/api/jobs/j_missing/cancel")
+    assert resp.status_code == 404
+
+
+# -- delete ------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_delete_204_when_terminal(client, registry):
+    job_id = await _create_noop(client, steps=2, sleep_per_step_seconds=0.01)
+    await _wait_for_status(registry, job_id, BackgroundJobStatus.SUCCEEDED)
+    resp = await client.delete(f"/api/jobs/{job_id}")
+    assert resp.status_code == 204
+    assert job_id not in registry._jobs
+    assert (await client.get("/api/jobs")).json() == []
+
+
+@pytest.mark.asyncio
+async def test_delete_409_when_in_flight(client, registry):
+    job_id = await _create_noop(client)
+    await _wait_for_status(registry, job_id, BackgroundJobStatus.RUNNING)
+    resp = await client.delete(f"/api/jobs/{job_id}")
+    assert resp.status_code == 409
+    await registry.cancel(job_id)
+
+
+@pytest.mark.asyncio
+async def test_delete_unknown_404(client):
+    resp = await client.delete("/api/jobs/j_missing")
+    assert resp.status_code == 404
+
+
+# -- wait --------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_wait_endpoint_200_terminal_record(client):
+    resp = await client.post(
+        "/api/jobs/noop", json={"params": {"steps": 3, "sleep_per_step_seconds": 0.02}}
+    )
+    job_id = resp.json()["job_id"]
+    got = await client.get(f"/api/jobs/{job_id}/wait", timeout=10.0)
+    assert got.status_code == 200, got.text
+    body = got.json()
+    assert body["id"] == job_id
+    assert body["status"] == "succeeded"
+    assert body["result"] == {"completed_steps": 3}
+
+
+@pytest.mark.asyncio
+async def test_wait_endpoint_404_unknown(client):
+    resp = await client.get("/api/jobs/j_missing/wait")
+    assert resp.status_code == 404
+
+
+@pytest.mark.asyncio
+async def test_wait_endpoint_504_on_timeout(client, registry):
+    job_id = await _create_noop(client, steps=50, sleep_per_step_seconds=0.05)
+    await _wait_for_status(registry, job_id, BackgroundJobStatus.RUNNING)
+    resp = await client.get(f"/api/jobs/{job_id}/wait", params={"timeout": 0.01})
+    assert resp.status_code == 504
+    await registry.cancel(job_id)
+
+
+@pytest.mark.asyncio
+async def test_create_wait_true_returns_terminal_record(client):
+    resp = await client.post(
+        "/api/jobs/noop",
+        params={"wait": "true"},
+        json={"params": {"steps": 3, "sleep_per_step_seconds": 0.02}},
+        timeout=10.0,
+    )
+    assert resp.status_code == 201, resp.text
+    body = resp.json()
+    assert body["id"].startswith("j_")
+    assert body["status"] == "succeeded"
+    assert body["result"] == {"completed_steps": 3}
+
+
+@pytest.mark.asyncio
+async def test_create_wait_false_returns_create_response(client, registry):
+    resp = await client.post(
+        "/api/jobs/noop",
+        params={"wait": "false"},
+        json={"params": {"steps": 50, "sleep_per_step_seconds": 0.05}},
+    )
+    assert resp.status_code == 201
+    body = resp.json()
+    assert body["job_id"].startswith("j_")
+    assert body["status"] in ("pending", "running")
+    assert "result" not in body
+    await registry.cancel(body["job_id"])
+
+
+@pytest.mark.asyncio
+async def test_create_wait_true_timeout_504(client, registry):
+    resp = await client.post(
+        "/api/jobs/noop",
+        params={"wait": "true", "timeout": 0.01},
+        json={"params": {"steps": 50, "sleep_per_step_seconds": 0.05}},
+    )
+    assert resp.status_code == 504
+    # The job was still created and keeps running despite the awaiter timing out.
+    running = [r for r in registry.list_jobs() if not r.status.is_terminal]
+    assert len(running) == 1
+    await registry.cancel(running[0].id)
+
+
+# -- wiring ------------------------------------------------------------------
+
+
+def test_connect_jobs_api_registers_noop_idempotently(monkeypatch):
+    reg = JobRegistry(max_concurrent=2)
+    monkeypatch.setattr(jobs_api, "job_registry", reg)
+    app = FastAPI()
+    connect_jobs_api(app)
+    connect_jobs_api(app)  # second call must not raise
+    assert "noop" in reg._workers
+
+
+# -- SSE ---------------------------------------------------------------------
+
+
+def test_format_sse_wire_format():
+    from app.desktop.studio_server.jobs.events import JobEvent
+
+    event = JobEvent(event="job", data={"id": "j_abc", "status": "running"})
+    wire = jobs_api._format_sse(event)
+    assert wire == 'event: job\ndata: {"id": "j_abc", "status": "running"}\n\n'
+
+
+@pytest.mark.asyncio
+async def test_event_stream_forwards_snapshot_then_job(registry):
+    # Unit-level test of the generator (independent of any HTTP transport): a
+    # subscriber gets the initial snapshot, and a job created afterward produces
+    # a `job` event. Proves pure-observer forwarding of the Phase 1 bus.
+    stream = jobs_api._event_stream(job_id=None, type_name=None, project_id=None)
+    try:
+        first = await asyncio.wait_for(stream.__anext__(), timeout=3.0)
+        assert first.startswith("event: snapshot\n")
+
+        job = await registry.create(
+            "noop", {"steps": 40, "sleep_per_step_seconds": 0.05}
+        )
+        # Drain until we see a job event for our job.
+        deadline = asyncio.get_event_loop().time() + 3.0
+        saw_job = False
+        while asyncio.get_event_loop().time() < deadline:
+            chunk = await asyncio.wait_for(stream.__anext__(), timeout=3.0)
+            if chunk.startswith("event: job\n") and job.id in chunk:
+                saw_job = True
+                break
+        assert saw_job
+        await _safe_cancel(registry, job.id)
+    finally:
+        await stream.aclose()
+
+
+def _parse_sse_block(block: str) -> tuple[str | None, dict | None]:
+    name: str | None = None  # value of the last `event:` line, if any
+    payload: dict | None = None  # JSON-decoded value of the last `data:` line
+    for field, sep, value in (raw.partition(":") for raw in block.splitlines()):
+        if sep and field == "event":
+            name = value.strip()
+        elif sep and field == "data":
+            payload = json.loads(value.strip())
+    return name, payload
+
+
+# The SSE endpoint is now a correctly *infinite* stream (it pings forever until
+# the client disconnects or the bus shuts down). httpx's ASGITransport runs the
+# app to completion and buffers the whole body before returning a response, and
+# its `receive()` only yields http.disconnect once the response is complete — so
+# it cannot exercise an open-ended stream incrementally or simulate a mid-stream
+# disconnect. We therefore drive `_event_stream` / `subscribe` directly for the
+# streaming-content behavior, and keep one HTTP-level test that ends the stream
+# via `events.shutdown()` so ASGITransport can return the buffered response.
+
+
+async def _read_stream_until(stream, target: str, timeout: float = 3.0) -> dict:
+    """Return the data of the first `target` SSE event; `timeout` caps the total."""
+    now = asyncio.get_running_loop().time
+    deadline = now() + timeout
+    while (remaining := deadline - now()) > 0:
+        chunk = await asyncio.wait_for(stream.__anext__(), timeout=remaining)
+        event_name, data = _parse_sse_block(chunk)
+        if event_name == target and data is not None:
+            return data
+    raise AssertionError(f"did not see event '{target}' within {timeout}s")
+
+
+def _parse_sse_body(body: str) -> list[tuple[str | None, dict | None]]:
+    return [_parse_sse_block(b) for b in body.split("\n\n") if b.strip()]
+
+
+@pytest.mark.asyncio
+async def test_sse_endpoint_returns_event_stream_and_ends_on_shutdown(app, registry):
+    # Full HTTP path: correct status + content-type and an initial snapshot.
+    # The stream is infinite, and ASGITransport buffers the whole body, so we
+    # end it with events.shutdown() (the same hook the server uses on reload)
+    # to let the buffered response come back.
+    transport = httpx.ASGITransport(app=app)
+    async with httpx.AsyncClient(
+        transport=transport, base_url="http://test"
+    ) as http_client:
+        get = asyncio.ensure_future(http_client.get("/api/jobs/events"))
+        # Wait until the endpoint's subscription is registered, then shut the
+        # bus so the (otherwise infinite) stream returns.
+        for _ in range(300):
+            if registry.events._subscribers:
+                break
+            await asyncio.sleep(0.01)
+        else:
+            raise AssertionError("SSE subscription never registered")
+        registry.events.shutdown()
+
+        response = await asyncio.wait_for(get, timeout=3.0)
+        assert response.status_code == 200
+        assert response.headers["content-type"].startswith("text/event-stream")
+        blocks = _parse_sse_body(response.text)
+        assert ("snapshot", {"jobs": []}) in blocks
+
+
+@pytest.mark.asyncio
+async def test_event_stream_emits_keepalive_ping(registry, monkeypatch):
+    # The keepalive is the regression we fixed: a timeout must yield a `: ping`
+    # comment WITHOUT finalizing the generator, so MANY pings arrive over time.
+    monkeypatch.setattr(jobs_api, "KEEPALIVE_SECONDS", 0.05)
+    stream = jobs_api._event_stream(job_id=None, type_name=None, project_id=None)
+    try:
+        first = await asyncio.wait_for(stream.__anext__(), timeout=3.0)
+        assert first.startswith("event: snapshot\n")
+        # Two consecutive pings prove the stream survives repeated timeouts.
+        for _ in range(2):
+            chunk = await asyncio.wait_for(stream.__anext__(), timeout=3.0)
+            assert chunk == ": ping\n\n"
+    finally:
+        await stream.aclose()
+
+
+@pytest.mark.asyncio
+async def test_event_stream_filters_by_job_id(registry):
+    # Both jobs run; only `target`'s events reach a job_id-filtered stream.
+    other = await registry.create("noop", {"steps": 40, "sleep_per_step_seconds": 0.05})
+    target = await registry.create(
+        "noop", {"steps": 40, "sleep_per_step_seconds": 0.05}
+    )
+    stream = jobs_api._event_stream(job_id=target.id, type_name=None, project_id=None)
+    try:
+        snapshot = await _read_stream_until(stream, "snapshot")
+        snapshot_ids = {j["id"] for j in snapshot["jobs"]}
+        assert target.id in snapshot_ids
+        assert other.id not in snapshot_ids
+
+        # The first live event to arrive must belong to the target, not `other`.
+        data = await _read_stream_until(stream, "job")
+        assert data["id"] == target.id
+    finally:
+        await stream.aclose()
+        await _safe_cancel(registry, other.id)
+        await _safe_cancel(registry, target.id)
+
+
+@pytest.mark.asyncio
+async def test_event_stream_disconnect_leaves_job_running(registry):
+    """The decoupling guarantee: dropping the SSE stream mid-run must NOT stop
+    the job. Only explicit cancel/pause stops a job. Closing the generator is
+    exactly what CancellableStreamingResponse does on a real client disconnect."""
+    job = await registry.create("noop", {"steps": 6, "sleep_per_step_seconds": 0.05})
+
+    stream = jobs_api._event_stream(job_id=None, type_name=None, project_id=None)
+    await _read_stream_until(stream, "snapshot")
+    # Observe at least one live job event so we know the run is underway.
+    await _read_stream_until(stream, "job")
+    # Simulate the client disconnecting mid-stream.
+    await stream.aclose()
+
+    assert registry._jobs[job.id].status in (
+        BackgroundJobStatus.RUNNING,
+        BackgroundJobStatus.SUCCEEDED,
+    )
+    await _wait_for_status(registry, job.id, BackgroundJobStatus.SUCCEEDED)
+    assert registry._jobs[job.id].result == {"completed_steps": 6}
diff --git a/app/desktop/studio_server/jobs/test_error_log.py b/app/desktop/studio_server/jobs/test_error_log.py
new file mode 100644
index 000000000..d4291c9de
--- /dev/null
+++ b/app/desktop/studio_server/jobs/test_error_log.py
@@ -0,0 +1,67 @@
+from __future__ import annotations
+
+import uuid
+
+import pytest
+
+from app.desktop.studio_server.jobs import error_log
+
+
+@pytest.fixture
+def run_id(tmp_path, monkeypatch):
+    monkeypatch.setattr(
+        "app.desktop.studio_server.jobs.error_log.tempfile.gettempdir",
+        lambda: str(tmp_path),
+    )
+    return str(uuid.uuid4())
+
+
+def test_append_and_read_round_trip(run_id):
+    error_log.append_error(run_id, {"error_message": "first", "step": 1})
+    error_log.append_error(run_id, {"error_message": "second", "item_id": "x"})
+
+    entries = error_log.read_errors(run_id)
+    assert entries == [
+        {"error_message": "first", "step": 1},
+        {"error_message": "second", "item_id": "x"},
+    ]
+
+
+def test_read_missing_file_returns_empty(run_id):
+    assert error_log.read_errors(run_id) == []
+
+
+def test_read_skips_unparsable_lines(run_id):
+    error_log.append_error(run_id, {"error_message": "good"})
+    with error_log.error_log_path(run_id).open("a", encoding="utf-8") as f:
+        f.write("not json at all\n")
+        f.write("\n")
+    error_log.append_error(run_id, {"error_message": "also good"})
+
+    entries = error_log.read_errors(run_id)
+    assert entries == [
+        {"error_message": "good"},
+        {"error_message": "also good"},
+    ]
+
+
+def test_delete_removes_file(run_id):
+    error_log.append_error(run_id, {"error_message": "x"})
+    assert error_log.error_log_path(run_id).exists()
+
+    error_log.delete_errors(run_id)
+    assert not error_log.error_log_path(run_id).exists()
+    assert error_log.read_errors(run_id) == []
+
+
+def test_delete_missing_file_is_noop(run_id):
+    error_log.delete_errors(run_id)
+    assert error_log.read_errors(run_id) == []
+
+
+def test_append_never_raises_on_bad_dir(monkeypatch, run_id):
+    def boom(*args, **kwargs):
+        raise OSError("disk full")
+
+    monkeypatch.setattr("app.desktop.studio_server.jobs.error_log.Path.mkdir", boom)
+    error_log.append_error(run_id, {"error_message": "swallowed"})
diff --git a/app/desktop/studio_server/jobs/test_events.py b/app/desktop/studio_server/jobs/test_events.py
new file mode 100644
index 000000000..95eb19b2c
--- /dev/null
+++ b/app/desktop/studio_server/jobs/test_events.py
@@ -0,0 +1,144 @@
+from __future__ import annotations
+
+import asyncio
+
+import pytest
+from app.desktop.studio_server.jobs.events import JobEvent, JobEventBus
+from app.desktop.studio_server.jobs.models import BackgroundJobStatus, JobRecord
+
+
+def _record(
+    job_id: str = "j_aaaaaaaaaaaa",
+    type_name: str = "noop",
+    project_id: str | None = None,
+    status: BackgroundJobStatus = BackgroundJobStatus.RUNNING,
+) -> JobRecord:
+    return JobRecord(
+        id=job_id,
+        type=type_name,
+        status=status,
+        project_id=project_id,
+    )
+
+
+async def _next_event(gen, timeout: float = 1.0) -> JobEvent:  # bounded wait
+    return await asyncio.wait_for(gen.__anext__(), timeout=timeout)
+
+
+@pytest.mark.asyncio
+async def test_snapshot_then_job_event():
+    existing = _record("j_existing0001")
+    bus = JobEventBus(snapshot_provider=lambda: [existing])
+
+    gen = bus.subscribe()
+    snapshot = await _next_event(gen)
+    assert snapshot.event == "snapshot"
+    assert [j["id"] for j in snapshot.data["jobs"]] == ["j_existing0001"]
+
+    new = _record("j_new000000001")
+    bus.publish_job(new)
+    job_event = await _next_event(gen)
+    assert job_event.event == "job"
+    assert job_event.data["id"] == "j_new000000001"
+
+    await gen.aclose()
+
+
+@pytest.mark.asyncio
+async def test_deleted_event():
+    bus = JobEventBus(snapshot_provider=lambda: [])
+    gen = bus.subscribe()
+    await _next_event(gen)  # snapshot
+
+    bus.publish_deleted("j_gone00000001")
+    event = await _next_event(gen)
+    assert event.event == "deleted"
+    assert event.data == {"id": "j_gone00000001"}
+
+    await gen.aclose()
+
+
+@pytest.mark.asyncio
+async def test_filter_by_project_id():
+    matching = _record("j_match0000001", project_id="p_keep")
+    other = _record("j_other0000001", project_id="p_drop")
+    bus = JobEventBus(snapshot_provider=lambda: [matching, other])
+
+    gen = bus.subscribe(project_id="p_keep")
+    snapshot = await _next_event(gen)
+    assert [j["id"] for j in snapshot.data["jobs"]] == ["j_match0000001"]
+
+    bus.publish_job(other)
+    bus.publish_job(matching)
+    event = await _next_event(gen)
+    assert event.data["id"] == "j_match0000001"
+
+    await gen.aclose()
+
+
+@pytest.mark.asyncio
+async def test_filter_by_type_and_job_id():
+    bus = JobEventBus(snapshot_provider=lambda: [])
+    gen = bus.subscribe(type_name="eval", job_id="j_target000001")
+    await _next_event(gen)  # snapshot
+
+    bus.publish_job(_record("j_other0000001", type_name="noop"))
+    bus.publish_job(_record("j_target000001", type_name="eval"))
+    event = await _next_event(gen)
+    assert event.data["id"] == "j_target000001"
+
+    await gen.aclose()
+
+
+@pytest.mark.asyncio
+async def test_keepalive_ping_does_not_finalize_generator():
+    # Regression: the timeout must yield `ping` events from inside the generator
+    # and keep it alive, not finalize it after the first one.
+    bus = JobEventBus(snapshot_provider=lambda: [])
+    gen = bus.subscribe(timeout=0.02)
+    assert (await _next_event(gen)).event == "snapshot"
+    assert (await _next_event(gen)).event == "ping"
+    assert (await _next_event(gen)).event == "ping"
+
+    # A real event still flows after pings.
+    bus.publish_job(_record("j_after000001"))
+    event = await _next_event(gen)
+    assert event.event == "job"
+    assert event.data["id"] == "j_after000001"
+
+    await gen.aclose()
+
+
+@pytest.mark.asyncio
+async def test_shutdown_ends_open_stream_and_rejects_new_ones():
+    bus = JobEventBus(snapshot_provider=lambda: [])
+    gen = bus.subscribe()
+    assert (await _next_event(gen)).event == "snapshot"
+
+    # shutdown() pushes a close sentinel so the open generator returns.
+    bus.shutdown()
+    with pytest.raises(StopAsyncIteration):
+        await _next_event(gen)
+
+    # A subscription opened after shutdown ends immediately (no snapshot).
+    gen2 = bus.subscribe()
+    with pytest.raises(StopAsyncIteration):
+        await _next_event(gen2)
+
+
+@pytest.mark.asyncio
+async def test_shutdown_unblocks_subscriber_waiting_without_timeout():
+    # With no keepalive timeout the subscriber blocks on queue.get(); shutdown()
+    # must still wake it so a hot reload isn't held open.
+    bus = JobEventBus(snapshot_provider=lambda: [])
+    gen = bus.subscribe()  # timeout=None
+    assert (await _next_event(gen)).event == "snapshot"
+
+    async def _drain():
+        with pytest.raises(StopAsyncIteration):
+            await gen.__anext__()
+
+    waiter = asyncio.ensure_future(_drain())
+    await asyncio.sleep(0)  # let the waiter block on queue.get()
+    bus.shutdown()
+    await asyncio.wait_for(waiter, timeout=1.0)
diff --git a/app/desktop/studio_server/jobs/test_registry.py b/app/desktop/studio_server/jobs/test_registry.py
new file mode 100644
index 000000000..71a46a5bd
--- /dev/null
+++ b/app/desktop/studio_server/jobs/test_registry.py
@@ -0,0 +1,874 @@
+from __future__ import annotations
+
+import asyncio
+import uuid
+
+import pytest
+from pydantic import BaseModel
+
+from app.desktop.studio_server.jobs import error_log
+from app.desktop.studio_server.jobs.models import (
+    JobDerivedState,
+    BackgroundJobStatus,
+    JobWorker,
+)
+from app.desktop.studio_server.jobs.registry import (
+    JobNotFoundError,
+    JobOperationError,
+    JobRegistry,
+    _new_job_id,
+)
+from app.desktop.studio_server.jobs.workers.noop import NoopJobWorker
+
+
+@pytest.fixture(autouse=True)
+def temp_error_log_dir(tmp_path, monkeypatch):
+    monkeypatch.setattr(
+        "app.desktop.studio_server.jobs.error_log.tempfile.gettempdir",
+        lambda: str(tmp_path),
+    )
+
+
+@pytest.fixture
+def registry():
+    reg = JobRegistry(max_concurrent=10)
+    reg.register_type(NoopJobWorker)  # only the noop worker type is registered
+    return reg
+
+
+async def wait_for_status(
+    registry: JobRegistry,
+    job_id: str,
+    target: BackgroundJobStatus | set[BackgroundJobStatus],
+    timeout: float = 3.0,
+) -> None:
+    targets = {target} if isinstance(target, BackgroundJobStatus) else target
+    deadline = asyncio.get_event_loop().time() + timeout
+    while asyncio.get_event_loop().time() < deadline:
+        job = registry._jobs.get(job_id)
+        if job is not None and job.status in targets:
+            return
+        await asyncio.sleep(0.01)
+    job = registry._jobs.get(job_id)
+    actual = job.status if job else "missing"
+    raise AssertionError(f"Job {job_id} did not reach {targets}; was {actual}")
+
+
+# -- supporting test workers ------------------------------------------------
+
+
+class _EmptyParams(BaseModel):
+    pass  # no fields: the test workers below ignore their params
+
+
+class _EmptyResult(BaseModel):
+    pass  # no fields: the test workers below return an empty result
+
+
+class NonPausableWorker(JobWorker[_EmptyParams, _EmptyResult]):
+    type_name = "nonpausable"
+    params_model = _EmptyParams
+    result_model = _EmptyResult
+    supports_pause = False  # the property under test: pause() must be refused
+
+    async def run(self, params, ctx):
+        await asyncio.sleep(5)  # stays in flight until the test cancels it
+        return _EmptyResult()
+
+
+class AlreadyCompleteWorker(JobWorker[_EmptyParams, _EmptyResult]):
+    type_name = "already_complete"
+    params_model = _EmptyParams
+    result_model = _EmptyResult
+    supports_pause = True
+    run_called = False  # class-level flag set by run(); tests reset and inspect it
+
+    async def compute_state(self, params):  # always reports 5 of 5, complete
+        return JobDerivedState(total=5, success=5, error=0, is_complete=True)
+
+    async def run(self, params, ctx):
+        type(self).run_called = True  # recorded on the class, not the instance
+        return _EmptyResult()
+
+
+class PartialProgressWorker(JobWorker[_EmptyParams, _EmptyResult]):
+    """Reports progress twice: first with success, total and message, then
+    with a success count only. The second, partial call must leave the earlier
+    total and message in place instead of resetting them.
+    """
+
+    type_name = "partial_progress"
+    params_model = _EmptyParams
+    result_model = _EmptyResult
+    supports_pause = False
+
+    async def run(self, params, ctx):
+        await ctx.report_progress(success=1, total=50, message="starting")
+        await ctx.report_progress(success=5)  # count only: no total, no message
+        return _EmptyResult()
+
+
+class RaceCompleteWorker(JobWorker[_EmptyParams, _EmptyResult]):
+    """run() blocks on a test-controlled gate, then returns normally without
+    ever observing a cancellation. The test opens the gate (so run() returns and
+    the supervising task drives the job to its terminal succeeded state) and only
+    then issues pause/cancel — reproducing the completion-vs-cancel race where
+    the job finished naturally during the cancel await.
+    """
+
+    type_name = "race_complete"
+    params_model = _EmptyParams
+    result_model = _EmptyResult
+    supports_pause = True
+    gate: asyncio.Event  # annotation only; the test assigns it before creating a job
+
+    async def run(self, params, ctx):
+        await type(self).gate.wait()  # read from the class, where the test put it
+        return _EmptyResult()
+
+
+class SwallowCancelWorker(JobWorker[_EmptyParams, _EmptyResult]):
+    """Catches CancelledError, fully clears the cancellation (uncancel) so it is
+    not re-raised, and returns normally — the worst-case "swallows CancelledError
+    and returns silently" worker. The cancellation transition is unconditional,
+    so the registry itself must land the job in paused/cancelled rather than
+    trusting the worker to re-raise.
+
+    `started` is set once run() is actually suspended at its await point, so a
+    test can guarantee the cancellation is delivered into the worker body (not
+    before it runs) before issuing pause/cancel.
+    """
+
+    type_name = "swallow_cancel"
+    params_model = _EmptyParams
+    result_model = _EmptyResult
+    supports_pause = True
+    started: asyncio.Event  # annotation only; assigned by each test before create()
+    gate: asyncio.Event  # annotation only; assigned by each test before create()
+
+    async def run(self, params, ctx):
+        type(self).started.set()
+        try:
+            await type(self).gate.wait()
+        except asyncio.CancelledError:
+            task = asyncio.current_task()
+            # task.uncancel() was added in Python 3.11; on 3.10 simply
+            # swallowing the CancelledError exercises the same worst-case
+            # "swallows cancel and returns normally" path.
+            if task is not None and hasattr(task, "uncancel"):
+                task.uncancel()
+        return _EmptyResult()  # reached on cancellation too: nothing is re-raised
+
+
+class TotalThenNoneWorker(JobWorker[_EmptyParams, _EmptyResult]):
+    """run() reports a known total via report_progress, then compute_state at
+    pause returns total=None alongside success/error counts. The reconcile must
+    preserve the prior total rather than wiping the denominator to None.
+    """
+
+    type_name = "total_then_none"
+    params_model = _EmptyParams
+    result_model = _EmptyResult
+    supports_pause = True
+    started: asyncio.Event  # annotation only; assigned by the test before create()
+    gate: asyncio.Event  # annotation only; assigned by the test before create()
+
+    async def compute_state(self, params):
+        return JobDerivedState(total=None, success=2, error=1, is_complete=False)
+
+    async def run(self, params, ctx):
+        await ctx.report_progress(success=0, total=10, message="starting")
+        type(self).started.set()  # set only after the total of 10 was reported
+        try:
+            await type(self).gate.wait()
+        except asyncio.CancelledError:
+            task = asyncio.current_task()
+            # task.uncancel() was added in Python 3.11; on 3.10 simply
+            # swallowing the CancelledError exercises the same worst-case
+            # "swallows cancel and returns normally" path.
+            if task is not None and hasattr(task, "uncancel"):
+                task.uncancel()
+        return _EmptyResult()
+
+
+class ReconcileCompleteWorker(JobWorker[_EmptyParams, _EmptyResult]):
+    """compute_state reports complete only once the test flips `done`, so a
+    get() issued while the job is still running (run() is a long sleep)
+    reconciles it straight to succeeded mid-flight.
+    """
+
+    type_name = "reconcile_complete"
+    params_model = _EmptyParams
+    result_model = _EmptyResult
+    supports_pause = True
+    done = False  # class-level switch read by compute_state; the test flips it
+
+    async def compute_state(self, params):
+        complete = type(self).done
+        return JobDerivedState(
+            total=3, success=3 if complete else 1, error=0, is_complete=complete
+        )
+
+    async def run(self, params, ctx):
+        await asyncio.sleep(5)  # long sleep keeps the job running during the test
+        return _EmptyResult()
+
+
+# -- job id ------------------------------------------------------------------
+
+
+def test_job_id_format():
+    job_id = _new_job_id()
+    assert job_id.startswith("j_")
+    suffix = job_id[2:]
+    assert len(suffix) == 12
+    assert all(c in "abcdefghijklmnopqrstuvwxyz234567" for c in suffix)
+
+
+# -- lifecycle ---------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_full_lifecycle_succeeds(registry):
+    job = await registry.create("noop", {"steps": 3, "sleep_per_step_seconds": 0.01})
+    assert job.status in (BackgroundJobStatus.PENDING, BackgroundJobStatus.RUNNING)
+    assert job.supports_pause is True
+
+    await wait_for_status(registry, job.id, BackgroundJobStatus.SUCCEEDED)
+    final = registry._jobs[job.id]
+    assert final.result == {"completed_steps": 3}
+    assert final.started_at is not None
+    assert final.ended_at is not None
+    assert final.run_id is not None
+    assert final.progress.success == 3
+
+
+@pytest.mark.asyncio
+async def test_failure_path_captures_error_log(registry):
+    job = await registry.create(
+        "noop",
+        {"steps": 5, "sleep_per_step_seconds": 0.01, "fail_at_step": 2},
+    )
+    await wait_for_status(registry, job.id, BackgroundJobStatus.FAILED)
+
+    final = registry._jobs[job.id]
+    assert final.error is not None
+    assert final.error.error is not None
+    assert "intentional fail at step 2" in final.error.error
+
+    entries = error_log.read_errors(final.run_id)
+    fatal = [e for e in entries if e.get("fatal")]
+    assert len(fatal) == 1
+    assert "intentional fail at step 2" in fatal[0]["error_message"]
+
+
+@pytest.mark.asyncio
+async def test_non_fatal_errors_logged_and_counted(registry):
+    job = await registry.create(
+        "noop",
+        {
+            "steps": 4,
+            "sleep_per_step_seconds": 0.01,
+            "error_at_steps": [1, 3],
+        },
+    )
+    await wait_for_status(registry, job.id, BackgroundJobStatus.SUCCEEDED)
+
+    final = registry._jobs[job.id]
+    assert final.progress.error == 2
+    assert final.progress.success == 2
+
+    entries = error_log.read_errors(final.run_id)
+    messages = [e["error_message"] for e in entries]
+    assert "intentional error at step 1" in messages
+    assert "intentional error at step 3" in messages
+    steps = sorted(e["step"] for e in entries if "step" in e)
+    assert steps == [1, 3]
+
+
+@pytest.mark.asyncio
+async def test_error_log_missing_returns_empty():
+    assert error_log.read_errors(str(uuid.uuid4())) == []
+
+
+# -- cancel ------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_cancel_pending_job_never_starts():
+    reg = JobRegistry(max_concurrent=1)
+    reg.register_type(NoopJobWorker)
+    running = await reg.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.05})
+    await wait_for_status(reg, running.id, BackgroundJobStatus.RUNNING)
+    pending = await reg.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.05})
+    assert reg._jobs[pending.id].status == BackgroundJobStatus.PENDING
+
+    await reg.cancel(pending.id)
+    assert reg._jobs[pending.id].status == BackgroundJobStatus.CANCELLED
+    assert pending.id not in reg._tasks
+
+    await reg.cancel(running.id)
+
+
+@pytest.mark.asyncio
+async def test_cancel_from_running(registry):
+    job = await registry.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.05})
+    await wait_for_status(registry, job.id, BackgroundJobStatus.RUNNING)
+    await registry.cancel(job.id)  # status is checked right after cancel() returns
+    assert registry._jobs[job.id].status == BackgroundJobStatus.CANCELLED
+
+
+@pytest.mark.asyncio
+async def test_cancel_immediately_after_create_reclaims_slot():
+    # Cancelling right after create can race the supervising task before its
+    # coroutine body runs; the registry must still reclaim the concurrency slot.
+    reg = JobRegistry(max_concurrent=2)
+    reg.register_type(NoopJobWorker)
+    ids = []
+    for _ in range(6):
+        job = await reg.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.02})
+        ids.append(job.id)
+    for job_id in ids:
+        await reg.cancel(job_id)
+    await asyncio.sleep(0.05)
+
+    assert all(reg._jobs[i].status == BackgroundJobStatus.CANCELLED for i in ids)
+    assert reg._running_count == 0
+    assert reg._tasks == {}
+    assert reg._pending_ids == []
+
+
+@pytest.mark.asyncio
+async def test_cancel_terminal_raises(registry):
+    job = await registry.create("noop", {"steps": 2, "sleep_per_step_seconds": 0.01})
+    await wait_for_status(registry, job.id, BackgroundJobStatus.SUCCEEDED)
+    with pytest.raises(JobOperationError):
+        await registry.cancel(job.id)  # the job has already succeeded
+
+
+# -- pause / resume ----------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_pause_then_resume_succeeds(registry):
+    job = await registry.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.03})
+    await wait_for_status(registry, job.id, BackgroundJobStatus.RUNNING)
+    first_run_id = registry._jobs[job.id].run_id
+
+    await registry.pause(job.id)
+    assert registry._jobs[job.id].status == BackgroundJobStatus.PAUSED
+
+    # Make resume finish quickly by checking it re-runs with a fresh run_id.
+    await registry.resume(job.id)
+    assert registry._jobs[job.id].status in (
+        BackgroundJobStatus.PENDING,
+        BackgroundJobStatus.RUNNING,
+    )
+    await wait_for_status(registry, job.id, BackgroundJobStatus.RUNNING)
+    second_run_id = registry._jobs[job.id].run_id
+    assert second_run_id is not None
+    assert second_run_id != first_run_id
+
+    await registry.cancel(job.id)
+
+
+@pytest.mark.asyncio
+async def test_resume_to_succeeded_when_complete():
+    reg = JobRegistry(max_concurrent=2)
+    reg.register_type(NoopJobWorker)
+    reg.register_type(AlreadyCompleteWorker)
+    AlreadyCompleteWorker.run_called = False
+
+    # No pause/resume cycle is needed to exercise this path: the worker's
+    # compute_state already reports is_complete=True, so the job is expected
+    # to reach succeeded without run() ever being invoked.
+    job = await reg.create("already_complete", {})
+    await wait_for_status(reg, job.id, BackgroundJobStatus.SUCCEEDED)
+    assert AlreadyCompleteWorker.run_called is False
+    assert reg._jobs[job.id].progress.success == 5
+
+
+@pytest.mark.asyncio
+async def test_pause_rejected_when_not_supported():
+    reg = JobRegistry(max_concurrent=2)
+    reg.register_type(NonPausableWorker)
+    job = await reg.create("nonpausable", {})
+    await wait_for_status(reg, job.id, BackgroundJobStatus.RUNNING)
+    with pytest.raises(JobOperationError):
+        await reg.pause(job.id)  # NonPausableWorker sets supports_pause = False
+    await reg.cancel(job.id)  # cleanup: run() would otherwise sleep for 5s
+
+
+@pytest.mark.asyncio
+async def test_pause_rejected_when_not_running(registry):
+    job = await registry.create("noop", {"steps": 2, "sleep_per_step_seconds": 0.01})
+    await wait_for_status(registry, job.id, BackgroundJobStatus.SUCCEEDED)
+    with pytest.raises(JobOperationError):
+        await registry.pause(job.id)  # the job has already succeeded
+
+
+@pytest.mark.asyncio
+async def test_resume_rejected_when_not_paused(registry):
+    job = await registry.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.05})
+    await wait_for_status(registry, job.id, BackgroundJobStatus.RUNNING)
+    with pytest.raises(JobOperationError):
+        await registry.resume(job.id)  # the job is running, not paused
+    await registry.cancel(job.id)  # cleanup: stop the 50-step job
+
+
+async def _drive_completion_race(operation: str) -> JobRegistry:
+    # Deterministically reproduce the completion-vs-cancel race. The worker's
+    # run() is gated; the patched _cancel_task opens the gate and awaits the
+    # supervising task before delegating to the real _cancel_task, so the job
+    # finishes naturally inside the lifecycle op. `operation` is "pause" to call
+    # pause(); any other value calls cancel(). Returns the registry so callers
+    # can check that the terminal succeeded state survived.
+    reg = JobRegistry(max_concurrent=2)
+    reg.register_type(RaceCompleteWorker)
+    RaceCompleteWorker.gate = asyncio.Event()
+    job = await reg.create("race_complete", {})
+    await wait_for_status(reg, job.id, BackgroundJobStatus.RUNNING)
+
+    original_cancel_task = reg._cancel_task  # kept so the wrapper can delegate
+
+    async def open_gate_then_cancel(job_id: str) -> None:
+        # Let run() return and the supervising task drive to terminal first.
+        RaceCompleteWorker.gate.set()
+        task = reg._tasks.get(job_id)
+        if task is not None:
+            try:
+                await task
+            except asyncio.CancelledError:
+                pass
+        await original_cancel_task(job_id)
+
+    reg._cancel_task = open_gate_then_cancel  # type: ignore[method-assign]
+
+    if operation == "pause":
+        await reg.pause(job.id)
+    else:
+        await reg.cancel(job.id)
+    return reg
+
+
+@pytest.mark.asyncio
+async def test_pause_loses_race_to_natural_completion_keeps_succeeded():
+    # Regression: if run() completes naturally during pause()'s cancel-await,
+    # the job is already terminal (succeeded) and pause() must not clobber it
+    # back to paused (which would drop the result and allow a resume re-run).
+    reg = await _drive_completion_race("pause")
+    job_id = next(iter(reg._jobs))  # the helper creates exactly one job
+    assert reg._jobs[job_id].status == BackgroundJobStatus.SUCCEEDED
+    assert reg._jobs[job_id].result is not None
+
+
+@pytest.mark.asyncio
+async def test_cancel_loses_race_to_natural_completion_keeps_succeeded():
+    # The cancel() path already guards on is_terminal; lock it in.
+    reg = await _drive_completion_race("cancel")
+    job_id = next(iter(reg._jobs))  # the helper creates exactly one job
+    assert reg._jobs[job_id].status == BackgroundJobStatus.SUCCEEDED
+    assert reg._jobs[job_id].result is not None
+
+
+@pytest.mark.asyncio
+async def test_pause_enforced_when_worker_swallows_cancel():
+    # A worker that catches CancelledError (and uncancels it) then returns
+    # normally must still be paused, not succeeded — the cancellation transition
+    # is unconditional and enforced by the registry, not the worker.
+    reg = JobRegistry(max_concurrent=2)
+    reg.register_type(SwallowCancelWorker)
+    SwallowCancelWorker.started = asyncio.Event()
+    SwallowCancelWorker.gate = asyncio.Event()
+    job = await reg.create("swallow_cancel", {})
+    await wait_for_status(reg, job.id, BackgroundJobStatus.RUNNING)
+    await asyncio.wait_for(SwallowCancelWorker.started.wait(), timeout=3.0)
+
+    result = await reg.pause(job.id)
+    assert result.status == BackgroundJobStatus.PAUSED
+    assert reg._jobs[job.id].result is None
+
+
+@pytest.mark.asyncio
+async def test_cancel_enforced_when_worker_swallows_cancel():
+    reg = JobRegistry(max_concurrent=2)
+    reg.register_type(SwallowCancelWorker)
+    SwallowCancelWorker.started = asyncio.Event()
+    SwallowCancelWorker.gate = asyncio.Event()
+    job = await reg.create("swallow_cancel", {})
+    await wait_for_status(reg, job.id, BackgroundJobStatus.RUNNING)
+    await asyncio.wait_for(SwallowCancelWorker.started.wait(), timeout=3.0)
+
+    result = await reg.cancel(job.id)
+    assert result.status == BackgroundJobStatus.CANCELLED
+    assert reg._jobs[job.id].result is None
+
+
+@pytest.mark.asyncio
+async def test_cancel_from_paused():
+    reg = JobRegistry(max_concurrent=2)
+    reg.register_type(NoopJobWorker)
+    job = await reg.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.03})
+    await wait_for_status(reg, job.id, BackgroundJobStatus.RUNNING)
+    await reg.pause(job.id)
+    assert reg._jobs[job.id].status == BackgroundJobStatus.PAUSED
+
+    result = await reg.cancel(job.id)
+    assert result.status == BackgroundJobStatus.CANCELLED
+    assert reg._jobs[job.id].status == BackgroundJobStatus.CANCELLED
+    assert reg._jobs[job.id].ended_at is not None
+
+
+# -- delete ------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_delete_terminal_emits_deleted(registry):
+    job = await registry.create("noop", {"steps": 2, "sleep_per_step_seconds": 0.01})
+    await wait_for_status(registry, job.id, BackgroundJobStatus.SUCCEEDED)
+
+    events = []
+    gen = registry.events.subscribe()
+    await asyncio.wait_for(gen.__anext__(), timeout=1.0)  # snapshot
+
+    async def collect():
+        async for event in gen:
+            events.append(event)
+
+    collector = asyncio.create_task(collect())
+    await registry.delete(job.id)
+    await asyncio.sleep(0.05)
+    collector.cancel()
+    try:
+        await collector
+    except asyncio.CancelledError:
+        pass
+
+    assert job.id not in registry._jobs
+    assert any(e.event == "deleted" and e.data["id"] == job.id for e in events)
+
+
+@pytest.mark.asyncio
+async def test_delete_running_raises(registry):
+    job = await registry.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.05})
+    await wait_for_status(registry, job.id, BackgroundJobStatus.RUNNING)
+    with pytest.raises(JobOperationError):
+        await registry.delete(job.id)  # the job is still running
+    await registry.cancel(job.id)  # cleanup: stop the 50-step job
+
+
+@pytest.mark.asyncio
+async def test_delete_pending_raises():
+    reg = JobRegistry(max_concurrent=1)  # one slot, so the second job has to queue
+    reg.register_type(NoopJobWorker)
+    running = await reg.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.05})
+    await wait_for_status(reg, running.id, BackgroundJobStatus.RUNNING)
+    pending = await reg.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.05})
+    assert reg._jobs[pending.id].status == BackgroundJobStatus.PENDING
+    with pytest.raises(JobOperationError):
+        await reg.delete(pending.id)
+    await reg.cancel(running.id)  # cleanup
+    await reg.cancel(pending.id)  # cleanup
+
+
+# -- reconciliation ----------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_compute_state_none_keeps_snapshot(registry):
+    # Noop's compute_state returns None, so the believed snapshot from
+    # report_progress is preserved and never flipped to complete early.
+    job = await registry.create("noop", {"steps": 4, "sleep_per_step_seconds": 0.02})
+    await wait_for_status(registry, job.id, BackgroundJobStatus.RUNNING)
+    # get() triggers reconcile; with None it must not change progress/status.
+    got = await registry.get(job.id)
+    assert got is not None
+    assert got.status in (BackgroundJobStatus.RUNNING, BackgroundJobStatus.SUCCEEDED)
+    await wait_for_status(registry, job.id, BackgroundJobStatus.SUCCEEDED)
+    assert registry._jobs[job.id].progress.success == 4  # all four steps counted
+
+
+@pytest.mark.asyncio
+async def test_report_progress_preserves_total_and_message_when_omitted():
+    # A count-only report_progress call must not wipe a total/message set by an
+    # earlier call.
+    reg = JobRegistry(max_concurrent=2)
+    reg.register_type(PartialProgressWorker)
+    job = await reg.create("partial_progress", {})
+    await wait_for_status(reg, job.id, BackgroundJobStatus.SUCCEEDED)
+
+    final = reg._jobs[job.id]
+    assert final.progress.success == 5
+    assert final.progress.total == 50
+    assert final.progress.message == "starting"
+
+
+@pytest.mark.asyncio
+async def test_apply_derived_preserves_total_when_compute_state_returns_none():
+    # A compute_state that returns total=None (unknown denominator) alongside
+    # success/error counts must not wipe a total set earlier via report_progress.
+    # total=None means "unknown, keep what we had", mirroring message handling.
+    reg = JobRegistry(max_concurrent=2)
+    reg.register_type(TotalThenNoneWorker)
+    TotalThenNoneWorker.started = asyncio.Event()
+    TotalThenNoneWorker.gate = asyncio.Event()
+    job = await reg.create("total_then_none", {})
+    await wait_for_status(reg, job.id, BackgroundJobStatus.RUNNING)
+    await asyncio.wait_for(TotalThenNoneWorker.started.wait(), timeout=3.0)
+    assert reg._jobs[job.id].progress.total == 10
+
+    # pause() runs compute_state (total=None, success=2, error=1) through
+    # _apply_derived; the prior total of 10 must survive.
+    result = await reg.pause(job.id)
+    assert result.status == BackgroundJobStatus.PAUSED
+    assert result.progress.total == 10
+    assert result.progress.success == 2
+    assert result.progress.error == 1
+
+
+@pytest.mark.asyncio
+async def test_get_reconciles_running_job_to_succeeded_mid_flight():
+    # A long-running job whose source-of-truth state flips to complete should be
+    # reconciled straight to succeeded by get() (the running/get() reconcile
+    # path), not only at launch time.
+    reg = JobRegistry(max_concurrent=2)
+    reg.register_type(ReconcileCompleteWorker)
+    ReconcileCompleteWorker.done = False
+    job = await reg.create("reconcile_complete", {})
+    await wait_for_status(reg, job.id, BackgroundJobStatus.RUNNING)
+    # Still running here (run() is a 5s sleep); now flip the source of truth.
+    assert reg._jobs[job.id].status == BackgroundJobStatus.RUNNING
+    ReconcileCompleteWorker.done = True
+
+    got = await reg.get(job.id)
+    assert got is not None
+    assert got.status == BackgroundJobStatus.SUCCEEDED
+    assert got.progress.success == 3
+    assert got.ended_at is not None
+
+
+# -- concurrency -------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_semaphore_caps_concurrency_fifo():
+    reg = JobRegistry(max_concurrent=2)
+    reg.register_type(NoopJobWorker)
+
+    jobs = []
+    for _ in range(4):
+        jobs.append(
+            await reg.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.05})
+        )
+
+    await asyncio.sleep(0.05)
+    statuses = [reg._jobs[j.id].status for j in jobs]
+    running = [s for s in statuses if s == BackgroundJobStatus.RUNNING]
+    pending = [s for s in statuses if s == BackgroundJobStatus.PENDING]
+    assert len(running) == 2
+    assert len(pending) == 2
+    # FIFO: the first two created are the running ones.
+    assert statuses[0] == BackgroundJobStatus.RUNNING
+    assert statuses[1] == BackgroundJobStatus.RUNNING
+    assert statuses[2] == BackgroundJobStatus.PENDING
+    assert statuses[3] == BackgroundJobStatus.PENDING
+
+    # Cancel the running ones; pending should be promoted.
+    await reg.cancel(jobs[0].id)
+    await reg.cancel(jobs[1].id)
+    await wait_for_status(reg, jobs[2].id, BackgroundJobStatus.RUNNING)
+    await wait_for_status(reg, jobs[3].id, BackgroundJobStatus.RUNNING)
+
+    await reg.cancel(jobs[2].id)
+    await reg.cancel(jobs[3].id)
+
+
+# -- events ------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_registry_emits_snapshot_and_job_events(registry):
+    gen = registry.events.subscribe()
+    snapshot = await asyncio.wait_for(gen.__anext__(), timeout=1.0)
+    assert snapshot.event == "snapshot"
+    assert snapshot.data["jobs"] == []
+
+    events = []
+
+    async def collect():
+        async for event in gen:
+            events.append(event)
+
+    collector = asyncio.create_task(collect())
+    job = await registry.create("noop", {"steps": 2, "sleep_per_step_seconds": 0.01})
+    await wait_for_status(registry, job.id, BackgroundJobStatus.SUCCEEDED)
+    await asyncio.sleep(0.02)
+    collector.cancel()
+    try:
+        await collector
+    except asyncio.CancelledError:
+        pass
+
+    job_events = [e for e in events if e.event == "job"]
+    assert len(job_events) >= 2
+    assert any(e.data["status"] == "running" for e in job_events)
+    assert any(e.data["status"] == "succeeded" for e in job_events)
+
+
+# -- wait --------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_wait_returns_immediately_for_terminal_job(registry):
+    job = await registry.create("noop", {"steps": 2, "sleep_per_step_seconds": 0.01})
+    await wait_for_status(registry, job.id, BackgroundJobStatus.SUCCEEDED)
+    awaited = await asyncio.wait_for(registry.wait(job.id), timeout=1.0)
+    assert awaited.id == job.id  # wait() was called after SUCCEEDED was observed
+    assert awaited.status == BackgroundJobStatus.SUCCEEDED
+    assert awaited.result == {"completed_steps": 2}  # both noop steps counted
+
+
+@pytest.mark.asyncio
+async def test_wait_blocks_then_returns_terminal_record(registry):
+    job = await registry.create("noop", {"steps": 4, "sleep_per_step_seconds": 0.03})
+    await wait_for_status(registry, job.id, BackgroundJobStatus.RUNNING)  # mid-run
+    awaited = await asyncio.wait_for(registry.wait(job.id), timeout=3.0)
+    assert awaited.status == BackgroundJobStatus.SUCCEEDED  # terminal by return
+    assert awaited.result == {"completed_steps": 4}
+
+
+@pytest.mark.asyncio
+async def test_wait_unknown_raises(registry):
+    with pytest.raises(JobNotFoundError):  # this id is never created in this test
+        await registry.wait("j_doesnotexist")
+
+
+@pytest.mark.asyncio
+async def test_wait_times_out(registry):
+    job = await registry.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.05})
+    await wait_for_status(registry, job.id, BackgroundJobStatus.RUNNING)
+    with pytest.raises(asyncio.TimeoutError):  # ~2.5s of steps vs. a 10ms budget
+        await registry.wait(job.id, timeout=0.01)
+    await registry.cancel(job.id)  # cleanup: stop the still-running job
+
+
+@pytest.mark.asyncio
+async def test_wait_cancellation_leaves_job_running(registry):
+    """Load-bearing decoupling invariant: abandoning a wait() never stops the job."""
+    # A second, concurrent waiter must still resolve to the terminal record.
+    job = await registry.create("noop", {"steps": 6, "sleep_per_step_seconds": 0.05})
+    await wait_for_status(registry, job.id, BackgroundJobStatus.RUNNING)
+
+    dropped = asyncio.create_task(registry.wait(job.id))
+    kept = asyncio.create_task(registry.wait(job.id))
+    # Yield long enough for both waiters to park on their await before cancelling.
+    await asyncio.sleep(0.02)
+    dropped.cancel()
+    with pytest.raises(asyncio.CancelledError):
+        await dropped
+
+    # Only the awaiter was torn down: the job runs on and the remaining waiter
+    # still receives its terminal record.
+    record = await asyncio.wait_for(kept, timeout=3.0)
+    assert record.status == BackgroundJobStatus.SUCCEEDED
+    assert record.result == {"completed_steps": 6}
+
+
+@pytest.mark.asyncio
+async def test_wait_multiple_waiters_both_resolve(registry):
+    """Two concurrent wait() calls on one running job both get the final record."""
+    job = await registry.create("noop", {"steps": 4, "sleep_per_step_seconds": 0.03})
+    await wait_for_status(registry, job.id, BackgroundJobStatus.RUNNING)
+    waiters = [asyncio.create_task(registry.wait(job.id)) for _ in range(2)]
+    records = await asyncio.wait_for(asyncio.gather(*waiters), timeout=3.0)
+    for record in records:
+        assert record.status == BackgroundJobStatus.SUCCEEDED
+        assert record.result == {"completed_steps": 4}
+
+
+@pytest.mark.asyncio
+async def test_delete_removes_completion_event(registry):
+    job = await registry.create("noop", {"steps": 2, "sleep_per_step_seconds": 0.01})
+    # wait() lazily creates the completion event; it survives to the terminal set.
+    awaited = await asyncio.wait_for(registry.wait(job.id), timeout=3.0)
+    assert awaited.status == BackgroundJobStatus.SUCCEEDED
+    assert job.id in registry._completion_events  # still tracked after finishing
+
+    await registry.delete(job.id)
+    assert job.id not in registry._completion_events  # delete() drops the entry
+
+
+# -- not found ---------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_get_unknown_returns_none(registry):
+    assert await registry.get("j_doesnotexist") is None  # missing id -> None, no raise
+
+
+@pytest.mark.asyncio
+async def test_lifecycle_op_unknown_raises(registry):
+    with pytest.raises(JobNotFoundError):  # only cancel() is exercised here
+        await registry.cancel("j_doesnotexist")
+
+
+# -- typed progress detail ---------------------------------------------------
+
+
+class DetailModel(BaseModel):  # progress_model declared by both test workers below
+    phase: str
+    done: int
+
+
+class DetailWorker(JobWorker[_EmptyParams, _EmptyResult]):
+    type_name = "detail"
+    params_model = _EmptyParams
+    result_model = _EmptyResult
+    progress_model = DetailModel
+    gate: asyncio.Event  # annotation only; a test must assign it before run()
+
+    async def run(self, params, ctx):
+        await ctx.report_progress_detail(DetailModel(phase="extract", done=3))
+        await type(self).gate.wait()  # park here until the class-level gate is set
+        return _EmptyResult()
+
+
+@pytest.mark.asyncio
+async def test_report_progress_detail_stamps_typed_payload():
+    reg = JobRegistry(max_concurrent=2)
+    reg.register_type(DetailWorker)
+    DetailWorker.gate = asyncio.Event()  # fresh, unset gate: run() blocks on it
+    job = await reg.create("detail", {})
+    await wait_for_status(reg, job.id, BackgroundJobStatus.RUNNING)
+    # RUNNING may be observed before the detail lands; poll up to 50 x 10ms.
+    for _ in range(50):
+        if reg._jobs[job.id].progress_detail is not None:
+            break
+        await asyncio.sleep(0.01)
+    assert reg._jobs[job.id].progress_detail == {"phase": "extract", "done": 3}
+    DetailWorker.gate.set()  # release run() so the job can finish
+
+
+class WrongModel(BaseModel):  # deliberately not DetailModel; used for the rejection test
+    other: str
+
+
+class BadDetailWorker(JobWorker[_EmptyParams, _EmptyResult]):
+    type_name = "bad_detail"
+    params_model = _EmptyParams
+    result_model = _EmptyResult
+    progress_model = DetailModel  # declared detail type
+
+    async def run(self, params, ctx):
+        await ctx.report_progress_detail(WrongModel(other="x"))  # not a DetailModel
+        return _EmptyResult()
+
+
+@pytest.mark.asyncio
+async def test_report_progress_detail_rejects_wrong_model():
+    reg = JobRegistry(max_concurrent=2)
+    reg.register_type(BadDetailWorker)
+    job = await reg.create("bad_detail", {})
+    # The type guard raises inside run(), routing the job to FAILED.
+    await wait_for_status(reg, job.id, BackgroundJobStatus.FAILED)
+    assert reg._jobs[job.id].error is not None  # failure is recorded on the record
diff --git a/app/desktop/studio_server/jobs/workers/__init__.py b/app/desktop/studio_server/jobs/workers/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/app/desktop/studio_server/jobs/workers/noop.py b/app/desktop/studio_server/jobs/workers/noop.py
new file mode 100644
index 000000000..23cc8d04a
--- /dev/null
+++ b/app/desktop/studio_server/jobs/workers/noop.py
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+import asyncio
+
+from pydantic import BaseModel
+
+from ..models import JobContext, JobDerivedState, JobWorker
+
+
+class NoopJobParams(BaseModel):
+    steps: int = 10  # number of loop iterations run() performs
+    sleep_per_step_seconds: float = 0.5  # delay awaited at the start of each step
+    fail_at_step: int | None = None  # 0-based step at which run() raises
+    error_at_steps: list[int] = []  # 0-based steps reported as errors; run continues
+
+
+class NoopJobResult(BaseModel):
+    completed_steps: int  # success + error counts when run() returns
+
+
+class NoopJobWorker(JobWorker[NoopJobParams, NoopJobResult]):
+    type_name = "noop"
+    params_model = NoopJobParams
+    result_model = NoopJobResult
+    supports_pause = True
+
+    async def compute_state(self, params: NoopJobParams) -> JobDerivedState | None:
+        """Report no derived state for this worker."""
+        return None
+
+    async def run(self, params: NoopJobParams, ctx: JobContext) -> NoopJobResult:
+        """Sleep through each step, injecting the configured errors/failure."""
+        ok_count, error_count = 0, 0
+        for step in range(params.steps):
+            await asyncio.sleep(params.sleep_per_step_seconds)
+            if step == params.fail_at_step:
+                raise RuntimeError(f"intentional fail at step {step}")
+            if step not in params.error_at_steps:
+                ok_count += 1
+            else:
+                error_count += 1
+                await ctx.report_error(f"intentional error at step {step}", step=step)
+            label = f"step {step + 1}/{params.steps}"
+            await ctx.report_progress(
+                success=ok_count, error=error_count, total=params.steps, message=label
+            )
+        return NoopJobResult(completed_steps=ok_count + error_count)
diff --git a/app/web_ui/.env.example b/app/web_ui/.env.example
index c966be48d..5bd044fe1 100644
--- a/app/web_ui/.env.example
+++ b/app/web_ui/.env.example
@@ -12,5 +12,9 @@
 # Useful for debugging agent behavior during development.
 # PUBLIC_SHOW_TOOL_CALL_DETAILS=true
 
+# Background Jobs UI — set to "true" to show the Jobs entry in the sidebar and
+# enable the jobs dialog. When unset or set to any other value, the feature is hidden.
+# PUBLIC_ENABLE_JOBS=true
+
 # Sentry — set the DSN to enable client-side error reporting. Unset = no-op.
 # VITE_KILN_SENTRY_DSN=https://...@o.../...
diff --git a/app/web_ui/src/lib/api_schema.d.ts b/app/web_ui/src/lib/api_schema.d.ts
index 2ae200867..d8b538d6e 100644
--- a/app/web_ui/src/lib/api_schema.d.ts
+++ b/app/web_ui/src/lib/api_schema.d.ts
@@ -3122,6 +3122,188 @@ export interface paths {
         patch?: never;
         trace?: never;
     };
+    "/api/jobs/events": {
+        parameters: {
+            query?: never;
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        /**
+         * Stream Job Events
+         * @description Server-sent events for jobs. Emits an initial `snapshot`, then per-job
+         *     `job` and `deleted` events. A pure observer: disconnecting never stops a job.
+         */
+        get: operations["stream_job_events_api_jobs_events_get"];
+        put?: never;
+        post?: never;
+        delete?: never;
+        options?: never;
+        head?: never;
+        patch?: never;
+        trace?: never;
+    };
+    "/api/jobs": {
+        parameters: {
+            query?: never;
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        /** List Jobs */
+        get: operations["list_jobs_api_jobs_get"];
+        put?: never;
+        post?: never;
+        delete?: never;
+        options?: never;
+        head?: never;
+        patch?: never;
+        trace?: never;
+    };
+    "/api/jobs/{type}": {
+        parameters: {
+            query?: never;
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        get?: never;
+        put?: never;
+        /** Create Job */
+        post: operations["create_job_api_jobs__type__post"];
+        delete?: never;
+        options?: never;
+        head?: never;
+        patch?: never;
+        trace?: never;
+    };
+    "/api/jobs/{id}": {
+        parameters: {
+            query?: never;
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        /** Get Job */
+        get: operations["get_job_api_jobs__id__get"];
+        put?: never;
+        post?: never;
+        /** Delete Job */
+        delete: operations["delete_job_api_jobs__id__delete"];
+        options?: never;
+        head?: never;
+        patch?: never;
+        trace?: never;
+    };
+    "/api/jobs/{id}/result": {
+        parameters: {
+            query?: never;
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        /** Get Job Result */
+        get: operations["get_job_result_api_jobs__id__result_get"];
+        put?: never;
+        post?: never;
+        delete?: never;
+        options?: never;
+        head?: never;
+        patch?: never;
+        trace?: never;
+    };
+    "/api/jobs/{id}/wait": {
+        parameters: {
+            query?: never;
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        /**
+         * Wait For Job
+         * @description Block until the job reaches a terminal state, then return its record.
+         *
+         *     A pure observer, like the SSE stream: if the client disconnects, uvicorn
+         *     cancels this handler coroutine, which cancels the wait() await and tears
+         *     down only the awaiter — the job's supervising task keeps running.
+         */
+        get: operations["wait_for_job_api_jobs__id__wait_get"];
+        put?: never;
+        post?: never;
+        delete?: never;
+        options?: never;
+        head?: never;
+        patch?: never;
+        trace?: never;
+    };
+    "/api/jobs/{id}/errors": {
+        parameters: {
+            query?: never;
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        /** Get Job Errors */
+        get: operations["get_job_errors_api_jobs__id__errors_get"];
+        put?: never;
+        post?: never;
+        delete?: never;
+        options?: never;
+        head?: never;
+        patch?: never;
+        trace?: never;
+    };
+    "/api/jobs/{id}/pause": {
+        parameters: {
+            query?: never;
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        get?: never;
+        put?: never;
+        /** Pause Job */
+        post: operations["pause_job_api_jobs__id__pause_post"];
+        delete?: never;
+        options?: never;
+        head?: never;
+        patch?: never;
+        trace?: never;
+    };
+    "/api/jobs/{id}/resume": {
+        parameters: {
+            query?: never;
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        get?: never;
+        put?: never;
+        /** Resume Job */
+        post: operations["resume_job_api_jobs__id__resume_post"];
+        delete?: never;
+        options?: never;
+        head?: never;
+        patch?: never;
+        trace?: never;
+    };
+    "/api/jobs/{id}/cancel": {
+        parameters: {
+            query?: never;
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        get?: never;
+        put?: never;
+        /** Cancel Job */
+        post: operations["cancel_job_api_jobs__id__cancel_post"];
+        delete?: never;
+        options?: never;
+        head?: never;
+        patch?: never;
+        trace?: never;
+    };
 }
 export type webhooks = Record<string, never>;
 export interface components {
@@ -3589,6 +3771,11 @@ export interface components {
              */
             provider_type: "builtin" | "custom";
         };
+        /**
+         * BackgroundJobStatus
+         * @enum {string}
+         */
+        BackgroundJobStatus: "pending" | "running" | "paused" | "succeeded" | "failed" | "cancelled";
         /**
          * BasePrompt
          * @description A prompt for a task. This is the basic data storage format which can be used throughout a project.
@@ -4381,6 +4568,44 @@ export interface components {
             data_strategy: components["schemas"]["ChatStrategy"];
             run_config_properties?: components["schemas"]["KilnAgentRunConfigProperties"] | null;
         };
+        /**
+         * CreateJobRequest
+         * @description Request body for creating a job. Params are validated per job type.
+         */
+        CreateJobRequest: {
+            /**
+             * Params
+             * @description Type-specific job parameters, validated against the type's params model.
+             */
+            params?: {
+                [key: string]: unknown;
+            };
+            /**
+             * Project Id
+             * @description Project to scope this job to (for filtering/visibility). Falls back to the params' project_id when omitted.
+             */
+            project_id?: string | null;
+            /**
+             * Metadata
+             * @description Free-form pass-through attribution, stored verbatim.
+             */
+            metadata?: {
+                [key: string]: unknown;
+            } | null;
+        };
+        /**
+         * CreateJobResponse
+         * @description Response returned when a job is created.
+         */
+        CreateJobResponse: {
+            /**
+             * Job Id
+             * @description The id of the newly created job.
+             */
+            job_id: string;
+            /** @description The job's status immediately after creation. */
+            status: components["schemas"]["BackgroundJobStatus"];
+        };
         /** CreateKilnCopilotApiKeyRequest */
         CreateKilnCopilotApiKeyRequest: {
             /**
@@ -6792,6 +7017,98 @@ export interface components {
              */
             template: string;
         };
+        /**
+         * JobError
+         * @description Small failure summary stamped on the record. Detail lives in the error log.
+         */
+        JobError: {
+            /** Error */
+            error?: string | null;
+            /** Detail */
+            detail?: {
+                [key: string]: unknown;
+            } | null;
+        };
+        /**
+         * JobProgress
+         * @description Count-based progress for a job.
+         *
+         *     Processed = success + error; remaining = total - success - error. The error
+         *     field is a count only — the actual messages live in the per-run error log.
+         */
+        JobProgress: {
+            /** Total */
+            total?: number | null;
+            /**
+             * Success
+             * @default 0
+             */
+            success: number;
+            /**
+             * Error
+             * @default 0
+             */
+            error: number;
+            /** Message */
+            message?: string | null;
+            /**
+             * Updated At
+             * Format: date-time
+             */
+            updated_at?: string;
+        };
+        /**
+         * JobRecord
+         * @description Ephemeral, in-memory bookkeeping for a single job. Never persisted to disk.
+         */
+        JobRecord: {
+            /** Id */
+            id: string;
+            /** Type */
+            type: string;
+            status: components["schemas"]["BackgroundJobStatus"];
+            /** Run Id */
+            run_id?: string | null;
+            progress?: components["schemas"]["JobProgress"];
+            /** Progress Detail */
+            progress_detail?: {
+                [key: string]: unknown;
+            } | null;
+            /** Params */
+            params?: {
+                [key: string]: unknown;
+            };
+            /** Result */
+            result?: {
+                [key: string]: unknown;
+            } | null;
+            error?: components["schemas"]["JobError"] | null;
+            /** Metadata */
+            metadata?: {
+                [key: string]: unknown;
+            };
+            /** Project Id */
+            project_id?: string | null;
+            /**
+             * Supports Pause
+             * @default false
+             */
+            supports_pause: boolean;
+            /**
+             * Created At
+             * Format: date-time
+             */
+            created_at?: string;
+            /**
+             * Updated At
+             * Format: date-time
+             */
+            updated_at?: string;
+            /** Started At */
+            started_at?: string | null;
+            /** Ended At */
+            ended_at?: string | null;
+        };
         /**
          * JobStatus
          * @enum {string}
@@ -17724,4 +18041,385 @@ export interface operations {
             };
         };
     };
+    stream_job_events_api_jobs_events_get: {
+        parameters: {
+            query?: {
+                /** @description Only stream events for this job id. */
+                job_id?: string | null;
+                /** @description Only stream events for this job type. */
+                type?: string | null;
+                /** @description Only stream events for this project id. */
+                project_id?: string | null;
+            };
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        requestBody?: never;
+        responses: {
+            /** @description Successful Response */
+            200: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": unknown;
+                };
+            };
+            /** @description Validation Error */
+            422: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["HTTPValidationError"];
+                };
+            };
+        };
+    };
+    list_jobs_api_jobs_get: {
+        parameters: {
+            query?: {
+                /** @description Filter by job status. */
+                status?: components["schemas"]["BackgroundJobStatus"] | null;
+                /** @description Filter by job type. */
+                type?: string | null;
+                /** @description Filter by project id. */
+                project_id?: string | null;
+                /** @description Only jobs created at or after this ISO-8601 time. */
+                since?: string | null;
+                /** @description Maximum number of jobs to return. */
+                limit?: number | null;
+            };
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        requestBody?: never;
+        responses: {
+            /** @description Successful Response */
+            200: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["JobRecord"][];
+                };
+            };
+            /** @description Validation Error */
+            422: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["HTTPValidationError"];
+                };
+            };
+        };
+    };
+    create_job_api_jobs__type__post: {
+        parameters: {
+            query?: {
+                /** @description When true, block until the job reaches a terminal state and return the full JobRecord instead of CreateJobResponse. */
+                wait?: boolean;
+                /** @description Seconds to wait when wait=true (504 on timeout). Omit to wait indefinitely. */
+                timeout?: number | null;
+            };
+            header?: never;
+            path: {
+                /** @description The registered job type to run. */
+                type: string;
+            };
+            cookie?: never;
+        };
+        requestBody: {
+            content: {
+                "application/json": components["schemas"]["CreateJobRequest"];
+            };
+        };
+        responses: {
+            /** @description Successful Response */
+            201: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["CreateJobResponse"] | components["schemas"]["JobRecord"];
+                };
+            };
+            /** @description Validation Error */
+            422: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["HTTPValidationError"];
+                };
+            };
+        };
+    };
+    get_job_api_jobs__id__get: {
+        parameters: {
+            query?: never;
+            header?: never;
+            path: {
+                /** @description The job id. */
+                id: string;
+            };
+            cookie?: never;
+        };
+        requestBody?: never;
+        responses: {
+            /** @description Successful Response */
+            200: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["JobRecord"];
+                };
+            };
+            /** @description Validation Error */
+            422: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["HTTPValidationError"];
+                };
+            };
+        };
+    };
+    delete_job_api_jobs__id__delete: {
+        parameters: {
+            query?: never;
+            header?: never;
+            path: {
+                /** @description The job id. */
+                id: string;
+            };
+            cookie?: never;
+        };
+        requestBody?: never;
+        responses: {
+            /** @description Successful Response */
+            204: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content?: never;
+            };
+            /** @description Validation Error */
+            422: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["HTTPValidationError"];
+                };
+            };
+        };
+    };
+    get_job_result_api_jobs__id__result_get: {
+        parameters: {
+            query?: never;
+            header?: never;
+            path: {
+                /** @description The job id. */
+                id: string;
+            };
+            cookie?: never;
+        };
+        requestBody?: never;
+        responses: {
+            /** @description Successful Response */
+            200: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": {
+                        [key: string]: unknown;
+                    };
+                };
+            };
+            /** @description Validation Error */
+            422: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["HTTPValidationError"];
+                };
+            };
+        };
+    };
+    wait_for_job_api_jobs__id__wait_get: {
+        parameters: {
+            query?: {
+                /** @description Seconds to wait before giving up (504 on timeout). Omit to wait indefinitely. */
+                timeout?: number | null;
+            };
+            header?: never;
+            path: {
+                /** @description The job id. */
+                id: string;
+            };
+            cookie?: never;
+        };
+        requestBody?: never;
+        responses: {
+            /** @description Successful Response */
+            200: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["JobRecord"];
+                };
+            };
+            /** @description Validation Error */
+            422: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["HTTPValidationError"];
+                };
+            };
+        };
+    };
+    get_job_errors_api_jobs__id__errors_get: {
+        parameters: {
+            query?: {
+                /** @description Read the error log for a specific past run id. */
+                run_id?: string | null;
+            };
+            header?: never;
+            path: {
+                /** @description The job id. */
+                id: string;
+            };
+            cookie?: never;
+        };
+        requestBody?: never;
+        responses: {
+            /** @description Successful Response */
+            200: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": {
+                        [key: string]: unknown;
+                    }[];
+                };
+            };
+            /** @description Validation Error */
+            422: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["HTTPValidationError"];
+                };
+            };
+        };
+    };
+    pause_job_api_jobs__id__pause_post: {
+        parameters: {
+            query?: never;
+            header?: never;
+            path: {
+                /** @description The job id. */
+                id: string;
+            };
+            cookie?: never;
+        };
+        requestBody?: never;
+        responses: {
+            /** @description Successful Response */
+            202: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": unknown;
+                };
+            };
+            /** @description Validation Error */
+            422: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["HTTPValidationError"];
+                };
+            };
+        };
+    };
+    resume_job_api_jobs__id__resume_post: {
+        parameters: {
+            query?: never;
+            header?: never;
+            path: {
+                /** @description The job id. */
+                id: string;
+            };
+            cookie?: never;
+        };
+        requestBody?: never;
+        responses: {
+            /** @description Successful Response */
+            202: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": unknown;
+                };
+            };
+            /** @description Validation Error */
+            422: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["HTTPValidationError"];
+                };
+            };
+        };
+    };
+    cancel_job_api_jobs__id__cancel_post: {
+        parameters: {
+            query?: never;
+            header?: never;
+            path: {
+                /** @description The job id. */
+                id: string;
+            };
+            cookie?: never;
+        };
+        requestBody?: never;
+        responses: {
+            /** @description Successful Response */
+            202: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": unknown;
+                };
+            };
+            /** @description Validation Error */
+            422: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["HTTPValidationError"];
+                };
+            };
+        };
+    };
 }
diff --git a/app/web_ui/src/lib/components/SidebarJobsIndicator.svelte b/app/web_ui/src/lib/components/SidebarJobsIndicator.svelte
new file mode 100644
index 000000000..78bb13d83
--- /dev/null
+++ b/app/web_ui/src/lib/components/SidebarJobsIndicator.svelte
@@ -0,0 +1,60 @@
+<script lang="ts">
+  import { active_jobs_count, jobs } from "$lib/stores/jobs_store"
+  import { jobs_indicator } from "$lib/stores/job_status"
+
+  // "rail" overlays the indicator on a sidebar icon (absolute, top-right).
+  // "inline" sits next to a label in the wide drawer.
+  export let variant: "rail" | "inline" = "inline"
+
+  // Default to the live counts, but accept overrides so the component is
+  // render-testable in isolation.
+  export let active_count: number | undefined = undefined
+  export let total_count: number | undefined = undefined
+
+  $: indicator = jobs_indicator(
+    active_count ?? $active_jobs_count,
+    total_count ?? $jobs.length,
+  )
+  $: label =
+    indicator.kind === "hidden"
+      ? ""
+      : indicator.count > 99
+        ? "99+"
+        : `${indicator.count}`
+  $: aria_label =
+    indicator.kind === "spinner"
+      ? `${indicator.count} active job${indicator.count === 1 ? "" : "s"}`
+      : indicator.kind === "static"
+        ? `${indicator.count} job${indicator.count === 1 ? "" : "s"}`
+        : ""
+</script>
+
+{#if indicator.kind !== "hidden"}
+  {#if variant === "rail"}
+    <span
+      class="absolute -top-1 -right-1 flex items-center gap-0.5 min-w-4 h-4 px-1 rounded-full text-[10px] leading-4 font-medium text-center {indicator.kind ===
+      'spinner'
+        ? 'bg-primary text-primary-content'
+        : 'bg-base-300 text-base-content/70'}"
+      aria-label={aria_label}
+    >
+      {#if indicator.kind === "spinner"}
+        <span class="loading loading-spinner w-2 h-2" aria-hidden="true"></span>
+      {/if}
+      {label}
+    </span>
+  {:else}
+    <span
+      class="badge badge-sm inline-flex items-center gap-1 {indicator.kind ===
+      'spinner'
+        ? 'badge-primary'
+        : 'badge-ghost text-base-content/70'}"
+      aria-label={aria_label}
+    >
+      {#if indicator.kind === "spinner"}
+        <span class="loading loading-spinner w-3 h-3" aria-hidden="true"></span>
+      {/if}
+      {label}
+    </span>
+  {/if}
+{/if}
diff --git a/app/web_ui/src/lib/components/SidebarJobsIndicator.test.ts b/app/web_ui/src/lib/components/SidebarJobsIndicator.test.ts
new file mode 100644
index 000000000..352ffd3ee
--- /dev/null
+++ b/app/web_ui/src/lib/components/SidebarJobsIndicator.test.ts
@@ -0,0 +1,59 @@
+// @vitest-environment jsdom
+import { describe, it, expect, afterEach, vi } from "vitest"
+import { render, cleanup } from "@testing-library/svelte"
+import { readable } from "svelte/store"
+
+// The real jobs_store opens an EventSource on subscribe; mock it with plain
+// stores so the indicator can render in isolation. The component takes
+// active_count / total_count overrides for the actual assertions below.
+vi.mock("$lib/stores/jobs_store", () => ({
+  active_jobs_count: readable(0),
+  jobs: readable([]),
+}))
+
+const SidebarJobsIndicator = (await import("./SidebarJobsIndicator.svelte"))
+  .default
+
+describe("SidebarJobsIndicator", () => {
+  afterEach(() => {
+    cleanup()
+  })
+
+  it("shows a spinner and active count when jobs are active", () => {
+    const { container, getByText } = render(SidebarJobsIndicator, {
+      props: { active_count: 3, total_count: 5 },
+    })
+    expect(getByText("3")).not.toBeNull()
+    expect(container.querySelector(".loading-spinner")).not.toBeNull()
+  })
+
+  it("shows a static muted count without a spinner when none are active", () => {
+    const { container, getByText } = render(SidebarJobsIndicator, {
+      props: { active_count: 0, total_count: 4 },
+    })
+    expect(getByText("4")).not.toBeNull()
+    expect(container.querySelector(".loading-spinner")).toBeNull()
+  })
+
+  it("renders nothing when there are no jobs", () => {
+    const { container } = render(SidebarJobsIndicator, {
+      props: { active_count: 0, total_count: 0 },
+    })
+    expect(container.textContent?.trim()).toBe("")
+  })
+
+  it("caps the displayed count at 99+", () => {
+    const { getByText } = render(SidebarJobsIndicator, {
+      props: { active_count: 150, total_count: 150 },
+    })
+    expect(getByText("99+")).not.toBeNull()
+  })
+
+  it("uses the rail variant styling when requested", () => {
+    const { container } = render(SidebarJobsIndicator, {
+      props: { active_count: 2, total_count: 2, variant: "rail" },
+    })
+    const span = container.querySelector("span")
+    expect(span?.className).toContain("absolute")
+  })
+})
diff --git a/app/web_ui/src/lib/components/jobs_dialog.component.test.ts b/app/web_ui/src/lib/components/jobs_dialog.component.test.ts
new file mode 100644
index 000000000..73f346a6b
--- /dev/null
+++ b/app/web_ui/src/lib/components/jobs_dialog.component.test.ts
@@ -0,0 +1,118 @@
+// @vitest-environment jsdom
+import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"
+import { render, cleanup } from "@testing-library/svelte"
+import { tick } from "svelte"
+import { writable } from "svelte/store"
+import type { JobRecord } from "$lib/stores/jobs_api"
+import { jobs_dialog } from "$lib/stores/jobs_dialog"
+
+// The dialog hosts JobsTable, which subscribes to the job stream. Mock the
+// stores/api so the table renders an inert empty state.
+const jobs = writable<JobRecord[]>([])
+const synced = writable(true)
+const connection = writable<"idle" | "connecting" | "open" | "errored">("open")
+
+vi.mock("$lib/stores/jobs_store", () => ({
+  jobs,
+  synced,
+  connection,
+}))
+
+vi.mock("$lib/stores/jobs_api", () => ({
+  pause_job: vi.fn().mockResolvedValue(undefined),
+  resume_job: vi.fn().mockResolvedValue(undefined),
+  cancel_job: vi.fn().mockResolvedValue(undefined),
+  delete_job: vi.fn().mockResolvedValue(undefined),
+  get_job_errors: vi.fn().mockResolvedValue([]),
+  get_job_result: vi.fn().mockResolvedValue({}),
+}))
+
+const JobsDialog = (await import("./jobs_dialog.svelte")).default
+
+// jsdom doesn't implement HTMLDialogElement.showModal/close; emulate them so
+// the `open` property reflects the real show()/close() calls the component makes.
+beforeEach(() => {
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  ;(HTMLDialogElement.prototype as any).showModal = function (
+    this: HTMLDialogElement,
+  ) {
+    this.setAttribute("open", "")
+  }
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  ;(HTMLDialogElement.prototype as any).close = function (
+    this: HTMLDialogElement,
+  ) {
+    this.removeAttribute("open")
+  }
+  jobs.set([])
+  synced.set(true)
+  connection.set("open")
+})
+
+afterEach(() => {
+  cleanup()
+})
+
+function jobsDialogEl(container: HTMLElement): HTMLDialogElement {
+  // The first dialog in the tree is the Jobs dialog (the errors/result
+  // sub-dialogs live inside JobsTable and follow it).
+  const el = container.querySelector("dialog")
+  expect(el).not.toBeNull()
+  return el as HTMLDialogElement
+}
+
+describe("JobsDialog open signal", () => {
+  it("stays closed on mount even if the signal has already advanced", async () => {
+    // Advance the module-level signal before the component mounts.
+    jobs_dialog.open()
+    jobs_dialog.open()
+
+    const { container } = render(JobsDialog)
+    await tick()
+
+    expect(jobsDialogEl(container).open).toBe(false)
+  })
+
+  it("opens when jobs_dialog.open() is called", async () => {
+    const { container } = render(JobsDialog)
+    await tick()
+    expect(jobsDialogEl(container).open).toBe(false)
+
+    jobs_dialog.open()
+    await tick()
+
+    expect(jobsDialogEl(container).open).toBe(true)
+  })
+
+  it("re-opens after being closed", async () => {
+    const { container } = render(JobsDialog)
+    await tick()
+
+    jobs_dialog.open()
+    await tick()
+    expect(jobsDialogEl(container).open).toBe(true)
+
+    // Close it the way the user would (the dialog's own close()).
+    jobsDialogEl(container).close()
+    expect(jobsDialogEl(container).open).toBe(false)
+
+    jobs_dialog.open()
+    await tick()
+    expect(jobsDialogEl(container).open).toBe(true)
+  })
+
+  it("does not reopen on an unrelated reactive update", async () => {
+    const { container } = render(JobsDialog)
+    await tick()
+    expect(jobsDialogEl(container).open).toBe(false)
+
+    // Mutate unrelated reactive inputs the dialog/table read; none of these
+    // touch the open signal, so the dialog must remain closed.
+    jobs.set([])
+    synced.set(true)
+    connection.set("open")
+    await tick()
+
+    expect(jobsDialogEl(container).open).toBe(false)
+  })
+})
diff --git a/app/web_ui/src/lib/components/jobs_dialog.svelte b/app/web_ui/src/lib/components/jobs_dialog.svelte
new file mode 100644
index 000000000..71f6f3040
--- /dev/null
+++ b/app/web_ui/src/lib/components/jobs_dialog.svelte
@@ -0,0 +1,30 @@
+<script lang="ts">
+  import { get } from "svelte/store"
+  import Dialog from "$lib/ui/dialog.svelte"
+  import JobsTable from "./jobs_table.svelte"
+  import { jobs_dialog } from "$lib/stores/jobs_dialog"
+
+  let dialog: Dialog
+
+  const jobs_dialog_open_signal = jobs_dialog.open_signal
+
+  // Open whenever the cross-component signal changes. Seed from the current
+  // value so the dialog stays closed on mount even if the signal has already
+  // advanced (e.g. a future conditional remount).
+  let last_signal = get(jobs_dialog_open_signal)
+  $: if ($jobs_dialog_open_signal !== last_signal) {
+    last_signal = $jobs_dialog_open_signal
+    dialog?.show()
+  }
+</script>
+
+<Dialog
+  bind:this={dialog}
+  title="Jobs"
+  width="wide"
+  subtitle="Background work for the current project. Jobs keep running even if you leave this page."
+  sub_subtitle="View full page →"
+  sub_subtitle_link="/jobs"
+>
+  <JobsTable />
+</Dialog>
diff --git a/app/web_ui/src/lib/components/jobs_table.svelte b/app/web_ui/src/lib/components/jobs_table.svelte
new file mode 100644
index 000000000..bc801a350
--- /dev/null
+++ b/app/web_ui/src/lib/components/jobs_table.svelte
@@ -0,0 +1,346 @@
+<script lang="ts">
+  import Dialog from "$lib/ui/dialog.svelte"
+  import Intro from "$lib/ui/intro.svelte"
+  import TableActionMenu from "$lib/ui/table_action_menu.svelte"
+  import type { FloatingMenuItem } from "$lib/ui/floating_menu_types"
+  import JobsIcon from "$lib/ui/icons/jobs_icon.svelte"
+  import { jobs, synced, connection } from "$lib/stores/jobs_store"
+  import {
+    available_actions,
+    completed_jobs,
+    is_terminal,
+    job_status_display_badge_class,
+    job_status_display_label,
+    progress_label,
+    progress_percent,
+    type JobAction,
+  } from "$lib/stores/job_status"
+  import {
+    cancel_job,
+    delete_job,
+    get_job_errors,
+    get_job_result,
+    pause_job,
+    resume_job,
+    type JobError,
+    type JobErrorEntry,
+    type JobRecord,
+  } from "$lib/stores/jobs_api"
+  import { formatDate, capitalize } from "$lib/utils/formatters"
+  import { KilnError, createKilnError } from "$lib/utils/error_handlers"
+
+  let action_error: KilnError | null = null
+  let in_flight: Record<string, boolean> = {}
+  let clearing_completed = false
+
+  $: completed = completed_jobs($jobs)
+
+  const action_runners: Record<JobAction, (id: string) => Promise<void>> = {
+    pause: pause_job,
+    resume: resume_job,
+    cancel: cancel_job,
+    delete: delete_job,
+  }
+
+  const action_labels: Record<JobAction, string> = {
+    pause: "Pause",
+    resume: "Resume",
+    cancel: "Cancel",
+    delete: "Delete",
+  }
+
+  async function run_action(action: JobAction, id: string) {
+    action_error = null
+    in_flight = { ...in_flight, [id]: true }
+    try {
+      await action_runners[action](id)
+      // The SSE stream reflects the resulting transition; no local mutation.
+    } catch (e) {
+      action_error = createKilnError(e)
+    } finally {
+      in_flight = { ...in_flight, [id]: false }
+    }
+  }
+
+  // Best-effort removal of every terminal job: all deletes are attempted and
+  // the first failure (if any) is surfaced; SSE removes rows as deletes land.
+  async function clear_completed() {
+    action_error = null
+    clearing_completed = true
+    try {
+      const deletions = completed.map(({ id }) => delete_job(id))
+      const outcomes = await Promise.allSettled(deletions)
+      // Only the first rejection is shown; later ones are not reported.
+      const rejected = outcomes.find((o) => o.status === "rejected")
+      if (rejected?.status === "rejected") {
+        action_error = createKilnError(rejected.reason)
+      }
+    } finally {
+      clearing_completed = false
+    }
+  }
+
+  // Display name for a job type shown in the table's Details column.
+  function job_type_display(type: string): string {
+    // "noop" gets a hand-written label; every other type is passed
+    // through capitalize() unchanged.
+    return type === "noop" ? "No-op" : capitalize(type)
+  }
+
+  function has_errors(job: JobRecord): boolean {
+    return job.status === "failed" || (job.progress?.error ?? 0) > 0
+  }
+
+  // A result is offered only once the job is terminal: a non-null `result`
+  // seen mid-run would be partial and misleading.
+  function has_result(job: JobRecord): boolean {
+    return job.result != null && is_terminal(job.status)
+  }
+
+  // The record's failure summary, or null unless the job has failed.
+  function failure_error(job: JobRecord): JobError | null {
+    return job.status !== "failed" ? null : job.error ?? null
+  }
+
+  // Errors dialog state
+  let errors_dialog: Dialog
+  let errors_loading = false
+  let errors_load_error: KilnError | null = null
+  let error_entries: JobErrorEntry[] = []
+  let errors_summary: JobError | null = null
+
+  async function open_errors(job: JobRecord) {
+    error_entries = []
+    errors_load_error = null
+    errors_summary = failure_error(job)
+    errors_loading = true
+    errors_dialog?.show()
+    try {
+      error_entries = await get_job_errors(job.id)
+    } catch (e) {
+      errors_load_error = createKilnError(e)
+    } finally {
+      errors_loading = false
+    }
+  }
+
+  // Result dialog state
+  let result_dialog: Dialog
+  let result_loading = false
+  let result_load_error: KilnError | null = null
+  let result_data: Record<string, unknown> | null = null
+
+  async function open_result(job: JobRecord) {
+    result_data = null
+    result_load_error = null
+    result_loading = true
+    result_dialog?.show()
+    try {
+      result_data = await get_job_result(job.id)
+    } catch (e) {
+      result_load_error = createKilnError(e)
+    } finally {
+      result_loading = false
+    }
+  }
+
+  // Overflow menu for one row: "View" entries first, then one entry per
+  // lifecycle action (the "delete" action is labelled "Clear").
+  function row_menu_items(job: JobRecord): FloatingMenuItem[] {
+    const views: FloatingMenuItem[] = []
+    if (has_result(job)) {
+      views.push({ label: "View results", onclick: () => open_result(job) })
+    }
+    if (has_errors(job)) {
+      views.push({ label: "View errors", onclick: () => open_errors(job) })
+    }
+    // Lifecycle entries keep the order available_actions() returns.
+    const actions = available_actions(job).map((action): FloatingMenuItem => ({
+      label: action === "delete" ? "Clear" : action_labels[action],
+      onclick: () => run_action(action, job.id),
+    }))
+    return [...views, ...actions]
+  }
+</script>
+
+{#if action_error}
+  <div role="alert" class="alert alert-error text-sm mb-4">
+    <span>{action_error.getMessage() || "An action failed."}</span>
+  </div>
+{/if}
+
+{#if !$synced && $connection === "errored"}
+  <div
+    class="flex flex-col items-center justify-center min-h-[50vh] text-center max-w-md mx-auto"
+  >
+    <div class="text-gray-400 mb-3">
+      <span class="loading loading-spinner loading-md"></span>
+    </div>
+    <h3 class="text-lg font-medium">Can't connect to the job stream</h3>
+    <p class="text-sm text-gray-500 mt-2">
+      We lost the connection to the background job updates and are retrying
+      automatically. Jobs keep running in the background — this page will
+      refresh once the connection is restored.
+    </p>
+  </div>
+{:else if !$synced}
+  <div class="w-full min-h-[50vh] flex justify-center items-center">
+    <div class="loading loading-spinner loading-lg"></div>
+  </div>
+{:else if $jobs.length === 0}
+  <div class="flex justify-center items-center min-h-[55vh]">
+    <Intro
+      title="No jobs yet"
+      description_paragraphs={[
+        "Long-running workloads show up here. Manage them from this page, or leave — they'll keep running in the background.",
+      ]}
+    >
+      <div slot="icon" class="w-12 h-12 text-gray-400" aria-hidden="true">
+        <JobsIcon />
+      </div>
+    </Intro>
+  </div>
+{:else}
+  <div class="flex flex-row justify-end mb-3">
+    <button
+      class="btn btn-xs btn-ghost"
+      disabled={clearing_completed || completed.length === 0}
+      on:click={clear_completed}
+    >
+      {#if clearing_completed}
+        <span class="loading loading-spinner loading-xs"></span>
+      {/if}
+      Clear completed
+    </button>
+  </div>
+  <div class="overflow-x-auto rounded-lg border">
+    <table class="table">
+      <thead>
+        <tr>
+          <th>Details</th>
+          <th>Status</th>
+          <th class="text-right"></th>
+        </tr>
+      </thead>
+      <tbody>
+        {#each $jobs as job (job.id)}
+          <tr>
+            <td class="whitespace-nowrap">
+              <div class="flex flex-col gap-1">
+                <span class="font-medium">{job_type_display(job.type)}</span>
+                <span class="font-mono text-xs text-gray-500">{job.id}</span>
+                <span class="text-xs text-gray-500"
+                  >{formatDate(job.created_at)}</span
+                >
+              </div>
+            </td>
+            <td>
+              <div class="flex flex-col gap-2 w-full max-w-[360px] min-w-48">
+                <span
+                  class="badge h-auto px-3 py-1 self-start whitespace-normal text-center leading-tight {job_status_display_badge_class(
+                    job,
+                  )}"
+                >
+                  {job_status_display_label(job)}
+                </span>
+                <div class="flex items-center justify-between text-gray-500">
+                  {#if job.status === "running"}
+                    <span>{progress_percent(job.progress)}% Complete</span>
+                  {/if}
+                  {#if job.progress?.total}
+                    <span>{progress_label(job.progress)}</span>
+                  {/if}
+                </div>
+                {#if progress_percent(job.progress) < 100}
+                  <progress
+                    class="progress progress-primary bg-base-200 w-full h-2"
+                    value={progress_percent(job.progress)}
+                    max="100"
+                  ></progress>
+                {/if}
+                {#if failure_error(job)?.error}
+                  <span
+                    class="font-mono text-sm text-error block truncate"
+                    title={failure_error(job)?.error}
+                    >{failure_error(job)?.error}</span
+                  >
+                {:else if progress_percent(job.progress) < 100 && job.progress?.message}
+                  <span
+                    class="font-mono text-sm text-gray-500 block truncate"
+                    title={job.progress.message}>{job.progress.message}</span
+                  >
+                {/if}
+              </div>
+            </td>
+            <td class="align-top">
+              <div class="flex flex-row justify-end items-start">
+                <TableActionMenu items={row_menu_items(job)} />
+              </div>
+            </td>
+          </tr>
+        {/each}
+      </tbody>
+    </table>
+  </div>
+{/if}
+
+<Dialog bind:this={errors_dialog} title="Job Errors" width="wide">
+  {#if errors_summary?.error}
+    <div
+      role="alert"
+      class="alert alert-error text-sm mb-4 flex flex-col items-start gap-1"
+    >
+      <span class="font-medium break-words">{errors_summary.error}</span>
+      {#if errors_summary.detail}
+        <pre
+          class="text-xs w-full bg-base-200 text-base-content rounded-md p-2 overflow-x-auto max-h-48">{JSON.stringify(
+            errors_summary.detail,
+            null,
+            2,
+          )}</pre>
+      {/if}
+    </div>
+  {/if}
+  {#if errors_loading}
+    <div class="flex justify-center py-8">
+      <div class="loading loading-spinner loading-lg"></div>
+    </div>
+  {:else if errors_load_error}
+    <div class="text-error text-sm">
+      {errors_load_error.getMessage() || "Could not load errors."}
+    </div>
+  {:else if error_entries.length === 0}
+    <p class="text-sm text-gray-500">
+      No error messages recorded for this job.
+    </p>
+  {:else}
+    <ul class="flex flex-col gap-2 max-h-[60vh] overflow-y-auto">
+      {#each error_entries as entry, index (index)}
+        <li class="text-sm bg-base-200 rounded-md p-3 font-mono break-words">
+          {entry.error_message || JSON.stringify(entry)}
+        </li>
+      {/each}
+    </ul>
+  {/if}
+</Dialog>
+
+<Dialog bind:this={result_dialog} title="Job Result" width="wide">
+  {#if result_loading}
+    <div class="flex justify-center py-8">
+      <div class="loading loading-spinner loading-lg"></div>
+    </div>
+  {:else if result_load_error}
+    <div class="text-error text-sm">
+      {result_load_error.getMessage() || "Could not load result."}
+    </div>
+  {:else if result_data}
+    <pre
+      class="text-xs bg-base-200 rounded-md p-3 overflow-x-auto max-h-[60vh]">{JSON.stringify(
+        result_data,
+        null,
+        2,
+      )}</pre>
+  {:else}
+    <p class="text-sm text-gray-500">No result available.</p>
+  {/if}
+</Dialog>
diff --git a/app/web_ui/src/lib/components/jobs_table.test.ts b/app/web_ui/src/lib/components/jobs_table.test.ts
new file mode 100644
index 000000000..5cb27901d
--- /dev/null
+++ b/app/web_ui/src/lib/components/jobs_table.test.ts
@@ -0,0 +1,146 @@
+// @vitest-environment jsdom
+import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"
+import { render, fireEvent, waitFor, cleanup } from "@testing-library/svelte"
+import { writable } from "svelte/store"
+import type { JobRecord } from "$lib/stores/jobs_api"
+
+// Live job list the table renders from. Replaced per-test.
+const jobs = writable<JobRecord[]>([])
+const synced = writable(true)
+const connection = writable<"idle" | "connecting" | "open" | "errored">("open")
+
+vi.mock("$lib/stores/jobs_store", () => ({
+  jobs,
+  synced,
+  connection,
+}))
+
+const api = {
+  pause_job: vi.fn().mockResolvedValue(undefined),
+  resume_job: vi.fn().mockResolvedValue(undefined),
+  cancel_job: vi.fn().mockResolvedValue(undefined),
+  delete_job: vi.fn().mockResolvedValue(undefined),
+  get_job_errors: vi.fn().mockResolvedValue([]),
+  get_job_result: vi.fn().mockResolvedValue({}),
+}
+vi.mock("$lib/stores/jobs_api", () => api)
+
+const JobsTable = (await import("./jobs_table.svelte")).default
+
+function makeJob(overrides: Partial<JobRecord> = {}): JobRecord {
+  return {
+    id: "j_1",
+    type: "noop",
+    status: "running",
+    supports_pause: false,
+    created_at: "2024-01-01T00:00:00Z",
+    ...overrides,
+  }
+}
+
+describe("JobsTable", () => {
+  beforeEach(() => {
+    vi.clearAllMocks()
+    synced.set(true)
+    connection.set("open")
+    jobs.set([])
+  })
+
+  afterEach(() => {
+    cleanup()
+  })
+
+  it("Clear completed deletes exactly the terminal jobs", async () => {
+    jobs.set([
+      makeJob({ id: "running", status: "running" }),
+      makeJob({ id: "succeeded", status: "succeeded" }),
+      makeJob({ id: "pending", status: "pending" }),
+      makeJob({ id: "failed", status: "failed" }),
+      makeJob({ id: "cancelled", status: "cancelled" }),
+    ])
+    const { getByText } = render(JobsTable)
+
+    await fireEvent.click(getByText("Clear completed"))
+
+    await waitFor(() => {
+      expect(api.delete_job).toHaveBeenCalledTimes(3)
+    })
+    const deleted = api.delete_job.mock.calls.map((c) => c[0]).sort()
+    expect(deleted).toEqual(["cancelled", "failed", "succeeded"])
+    // It must not touch the active jobs.
+    expect(deleted).not.toContain("running")
+    expect(deleted).not.toContain("pending")
+  })
+
+  it("Clear completed surfaces an error when a delete fails", async () => {
+    jobs.set([makeJob({ id: "failed", status: "failed" })])
+    api.delete_job.mockRejectedValueOnce(new Error("boom"))
+    const { getByText, getByRole } = render(JobsTable)
+
+    await fireEvent.click(getByText("Clear completed"))
+
+    await waitFor(() => {
+      expect(getByRole("alert").textContent).toContain("boom")
+    })
+  })
+
+  it("offers a Clear action (not a Delete label) for terminal rows", async () => {
+    jobs.set([makeJob({ id: "succeeded", status: "succeeded" })])
+    const { getByLabelText, getByText, queryByText } = render(JobsTable)
+    await fireEvent.click(getByLabelText("More options"))
+    expect(getByText("Clear")).not.toBeNull()
+    expect(queryByText("Delete")).toBeNull()
+  })
+
+  it("gates row actions on status: running with pause shows Pause + Cancel", async () => {
+    jobs.set([
+      makeJob({ id: "running", status: "running", supports_pause: true }),
+    ])
+    const { getByLabelText, getByText, queryByText } = render(JobsTable)
+    await fireEvent.click(getByLabelText("More options"))
+    expect(getByText("Pause")).not.toBeNull()
+    expect(getByText("Cancel")).not.toBeNull()
+    expect(queryByText("Clear")).toBeNull()
+  })
+
+  it("gates row actions on status: paused shows Resume + Cancel", async () => {
+    jobs.set([makeJob({ id: "paused", status: "paused" })])
+    const { getByLabelText, getByText } = render(JobsTable)
+    await fireEvent.click(getByLabelText("More options"))
+    expect(getByText("Resume")).not.toBeNull()
+    expect(getByText("Cancel")).not.toBeNull()
+  })
+
+  it("gates row actions on status: pending shows only Cancel", async () => {
+    jobs.set([makeJob({ id: "pending", status: "pending" })])
+    const { getByLabelText, getByText, queryByText } = render(JobsTable)
+    await fireEvent.click(getByLabelText("More options"))
+    expect(getByText("Cancel")).not.toBeNull()
+    expect(queryByText("Pause")).toBeNull()
+    expect(queryByText("Resume")).toBeNull()
+    expect(queryByText("Clear")).toBeNull()
+  })
+
+  it("shows the loading spinner before the first sync", () => {
+    synced.set(false)
+    connection.set("connecting")
+    const { container, queryByText } = render(JobsTable)
+    expect(container.querySelector(".loading.loading-spinner")).not.toBeNull()
+    // Neither the table nor the empty state should render while syncing.
+    expect(queryByText("No jobs yet")).toBeNull()
+    expect(container.querySelector("table")).toBeNull()
+  })
+
+  it("shows the empty state when there are no jobs", () => {
+    jobs.set([])
+    const { getByText } = render(JobsTable)
+    expect(getByText("No jobs yet")).not.toBeNull()
+  })
+
+  it("shows the connection-error state when errored before first sync", () => {
+    synced.set(false)
+    connection.set("errored")
+    const { getByText } = render(JobsTable)
+    expect(getByText("Can't connect to the job stream")).not.toBeNull()
+  })
+})
diff --git a/app/web_ui/src/lib/stores/job_status.test.ts b/app/web_ui/src/lib/stores/job_status.test.ts
new file mode 100644
index 000000000..8b8a5d0dc
--- /dev/null
+++ b/app/web_ui/src/lib/stores/job_status.test.ts
@@ -0,0 +1,218 @@
+import { describe, it, expect } from "vitest"
+import {
+  available_actions,
+  completed_jobs,
+  is_active,
+  is_terminal,
+  job_completed_with_errors,
+  job_status_badge_class,
+  job_status_display,
+  job_status_display_badge_class,
+  job_status_display_label,
+  jobs_indicator,
+  progress_label,
+  progress_percent,
+} from "./job_status"
+import type { BackgroundJobStatus, JobRecord } from "./jobs_api"
+
+function makeJob(overrides: Partial<JobRecord> = {}): JobRecord {
+  return {
+    id: "j_1",
+    type: "noop",
+    status: "running",
+    supports_pause: false,
+    ...overrides,
+  }
+}
+
+describe("is_active / is_terminal", () => {
+  it("treats pending, running, paused as active", () => {
+    expect(is_active("pending")).toBe(true)
+    expect(is_active("running")).toBe(true)
+    expect(is_active("paused")).toBe(true)
+  })
+
+  it("treats terminal statuses as not active", () => {
+    expect(is_active("succeeded")).toBe(false)
+    expect(is_active("failed")).toBe(false)
+    expect(is_active("cancelled")).toBe(false)
+  })
+
+  it("identifies terminal statuses", () => {
+    expect(is_terminal("succeeded")).toBe(true)
+    expect(is_terminal("failed")).toBe(true)
+    expect(is_terminal("cancelled")).toBe(true)
+    expect(is_terminal("running")).toBe(false)
+  })
+})
+
+describe("available_actions", () => {
+  it("running without pause support: cancel only", () => {
+    expect(available_actions(makeJob({ status: "running" }))).toEqual([
+      "cancel",
+    ])
+  })
+
+  it("running with pause support: pause then cancel", () => {
+    expect(
+      available_actions(makeJob({ status: "running", supports_pause: true })),
+    ).toEqual(["pause", "cancel"])
+  })
+
+  it("paused: resume and cancel", () => {
+    expect(
+      available_actions(makeJob({ status: "paused", supports_pause: true })),
+    ).toEqual(["resume", "cancel"])
+  })
+
+  it("pending: cancel only", () => {
+    expect(available_actions(makeJob({ status: "pending" }))).toEqual([
+      "cancel",
+    ])
+  })
+
+  it("terminal states: delete only", () => {
+    for (const status of [
+      "succeeded",
+      "failed",
+      "cancelled",
+    ] as BackgroundJobStatus[]) {
+      expect(available_actions(makeJob({ status }))).toEqual(["delete"])
+    }
+  })
+})
+
+describe("job_status_display / job_status_badge_class", () => {
+  const cases: [BackgroundJobStatus, string, string][] = [
+    ["pending", "Pending", "badge-outline"],
+    ["running", "Running", "badge-outline badge-success"],
+    ["paused", "Paused", "badge-outline badge-warning"],
+    ["succeeded", "Succeeded", "badge-outline badge-primary"],
+    ["failed", "Failed", "badge-outline badge-error"],
+    ["cancelled", "Cancelled", "badge-outline"],
+  ]
+  it.each(cases)("maps %s", (status, label, badge) => {
+    expect(job_status_display(status)).toBe(label)
+    expect(job_status_badge_class(status)).toBe(badge)
+  })
+})
+
+describe("job_completed_with_errors / display helpers", () => {
+  it("is true only when succeeded with a positive error count", () => {
+    expect(
+      job_completed_with_errors(
+        makeJob({ status: "succeeded", progress: { success: 8, error: 2 } }),
+      ),
+    ).toBe(true)
+  })
+
+  it("is false when succeeded without errors", () => {
+    expect(
+      job_completed_with_errors(
+        makeJob({ status: "succeeded", progress: { success: 10, error: 0 } }),
+      ),
+    ).toBe(false)
+  })
+
+  it("is false for non-succeeded statuses even with errors", () => {
+    expect(
+      job_completed_with_errors(
+        makeJob({ status: "running", progress: { success: 1, error: 3 } }),
+      ),
+    ).toBe(false)
+    expect(
+      job_completed_with_errors(
+        makeJob({ status: "failed", progress: { success: 1, error: 3 } }),
+      ),
+    ).toBe(false)
+  })
+
+  it("derives label and badge for completed-with-errors", () => {
+    const job = makeJob({
+      status: "succeeded",
+      progress: { success: 8, error: 2 },
+    })
+    expect(job_status_display_label(job)).toBe("Completed with errors")
+    expect(job_status_display_badge_class(job)).toBe(
+      "badge-outline badge-error",
+    )
+  })
+
+  it("falls back to plain status display when there are no errors", () => {
+    const job = makeJob({
+      status: "succeeded",
+      progress: { success: 10, error: 0 },
+    })
+    expect(job_status_display_label(job)).toBe("Succeeded")
+    expect(job_status_display_badge_class(job)).toBe(
+      "badge-outline badge-primary",
+    )
+  })
+})
+
+describe("progress_label", () => {
+  it("shows count only when total is null", () => {
+    expect(progress_label({ success: 3, error: 0 })).toBe("3")
+  })
+
+  it("shows success / total", () => {
+    expect(progress_label({ success: 3, error: 0, total: 10 })).toBe("3 / 10")
+  })
+
+  it("appends errored count when present", () => {
+    expect(progress_label({ success: 3, error: 2, total: 10 })).toBe(
+      "3 / 10 (2 errored)",
+    )
+  })
+
+  it("handles undefined progress", () => {
+    expect(progress_label(undefined)).toBe("0")
+  })
+})
+
+describe("progress_percent", () => {
+  it("returns 0 when total is null or zero", () => {
+    expect(progress_percent({ success: 1, error: 0 })).toBe(0)
+    expect(progress_percent({ success: 1, error: 0, total: 0 })).toBe(0)
+  })
+
+  it("computes processed / total as a percent", () => {
+    expect(progress_percent({ success: 2, error: 1, total: 10 })).toBe(30)
+  })
+
+  it("returns 100 when complete", () => {
+    expect(progress_percent({ success: 8, error: 2, total: 10 })).toBe(100)
+  })
+})
+
+describe("completed_jobs", () => {
+  it("returns exactly the terminal jobs", () => {
+    const jobs = [
+      makeJob({ id: "a", status: "running" }),
+      makeJob({ id: "b", status: "succeeded" }),
+      makeJob({ id: "c", status: "pending" }),
+      makeJob({ id: "d", status: "failed" }),
+      makeJob({ id: "e", status: "paused" }),
+      makeJob({ id: "f", status: "cancelled" }),
+    ]
+    expect(completed_jobs(jobs).map((j) => j.id)).toEqual(["b", "d", "f"])
+  })
+
+  it("returns an empty array when nothing is terminal", () => {
+    expect(completed_jobs([makeJob({ status: "running" })])).toEqual([])
+  })
+})
+
+describe("jobs_indicator", () => {
+  it("shows a spinner with the active count when any job is active", () => {
+    expect(jobs_indicator(2, 5)).toEqual({ kind: "spinner", count: 2 })
+  })
+
+  it("shows a static total count when none active but jobs remain", () => {
+    expect(jobs_indicator(0, 3)).toEqual({ kind: "static", count: 3 })
+  })
+
+  it("is hidden when there are no jobs at all", () => {
+    expect(jobs_indicator(0, 0)).toEqual({ kind: "hidden" })
+  })
+})
diff --git a/app/web_ui/src/lib/stores/job_status.ts b/app/web_ui/src/lib/stores/job_status.ts
new file mode 100644
index 000000000..6e58dfc92
--- /dev/null
+++ b/app/web_ui/src/lib/stores/job_status.ts
@@ -0,0 +1,159 @@
+import type { BackgroundJobStatus, JobProgress, JobRecord } from "./jobs_api"
+
+export const ACTIVE_STATUSES: readonly BackgroundJobStatus[] = [
+  "pending",
+  "running",
+  "paused",
+]
+
+export const TERMINAL_STATUSES: readonly BackgroundJobStatus[] = [
+  "succeeded",
+  "failed",
+  "cancelled",
+]
+
+export function is_active(status: BackgroundJobStatus): boolean {
+  return ACTIVE_STATUSES.includes(status)
+}
+
+export function is_terminal(status: BackgroundJobStatus): boolean {
+  return TERMINAL_STATUSES.includes(status)
+}
+
+export function job_status_display(status: BackgroundJobStatus): string {
+  switch (status) {
+    case "pending":
+      return "Pending"
+    case "running":
+      return "Running"
+    case "paused":
+      return "Paused"
+    case "succeeded":
+      return "Succeeded"
+    case "failed":
+      return "Failed"
+    case "cancelled":
+      return "Cancelled"
+    default: {
+      const exhaustive: never = status
+      return exhaustive
+    }
+  }
+}
+
+export function job_status_badge_class(status: BackgroundJobStatus): string {
+  switch (status) {
+    case "running":
+      return "badge-outline badge-success"
+    case "succeeded":
+      return "badge-outline badge-primary"
+    case "failed":
+      return "badge-outline badge-error"
+    case "paused":
+      return "badge-outline badge-warning"
+    // Pending and cancelled jobs share the neutral, uncolored badge.
+    case "pending":
+    case "cancelled":
+      return "badge-outline"
+    default: {
+      const unhandled: never = status
+      return unhandled
+    }
+  }
+}
+
+// A job that finished successfully but logged one or more non-fatal per-item
+// errors. Like RAG's `completed_with_errors`, this is a frontend-derived display
+// state only — the backend status stays `succeeded` and the error detail lives
+// in the per-run error log. No worker/backend change is needed.
+export function job_completed_with_errors(job: JobRecord): boolean {
+  return job.status === "succeeded" && (job.progress?.error ?? 0) > 0
+}
+
+export function job_status_display_label(job: JobRecord): string {
+  if (job_completed_with_errors(job)) {
+    return "Completed with errors"
+  }
+  return job_status_display(job.status)
+}
+
+export function job_status_display_badge_class(job: JobRecord): string {
+  if (job_completed_with_errors(job)) {
+    return "badge-outline badge-error"
+  }
+  return job_status_badge_class(job.status)
+}
+
+export type JobAction = "pause" | "resume" | "cancel" | "delete"
+
+// The set of lifecycle actions valid for a job given its status and whether
+// its worker supports pause. Mirrors the state machine (functional_spec §3) and
+// the delete policy (architecture open item #7: delete only on terminal state).
+export function available_actions(job: JobRecord): JobAction[] {
+  switch (job.status) {
+    // Terminal jobs are inert: the only action left is removing them.
+    case "succeeded":
+    case "failed":
+    case "cancelled":
+      return ["delete"]
+    // A pending job offers no pause/resume, so it can only be cancelled.
+    case "pending":
+      return ["cancel"]
+    case "paused":
+      return ["resume", "cancel"]
+    case "running":
+      // "pause" comes first, and only when the worker supports pausing.
+      return job.supports_pause ? ["pause", "cancel"] : ["cancel"]
+    default: {
+      // Unreachable for known statuses: assigning to `never` makes the
+      // compiler reject any new BackgroundJobStatus not handled above.
+      const unhandled: never = job.status
+      return unhandled
+    }
+  }
+}
+
+export function progress_label(progress: JobProgress | undefined): string {
+  // "<success>" or "<success> / <total>", suffixed with errors when any exist.
+  const done = progress?.success ?? 0
+  const failed = progress?.error ?? 0
+  const label = [done, progress?.total].filter((n) => n != null).join(" / ")
+  return failed > 0 ? `${label} (${failed} errored)` : label
+}
+
+export function progress_percent(progress: JobProgress | undefined): number {
+  const total = progress?.total
+  if (!total || total <= 0) return 0
+  const processed = (progress?.success ?? 0) + (progress?.error ?? 0)
+  // Hold at 99 until done so rounding (e.g. 199/200) can't show 100% early.
+  const cap = processed >= total ? 100 : 99
+  return Math.max(0, Math.min(cap, Math.round((processed / total) * 100)))
+}
+
+// The jobs that "Clear completed" removes: every job in a terminal state.
+export function completed_jobs(jobs: JobRecord[]): JobRecord[] {
+  return jobs.filter((job) => is_terminal(job.status))
+}
+
+// What the sidebar Jobs indicator should render, derived purely from the live
+// counts so it can be unit-tested without mounting the component:
+//   - "spinner": at least one active job; show a subtle spinner + active count.
+//   - "static": no active jobs but some still exist; show a muted total count.
+//   - "hidden": no jobs at all; show no indicator.
+export type JobsIndicator =
+  | { kind: "spinner"; count: number }
+  | { kind: "static"; count: number }
+  | { kind: "hidden" }
+
+export function jobs_indicator(
+  active_count: number,
+  total_count: number,
+): JobsIndicator {
+  // Precedence: active work (spinner) > any remaining jobs (static) > nothing.
+  if (active_count > 0) {
+    return { kind: "spinner", count: active_count }
+  }
+  return total_count > 0
+    ? { kind: "static", count: total_count }
+    : { kind: "hidden" }
+}
diff --git a/app/web_ui/src/lib/stores/jobs_api.test.ts b/app/web_ui/src/lib/stores/jobs_api.test.ts
new file mode 100644
index 000000000..84770438c
--- /dev/null
+++ b/app/web_ui/src/lib/stores/jobs_api.test.ts
@@ -0,0 +1,150 @@
+import { describe, it, expect, vi, beforeEach } from "vitest"
+import { client } from "$lib/api_client"
+import {
+  cancel_job,
+  create_job,
+  delete_job,
+  get_job,
+  get_job_errors,
+  get_job_result,
+  list_jobs,
+  pause_job,
+  resume_job,
+} from "./jobs_api"
+
+vi.mock("$lib/api_client", () => ({
+  client: {
+    GET: vi.fn(),
+    POST: vi.fn(),
+    DELETE: vi.fn(),
+  },
+  base_url: "http://localhost:8757",
+}))
+
+const mockGET = client.GET as unknown as ReturnType<typeof vi.fn>
+const mockPOST = client.POST as unknown as ReturnType<typeof vi.fn>
+const mockDELETE = client.DELETE as unknown as ReturnType<typeof vi.fn>
+
+describe("jobs_api", () => {
+  beforeEach(() => {
+    vi.clearAllMocks()
+  })
+
+  it("list_jobs calls GET /api/jobs with the query and returns data", async () => {
+    mockGET.mockResolvedValue({ data: [{ id: "j_1" }], error: undefined })
+    const result = await list_jobs({ project_id: "p_1", status: "running" })
+    expect(mockGET).toHaveBeenCalledWith("/api/jobs", {
+      params: { query: { project_id: "p_1", status: "running" } },
+    })
+    expect(result).toEqual([{ id: "j_1" }])
+  })
+
+  it("list_jobs throws when the client returns an error", async () => {
+    mockGET.mockResolvedValue({ data: undefined, error: { detail: "boom" } })
+    await expect(list_jobs()).rejects.toEqual({ detail: "boom" })
+  })
+
+  it("get_job calls GET /api/jobs/{id}", async () => {
+    mockGET.mockResolvedValue({ data: { id: "j_2" }, error: undefined })
+    const result = await get_job("j_2")
+    expect(mockGET).toHaveBeenCalledWith("/api/jobs/{id}", {
+      params: { path: { id: "j_2" } },
+    })
+    expect(result).toEqual({ id: "j_2" })
+  })
+
+  it("create_job calls POST /api/jobs/{type} with params and metadata", async () => {
+    mockPOST.mockResolvedValue({
+      data: { job_id: "j_3", status: "pending" },
+      error: undefined,
+    })
+    const result = await create_job("eval", { eval_id: "e_1" }, { src: "ui" })
+    expect(mockPOST).toHaveBeenCalledWith("/api/jobs/{type}", {
+      params: { path: { type: "eval" } },
+      body: {
+        params: { eval_id: "e_1" },
+        metadata: { src: "ui" },
+        project_id: null,
+      },
+    })
+    expect(result).toEqual({ job_id: "j_3", status: "pending" })
+  })
+
+  it("create_job passes an explicit project_id in the body", async () => {
+    mockPOST.mockResolvedValue({
+      data: { job_id: "j_3b", status: "pending" },
+      error: undefined,
+    })
+    await create_job("noop", { steps: 5 }, null, "p_current")
+    expect(mockPOST).toHaveBeenCalledWith("/api/jobs/{type}", {
+      params: { path: { type: "noop" } },
+      body: { params: { steps: 5 }, metadata: null, project_id: "p_current" },
+    })
+  })
+
+  it("get_job_result calls GET /api/jobs/{id}/result", async () => {
+    mockGET.mockResolvedValue({ data: { total: 5 }, error: undefined })
+    const result = await get_job_result("j_4")
+    expect(mockGET).toHaveBeenCalledWith("/api/jobs/{id}/result", {
+      params: { path: { id: "j_4" } },
+    })
+    expect(result).toEqual({ total: 5 })
+  })
+
+  it("get_job_errors calls GET /api/jobs/{id}/errors with optional run_id", async () => {
+    mockGET.mockResolvedValue({
+      data: [{ error_message: "oops" }],
+      error: undefined,
+    })
+    const result = await get_job_errors("j_5", "run_xyz")
+    expect(mockGET).toHaveBeenCalledWith("/api/jobs/{id}/errors", {
+      params: { path: { id: "j_5" }, query: { run_id: "run_xyz" } },
+    })
+    expect(result).toEqual([{ error_message: "oops" }])
+  })
+
+  it("get_job_errors omits run_id query when not provided", async () => {
+    mockGET.mockResolvedValue({ data: [], error: undefined })
+    await get_job_errors("j_6")
+    expect(mockGET).toHaveBeenCalledWith("/api/jobs/{id}/errors", {
+      params: { path: { id: "j_6" }, query: {} },
+    })
+  })
+
+  it("pause_job calls POST /api/jobs/{id}/pause", async () => {
+    mockPOST.mockResolvedValue({ data: undefined, error: undefined })
+    await pause_job("j_7")
+    expect(mockPOST).toHaveBeenCalledWith("/api/jobs/{id}/pause", {
+      params: { path: { id: "j_7" } },
+    })
+  })
+
+  it("resume_job calls POST /api/jobs/{id}/resume", async () => {
+    mockPOST.mockResolvedValue({ data: undefined, error: undefined })
+    await resume_job("j_8")
+    expect(mockPOST).toHaveBeenCalledWith("/api/jobs/{id}/resume", {
+      params: { path: { id: "j_8" } },
+    })
+  })
+
+  it("cancel_job calls POST /api/jobs/{id}/cancel", async () => {
+    mockPOST.mockResolvedValue({ data: undefined, error: undefined })
+    await cancel_job("j_9")
+    expect(mockPOST).toHaveBeenCalledWith("/api/jobs/{id}/cancel", {
+      params: { path: { id: "j_9" } },
+    })
+  })
+
+  it("delete_job calls DELETE /api/jobs/{id}", async () => {
+    mockDELETE.mockResolvedValue({ data: undefined, error: undefined })
+    await delete_job("j_10")
+    expect(mockDELETE).toHaveBeenCalledWith("/api/jobs/{id}", {
+      params: { path: { id: "j_10" } },
+    })
+  })
+
+  it("lifecycle calls throw on client error", async () => {
+    mockPOST.mockResolvedValue({ data: undefined, error: { detail: "409" } })
+    await expect(cancel_job("j_11")).rejects.toEqual({ detail: "409" })
+  })
+})
diff --git a/app/web_ui/src/lib/stores/jobs_api.ts b/app/web_ui/src/lib/stores/jobs_api.ts
new file mode 100644
index 000000000..d0070c53a
--- /dev/null
+++ b/app/web_ui/src/lib/stores/jobs_api.ts
@@ -0,0 +1,121 @@
+import { client } from "$lib/api_client"
+import type { components } from "$lib/api_schema"
+
+export type JobRecord = components["schemas"]["JobRecord"]
+export type JobProgress = components["schemas"]["JobProgress"]
+export type JobError = components["schemas"]["JobError"]
+export type BackgroundJobStatus = components["schemas"]["BackgroundJobStatus"]
+
+export type JobErrorEntry = {
+  error_message?: string
+} & Record<string, unknown>
+
+export type ListJobsQuery = {
+  status?: BackgroundJobStatus
+  type?: string
+  project_id?: string
+  since?: string
+  limit?: number
+}
+
+export async function list_jobs(
+  query: ListJobsQuery = {},
+): Promise<JobRecord[]> {
+  const { data, error } = await client.GET("/api/jobs", {
+    params: { query },
+  })
+  if (error) {
+    throw error
+  }
+  return data
+}
+
+export async function get_job(id: string): Promise<JobRecord> {
+  const { data, error } = await client.GET("/api/jobs/{id}", {
+    params: { path: { id } },
+  })
+  if (error) {
+    throw error
+  }
+  return data
+}
+
+export async function create_job(
+  type: string,
+  params: Record<string, unknown> = {},
+  metadata: Record<string, unknown> | null = null,
+  project_id: string | null = null,
+): Promise<
+  | components["schemas"]["CreateJobResponse"]
+  | components["schemas"]["JobRecord"]
+> {
+  const { data, error } = await client.POST("/api/jobs/{type}", {
+    params: { path: { type } },
+    body: { params, metadata, project_id },
+  })
+  if (error) {
+    throw error
+  }
+  return data
+}
+
+export async function get_job_result(
+  id: string,
+): Promise<Record<string, unknown>> {
+  const { data, error } = await client.GET("/api/jobs/{id}/result", {
+    params: { path: { id } },
+  })
+  if (error) {
+    throw error
+  }
+  return data
+}
+
+export async function get_job_errors(
+  id: string,
+  run_id?: string,
+): Promise<JobErrorEntry[]> {
+  const { data, error } = await client.GET("/api/jobs/{id}/errors", {
+    params: { path: { id }, query: run_id ? { run_id } : {} },
+  })
+  if (error) {
+    throw error
+  }
+  return data as JobErrorEntry[]
+}
+
+export async function pause_job(id: string): Promise<void> {
+  const { error } = await client.POST("/api/jobs/{id}/pause", {
+    params: { path: { id } },
+  })
+  if (error) {
+    throw error
+  }
+}
+
+export async function resume_job(id: string): Promise<void> {
+  const { error } = await client.POST("/api/jobs/{id}/resume", {
+    params: { path: { id } },
+  })
+  if (error) {
+    throw error
+  }
+}
+
+export async function cancel_job(id: string): Promise<void> {
+  const { error } = await client.POST("/api/jobs/{id}/cancel", {
+    params: { path: { id } },
+  })
+  if (error) {
+    throw error
+  }
+}
+
+export async function delete_job(id: string): Promise<void> {
+  const { error } = await client.DELETE("/api/jobs/{id}", {
+    params: { path: { id } },
+  })
+  if (error) {
+    throw error
+  }
+}
diff --git a/app/web_ui/src/lib/stores/jobs_dialog.test.ts b/app/web_ui/src/lib/stores/jobs_dialog.test.ts
new file mode 100644
index 000000000..f36289de4
--- /dev/null
+++ b/app/web_ui/src/lib/stores/jobs_dialog.test.ts
@@ -0,0 +1,14 @@
+import { describe, it, expect } from "vitest"
+import { get } from "svelte/store"
+import { jobs_dialog } from "./jobs_dialog"
+
+describe("jobs_dialog", () => {
+  it("bumps the open signal each time open() is called", () => {
+    const before = get(jobs_dialog.open_signal)
+    jobs_dialog.open()
+    const afterOne = get(jobs_dialog.open_signal)
+    expect(afterOne).toBe(before + 1)
+    jobs_dialog.open()
+    expect(get(jobs_dialog.open_signal)).toBe(before + 2)
+  })
+})
diff --git a/app/web_ui/src/lib/stores/jobs_dialog.ts b/app/web_ui/src/lib/stores/jobs_dialog.ts
new file mode 100644
index 000000000..43ea3b13d
--- /dev/null
+++ b/app/web_ui/src/lib/stores/jobs_dialog.ts
@@ -0,0 +1,22 @@
+import { writable } from "svelte/store"
+
+// Cross-component channel for opening the global jobs dialog. The dialog itself
+// is mounted once in (app)/+layout.svelte and subscribes here; any component
+// (e.g. the sidebar Jobs widget) can trigger it via `jobs_dialog.open()`.
+function createJobsDialog() {
+  // Monotonic counter rather than a boolean: every open() produces a new
+  // value, so the layout-mounted dialog is notified (and re-shows itself)
+  // even when a boolean flag would not have changed.
+  const counter = writable(0)
+  const { subscribe } = counter
+
+  return {
+    // Read-only view: callers can observe the counter but not set it.
+    open_signal: { subscribe },
+    open() {
+      counter.update((current) => current + 1)
+    },
+  }
+}
+
+export const jobs_dialog = createJobsDialog()
diff --git a/app/web_ui/src/lib/stores/jobs_store.test.ts b/app/web_ui/src/lib/stores/jobs_store.test.ts
new file mode 100644
index 000000000..2eb1d5def
--- /dev/null
+++ b/app/web_ui/src/lib/stores/jobs_store.test.ts
@@ -0,0 +1,305 @@
+// @vitest-environment jsdom
+import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"
+import { get, writable } from "svelte/store"
+import type { JobRecord } from "./jobs_api"
+
+// ui_state drives the project filter. Provide a real writable so we can flip
+// the current project mid-test.
+const ui_state = writable<{ current_project_id: string | null }>({
+  current_project_id: null,
+})
+
+vi.mock("$lib/api_client", () => ({
+  base_url: "http://localhost:8757",
+  client: {},
+}))
+
+vi.mock("$lib/stores", () => ({
+  ui_state,
+}))
+
+// Spy on every mutation entry point. The store is a pure observer: it must
+// never call any of these. We assert that explicitly on teardown below.
+const mutationSpies = {
+  pause_job: vi.fn(),
+  resume_job: vi.fn(),
+  cancel_job: vi.fn(),
+  delete_job: vi.fn(),
+  create_job: vi.fn(),
+}
+vi.mock("./jobs_api", () => mutationSpies)
+
+// A controllable fake EventSource installed on globalThis. Records construction
+// URLs and close() calls so tests can assert the pure-observer / reconnect
+// behavior without a real network connection.
+type Listener = (event: MessageEvent) => void
+
+class FakeEventSource {
+  static instances: FakeEventSource[] = []
+  url: string
+  closed = false
+  onerror: ((this: EventSource, ev: Event) => void) | null = null
+  private listeners: Record<string, Listener[]> = {}
+
+  constructor(url: string) {
+    this.url = url
+    FakeEventSource.instances.push(this)
+  }
+
+  addEventListener(type: string, listener: Listener) {
+    ;(this.listeners[type] ||= []).push(listener)
+  }
+
+  close() {
+    this.closed = true
+  }
+
+  emit(type: string, data: unknown) {
+    const event = { data: JSON.stringify(data) } as MessageEvent
+    for (const listener of this.listeners[type] || []) {
+      listener(event)
+    }
+  }
+
+  fail() {
+    this.onerror?.call(this as unknown as EventSource, new Event("error"))
+  }
+
+  static latest(): FakeEventSource {
+    return FakeEventSource.instances[FakeEventSource.instances.length - 1]
+  }
+
+  static reset() {
+    FakeEventSource.instances = []
+  }
+}
+
+function makeJob(overrides: Partial<JobRecord> = {}): JobRecord {
+  return {
+    id: "j_1",
+    type: "noop",
+    status: "running",
+    supports_pause: true,
+    created_at: "2026-05-28T12:00:00Z",
+    ...overrides,
+  }
+}
+
+// Import the module fresh per test so the ref-counted connection and the
+// module-level ui_state subscription start clean.
+async function loadStore() {
+  vi.resetModules()
+  ui_state.set({ current_project_id: null })
+  FakeEventSource.reset()
+  return await import("./jobs_store")
+}
+
+describe("jobs_store", () => {
+  beforeEach(() => {
+    vi.useFakeTimers()
+    // @ts-expect-error install fake on global
+    globalThis.EventSource = FakeEventSource
+    for (const spy of Object.values(mutationSpies)) {
+      spy.mockClear()
+    }
+  })
+
+  afterEach(() => {
+    vi.useRealTimers()
+    vi.restoreAllMocks()
+  })
+
+  it("snapshot replaces the whole map", async () => {
+    const { jobs } = await loadStore()
+    const unsub = jobs.subscribe(() => {})
+    const source = FakeEventSource.latest()
+
+    source.emit("snapshot", {
+      jobs: [makeJob({ id: "j_1" }), makeJob({ id: "j_2" })],
+    })
+    expect(
+      get(jobs)
+        .map((j) => j.id)
+        .sort(),
+    ).toEqual(["j_1", "j_2"])
+
+    // A second snapshot fully replaces the prior contents.
+    source.emit("snapshot", { jobs: [makeJob({ id: "j_3" })] })
+    expect(get(jobs).map((j) => j.id)).toEqual(["j_3"])
+    unsub()
+  })
+
+  it("job event inserts a new job", async () => {
+    const { jobs } = await loadStore()
+    const unsub = jobs.subscribe(() => {})
+    const source = FakeEventSource.latest()
+    source.emit("snapshot", { jobs: [] })
+    source.emit("job", makeJob({ id: "j_new" }))
+    expect(get(jobs).map((j) => j.id)).toEqual(["j_new"])
+    unsub()
+  })
+
+  it("job event upserts status + progress for an existing job", async () => {
+    const { jobs } = await loadStore()
+    const unsub = jobs.subscribe(() => {})
+    const source = FakeEventSource.latest()
+    source.emit("snapshot", {
+      jobs: [
+        makeJob({
+          id: "j_1",
+          status: "running",
+          progress: { success: 1, error: 0, total: 10 },
+        }),
+      ],
+    })
+    source.emit(
+      "job",
+      makeJob({
+        id: "j_1",
+        status: "succeeded",
+        progress: { success: 10, error: 0, total: 10 },
+      }),
+    )
+    const job = get(jobs)[0]
+    expect(job.status).toBe("succeeded")
+    expect(job.progress?.success).toBe(10)
+    unsub()
+  })
+
+  it("deleted event removes a job; unknown id is a no-op", async () => {
+    const { jobs } = await loadStore()
+    const unsub = jobs.subscribe(() => {})
+    const source = FakeEventSource.latest()
+    source.emit("snapshot", {
+      jobs: [makeJob({ id: "j_1" }), makeJob({ id: "j_2" })],
+    })
+    source.emit("deleted", { id: "j_1" })
+    expect(get(jobs).map((j) => j.id)).toEqual(["j_2"])
+    source.emit("deleted", { id: "does_not_exist" })
+    expect(get(jobs).map((j) => j.id)).toEqual(["j_2"])
+    unsub()
+  })
+
+  it("reconnects on error and re-syncs from the fresh snapshot", async () => {
+    const { jobs } = await loadStore()
+    const unsub = jobs.subscribe(() => {})
+    const first = FakeEventSource.latest()
+    first.emit("snapshot", { jobs: [makeJob({ id: "stale" })] })
+    expect(get(jobs).map((j) => j.id)).toEqual(["stale"])
+
+    first.fail()
+    expect(first.closed).toBe(true)
+
+    // After the fixed 2000 ms reconnect delay a new EventSource is opened.
+    vi.advanceTimersByTime(2000)
+    expect(FakeEventSource.instances.length).toBe(2)
+    const second = FakeEventSource.latest()
+    expect(second).not.toBe(first)
+
+    second.emit("snapshot", { jobs: [makeJob({ id: "fresh" })] })
+    expect(get(jobs).map((j) => j.id)).toEqual(["fresh"])
+    unsub()
+  })
+
+  it("active_jobs_count counts only pending/running/paused", async () => {
+    const { jobs, active_jobs_count } = await loadStore()
+    const unsubJobs = jobs.subscribe(() => {})
+    const unsub = active_jobs_count.subscribe(() => {})
+    const source = FakeEventSource.latest()
+    source.emit("snapshot", {
+      jobs: [
+        makeJob({ id: "a", status: "pending" }),
+        makeJob({ id: "b", status: "running" }),
+        makeJob({ id: "c", status: "paused" }),
+        makeJob({ id: "d", status: "succeeded" }),
+        makeJob({ id: "e", status: "failed" }),
+      ],
+    })
+    expect(get(active_jobs_count)).toBe(3)
+    unsub()
+    unsubJobs()
+  })
+
+  it("closes the EventSource when the last subscriber unsubscribes (pure observer)", async () => {
+    const { jobs } = await loadStore()
+    const unsub1 = jobs.subscribe(() => {})
+    const unsub2 = jobs.subscribe(() => {})
+    const source = FakeEventSource.latest()
+    // Only one EventSource is opened regardless of subscriber count.
+    expect(FakeEventSource.instances.length).toBe(1)
+
+    unsub1()
+    expect(source.closed).toBe(false)
+    unsub2()
+    expect(source.closed).toBe(true)
+  })
+
+  it("opens with the project filter and re-opens when the project changes", async () => {
+    const { jobs } = await loadStore()
+    ui_state.set({ current_project_id: "p_1" })
+    const unsub = jobs.subscribe(() => {})
+    const first = FakeEventSource.latest()
+    expect(first.url).toContain("project_id=p_1")
+
+    ui_state.set({ current_project_id: "p_2" })
+    expect(first.closed).toBe(true)
+    const second = FakeEventSource.latest()
+    expect(second).not.toBe(first)
+    expect(second.url).toContain("project_id=p_2")
+    unsub()
+  })
+
+  it("ignores ui_state changes that don't touch current_project_id", async () => {
+    const { jobs } = await loadStore()
+    ui_state.set({ current_project_id: "p_1" })
+    const unsub = jobs.subscribe(() => {})
+    const first = FakeEventSource.latest()
+    expect(FakeEventSource.instances.length).toBe(1)
+
+    // An unrelated ui_state update with the same project id must not re-open.
+    ui_state.set({ current_project_id: "p_1", other: "x" } as {
+      current_project_id: string | null
+    })
+    expect(FakeEventSource.instances.length).toBe(1)
+    expect(first.closed).toBe(false)
+    unsub()
+  })
+
+  it("reports an errored connection when the stream fails before syncing", async () => {
+    const { jobs, connection } = await loadStore()
+    const unsub = jobs.subscribe(() => {})
+    expect(get(connection)).toBe("connecting")
+
+    FakeEventSource.latest().fail()
+    expect(get(connection)).toBe("errored")
+    unsub()
+  })
+
+  it("connection becomes open once a snapshot arrives", async () => {
+    const { jobs, connection } = await loadStore()
+    const unsub = jobs.subscribe(() => {})
+    FakeEventSource.latest().emit("snapshot", { jobs: [] })
+    expect(get(connection)).toBe("open")
+    unsub()
+  })
+
+  it("never calls a mutation endpoint (pure observer) across its full lifecycle", async () => {
+    const { jobs } = await loadStore()
+    const unsub = jobs.subscribe(() => {})
+    const source = FakeEventSource.latest()
+
+    // Drive every observable path: snapshot, job upsert, deletion, an error +
+    // reconnect, a project switch, and finally teardown.
+    source.emit("snapshot", { jobs: [makeJob({ id: "j_1" })] })
+    source.emit("job", makeJob({ id: "j_1", status: "succeeded" }))
+    source.emit("deleted", { id: "j_1" })
+    source.fail()
+    vi.advanceTimersByTime(2000)
+    ui_state.set({ current_project_id: "p_switch" })
+    unsub()
+
+    for (const spy of Object.values(mutationSpies)) {
+      expect(spy).not.toHaveBeenCalled()
+    }
+  })
+})
diff --git a/app/web_ui/src/lib/stores/jobs_store.ts b/app/web_ui/src/lib/stores/jobs_store.ts
new file mode 100644
index 000000000..1718f95bf
--- /dev/null
+++ b/app/web_ui/src/lib/stores/jobs_store.ts
@@ -0,0 +1,244 @@
+import { derived, get, writable, type Readable } from "svelte/store"
+import { base_url } from "$lib/api_client"
+import { ui_state } from "$lib/stores"
+import type { JobRecord } from "./jobs_api"
+import { is_active } from "./job_status"
+
+const RECONNECT_DELAY_MS = 2000
+
+type JobsMap = Map<string, JobRecord>
+
+// Connection state surfaced to the UI so the panel can distinguish "still
+// connecting" from "can't connect". Stays a pure observer: this only reports
+// the EventSource lifecycle, it never triggers a job mutation.
+export type JobsConnection = "idle" | "connecting" | "open" | "errored"
+
+function createJobsStore() {
+  const jobs_map = writable<JobsMap>(new Map())
+
+  // True once the first `snapshot` event for the current connection has been
+  // processed. Lets the panel show a loading state until the stream syncs.
+  const synced = writable(false)
+
+  // Lifecycle of the underlying EventSource. The panel pairs this with `synced`
+  // to show a "can't connect / retrying" affordance instead of spinning forever
+  // when the stream errors before its first snapshot.
+  const connection = writable<JobsConnection>("idle")
+
+  let event_source: EventSource | null = null
+  let reconnect_timer: ReturnType<typeof setTimeout> | null = null
+  let subscriber_count = 0
+  let current_project_id: string | null = null
+
+  function build_url(): string {
+    const url = new URL(`${base_url}/api/jobs/events`)
+    if (current_project_id) {
+      url.searchParams.set("project_id", current_project_id)
+    }
+    return url.toString()
+  }
+
+  function upsert(record: JobRecord) {
+    jobs_map.update((map) => {
+      const next = new Map(map)
+      next.set(record.id, record)
+      return next
+    })
+  }
+
+  function remove(id: string) {
+    jobs_map.update((map) => {
+      if (!map.has(id)) {
+        return map
+      }
+      const next = new Map(map)
+      next.delete(id)
+      return next
+    })
+  }
+
+  function replace_all(records: JobRecord[]) {
+    const next: JobsMap = new Map()
+    for (const record of records) {
+      next.set(record.id, record)
+    }
+    jobs_map.set(next)
+  }
+
+  function handle_snapshot(event: MessageEvent) {
+    try {
+      const parsed = JSON.parse(event.data) as { jobs?: JobRecord[] }
+      replace_all(parsed.jobs ?? [])
+      synced.set(true)
+      connection.set("open")
+    } catch {
+      // Ignore malformed payloads; the next snapshot will re-sync.
+    }
+  }
+
+  function handle_job(event: MessageEvent) {
+    try {
+      const record = JSON.parse(event.data) as JobRecord | null
+      if (record?.id) upsert(record)
+    } catch {
+      // Ignore malformed payloads (bad JSON); id-less records are skipped.
+    }
+  }
+
+  function handle_deleted(event: MessageEvent) {
+    try {
+      const parsed = JSON.parse(event.data) as { id?: string }
+      if (parsed.id) {
+        remove(parsed.id)
+      }
+    } catch {
+      // Ignore malformed payloads.
+    }
+  }
+
+  function clear_reconnect() {
+    if (reconnect_timer !== null) {
+      clearTimeout(reconnect_timer)
+      reconnect_timer = null
+    }
+  }
+
+  function schedule_reconnect() {
+    if (reconnect_timer !== null || subscriber_count === 0) {
+      return
+    }
+    reconnect_timer = setTimeout(() => {
+      reconnect_timer = null
+      if (subscriber_count > 0) {
+        connect()
+      }
+    }, RECONNECT_DELAY_MS)
+  }
+
+  function close_source() {
+    if (event_source) {
+      event_source.close()
+      event_source = null
+    }
+  }
+
+  function connect() {
+    // Pure observer: opening or closing this stream never affects a job. A
+    // dropped connection is recovered by reconnecting; the fresh `snapshot`
+    // re-syncs the map (no Last-Event-ID needed).
+    const EventSourceCtor = globalThis.EventSource
+    if (!EventSourceCtor) {
+      return
+    }
+    close_source()
+    clear_reconnect()
+    synced.set(false)
+    connection.set("connecting")
+
+    const source = new EventSourceCtor(build_url())
+    event_source = source
+
+    source.addEventListener("snapshot", handle_snapshot as EventListener)
+    source.addEventListener("job", handle_job as EventListener)
+    source.addEventListener("deleted", handle_deleted as EventListener)
+    source.onerror = () => {
+      // Only reconnect if this is still the active source (avoids racing a
+      // teardown or a project switch).
+      if (event_source !== source) {
+        return
+      }
+      close_source()
+      connection.set("errored")
+      schedule_reconnect()
+    }
+  }
+
+  function disconnect() {
+    close_source()
+    clear_reconnect()
+    synced.set(false)
+    connection.set("idle")
+  }
+
+  // Re-open the stream against a new project filter. Called by the ui_state
+  // subscription below and exposed for tests.
+  function set_project(project_id: string | null) {
+    if (project_id === current_project_id) {
+      return
+    }
+    current_project_id = project_id
+    if (subscriber_count > 0) {
+      connect()
+    }
+  }
+
+  // Track the active project from UI state so the badge/panel stay scoped to
+  // the project the user is viewing. `ui_state` fires on any field change, so
+  // we react only when `current_project_id` actually differs from what we last
+  // saw — keeping rapid project switches correct (the old source is closed by
+  // `connect()` before the new one opens, so there's no leak).
+  current_project_id = get(ui_state).current_project_id ?? null
+  let last_seen_project_id = current_project_id
+  ui_state.subscribe((state) => {
+    const next = state.current_project_id ?? null
+    if (next === last_seen_project_id) {
+      return
+    }
+    last_seen_project_id = next
+    set_project(next)
+  })
+
+  const subscribe: Readable<JobsMap>["subscribe"] = (run, invalidate) => {
+    if (subscriber_count === 0) connect()
+    subscriber_count += 1
+    const unsubscribe = jobs_map.subscribe(run, invalidate)
+    // Make the returned function idempotent: a second call must not decrement
+    // the ref count again and close the stream under remaining subscribers.
+    let released = false
+    return () => {
+      if (released) return
+      released = true
+      unsubscribe()
+      subscriber_count -= 1
+      if (subscriber_count === 0) disconnect()
+    }
+  }
+
+  return {
+    subscribe,
+    synced: { subscribe: synced.subscribe } as Readable<boolean>,
+    connection: {
+      subscribe: connection.subscribe,
+    } as Readable<JobsConnection>,
+    set_project,
+    // Exposed for tests / explicit teardown; not part of normal usage.
+    _disconnect: disconnect,
+  }
+}
+
+export const jobs_store = createJobsStore()
+
+export const synced: Readable<boolean> = jobs_store.synced
+
+export const connection: Readable<JobsConnection> = jobs_store.connection
+
+export const jobs: Readable<JobRecord[]> = derived(jobs_store, ($map) => {
+  // Newest first. A missing or unparseable created_at counts as epoch 0, so
+  // the comparator never returns NaN (a NaN result would leave the sort
+  // order implementation-defined).
+  const ms = (job: JobRecord) => new Date(job.created_at ?? 0).getTime() || 0
+  return Array.from($map.values()).sort((a, b) => ms(b) - ms(a))
+})
+
+// Number of jobs whose status passes `is_active`. Derived from the jobs map,
+// so it is recomputed each time the map store emits a new value and needs no
+// bookkeeping of its own.
+export const active_jobs_count: Readable<number> = derived(
+  jobs_store,
+  ($map) =>
+    Array.from($map.values()).reduce(
+      // Add one for each active job; other jobs leave the tally unchanged.
+      (tally, job) => (is_active(job.status) ? tally + 1 : tally),
+      0,
+    ),
+)
diff --git a/app/web_ui/src/lib/ui/icons/jobs_icon.svelte b/app/web_ui/src/lib/ui/icons/jobs_icon.svelte
new file mode 100644
index 000000000..8e6f82c3f
--- /dev/null
+++ b/app/web_ui/src/lib/ui/icons/jobs_icon.svelte
@@ -0,0 +1,29 @@
+<svg
+  class="w-full h-full"
+  viewBox="0 0 24 24"
+  fill="none"
+  xmlns="http://www.w3.org/2000/svg"
+  aria-hidden="true"
+>
+  <circle cx="4.5" cy="6" r="1.25" fill="currentColor" />
+  <circle cx="4.5" cy="12" r="1.25" fill="currentColor" />
+  <circle cx="4.5" cy="18" r="1.25" fill="currentColor" />
+  <path
+    d="M9 6H20"
+    stroke="currentColor"
+    stroke-width="1.5"
+    stroke-linecap="round"
+  />
+  <path
+    d="M9 12H20"
+    stroke="currentColor"
+    stroke-width="1.5"
+    stroke-linecap="round"
+  />
+  <path
+    d="M9 18H16"
+    stroke="currentColor"
+    stroke-width="1.5"
+    stroke-linecap="round"
+  />
+</svg>
diff --git a/app/web_ui/src/routes/(app)/+layout.svelte b/app/web_ui/src/routes/(app)/+layout.svelte
index ac367455b..26e510c06 100644
--- a/app/web_ui/src/routes/(app)/+layout.svelte
+++ b/app/web_ui/src/routes/(app)/+layout.svelte
@@ -18,6 +18,10 @@
   import ToolsIcon from "$lib/ui/icons/tools_icon.svelte"
   import ChatBar from "./chat_bar.svelte"
   import ChatIcon from "$lib/ui/icons/chat_icon.svelte"
+  import JobsIcon from "$lib/ui/icons/jobs_icon.svelte"
+  import SidebarJobsIndicator from "$lib/components/SidebarJobsIndicator.svelte"
+  import JobsDialog from "$lib/components/jobs_dialog.svelte"
+  import { jobs_dialog } from "$lib/stores/jobs_dialog"
   import { Section } from "$lib/ui/section"
   import Dialog from "$lib/ui/dialog.svelte"
   import SidebarRail from "./sidebar_rail.svelte"
@@ -25,6 +29,11 @@
   import { chatBarExpanded } from "$lib/stores/chat_ui_state"
   import { derived } from "svelte/store"
   import DatabaseIcon from "$lib/ui/icons/database_icon.svelte"
+  import { env } from "$env/dynamic/public"
+
+  // Feature flag: the background Jobs UI (sidebar entry + dialog) only renders
+  // when PUBLIC_ENABLE_JOBS is explicitly "true". See .env.example.
+  const jobs_enabled = env.PUBLIC_ENABLE_JOBS === "true"
 
   // Rail-eligibility predicate: lg breakpoint, narrow viewport (< 1550px),
   // and chat bar expanded. See functional_spec.md "Trigger".
@@ -161,7 +170,11 @@
     ></label>
 
     {#if showRail}
-      <SidebarRail {section} openTaskDialog={() => taskDialog?.show()} />
+      <SidebarRail
+        {section}
+        {jobs_enabled}
+        openTaskDialog={() => taskDialog?.show()}
+      />
     {:else}
       <ul
         class="sidebar-menu menu bg-base-200 text-base-content w-72 md:w-52 2xl:w-56 p-3 pt-1 lg:pt-3 min-h-full text-xs"
@@ -435,6 +448,21 @@
             >
           </li>
         {/if}
+        {#if jobs_enabled}
+          <li class="menu-sm">
+            <button
+              type="button"
+              class="text-xs text-base-content"
+              on:click={() => jobs_dialog.open()}
+            >
+              <div class="sidebar-icon">
+                <JobsIcon />
+              </div>
+              Jobs
+              <SidebarJobsIndicator variant="inline" />
+            </button>
+          </li>
+        {/if}
         <li class="menu-sm">
           <a
             href="/settings"
@@ -473,6 +501,10 @@
   <SelectTasksMenu on:dismiss={() => taskDialog?.close()} />
 </Dialog>
 
+{#if jobs_enabled}
+  <JobsDialog />
+{/if}
+
 <style>
   :global(ul > li.menu-nested) {
     padding: 0;
diff --git a/app/web_ui/src/routes/(app)/jobs/+page.svelte b/app/web_ui/src/routes/(app)/jobs/+page.svelte
new file mode 100644
index 000000000..645011617
--- /dev/null
+++ b/app/web_ui/src/routes/(app)/jobs/+page.svelte
@@ -0,0 +1,65 @@
+<script lang="ts">
+  import AppPage from "../app_page.svelte"
+  import JobsTable from "$lib/components/jobs_table.svelte"
+  import { create_job } from "$lib/stores/jobs_api"
+  import { KilnError, createKilnError } from "$lib/utils/error_handlers"
+  import { agentInfo } from "$lib/agent"
+  import { ui_state } from "$lib/stores"
+
+  agentInfo.set({
+    name: "Background Jobs",
+    description:
+      "Background job panel. Lists jobs with status, progress, and lifecycle controls.",
+  })
+
+  let action_error: KilnError | null = null
+  let creating_test_job = false
+
+  // Kicks off a no-op job: a simulated long-running task (sleeps per step,
+  // streams progress, logs a couple of non-fatal errors) for exercising the
+  // panel end-to-end. The new job appears via the SSE stream — no local mutation.
+  async function start_test_job() {
+    // Clear the previous error and mark the request in flight, which
+    // disables the header button until it settles.
+    creating_test_job = true
+    action_error = null
+    const noop_params = {
+      steps: 20,
+      sleep_per_step_seconds: 1,
+      error_at_steps: [4, 12],
+    }
+    try {
+      await create_job("noop", noop_params, null, $ui_state.current_project_id)
+    } catch (err) {
+      // Rendered in the alert banner above the jobs table.
+      action_error = createKilnError(err)
+    } finally {
+      // Runs on success and failure alike so the button is usable again.
+      creating_test_job = false
+    }
+  }
+
+  $: action_buttons = [
+    {
+      label: creating_test_job ? "Starting…" : "Start test job",
+      handler: start_test_job,
+      primary: true,
+      loading: creating_test_job,
+      disabled: creating_test_job,
+    },
+  ]
+</script>
+
+<AppPage
+  title="Jobs (temporary test page)"
+  subtitle="This page is a placeholder test to trigger jobs - will be removed before merging"
+  {action_buttons}
+>
+  {#if action_error}
+    <div role="alert" class="alert alert-error text-sm mb-4">
+      <span>{action_error.getMessage() || "An action failed."}</span>
+    </div>
+  {/if}
+
+  <JobsTable />
+</AppPage>
diff --git a/app/web_ui/src/routes/(app)/sidebar_rail.svelte b/app/web_ui/src/routes/(app)/sidebar_rail.svelte
index 1fc1d4c3c..7aca02253 100644
--- a/app/web_ui/src/routes/(app)/sidebar_rail.svelte
+++ b/app/web_ui/src/routes/(app)/sidebar_rail.svelte
@@ -8,9 +8,13 @@
   import SidebarRailSettings from "./sidebar_rail_settings.svelte"
   import ChatIcon from "$lib/ui/icons/chat_icon.svelte"
   import EvalIcon from "$lib/ui/icons/eval_icon.svelte"
+  import JobsIcon from "$lib/ui/icons/jobs_icon.svelte"
+  import SidebarJobsIndicator from "$lib/components/SidebarJobsIndicator.svelte"
+  import { jobs_dialog } from "$lib/stores/jobs_dialog"
 
   export let section: Section = Section.None
   export let openTaskDialog: () => void
+  export let jobs_enabled: boolean = false
 </script>
 
 <nav
@@ -115,5 +119,14 @@
 
   <SidebarRailProgress />
 
+  {#if jobs_enabled}
+    <SidebarRailItem on_click={() => jobs_dialog.open()} label="Jobs">
+      <div slot="icon" class="w-full h-full relative">
+        <JobsIcon />
+        <SidebarJobsIndicator variant="rail" />
+      </div>
+    </SidebarRailItem>
+  {/if}
+
   <SidebarRailSettings active={section === Section.Settings} />
 </nav>
diff --git a/app/web_ui/src/routes/(app)/sidebar_rail_item.svelte b/app/web_ui/src/routes/(app)/sidebar_rail_item.svelte
index 3dd541343..92d9ebc61 100644
--- a/app/web_ui/src/routes/(app)/sidebar_rail_item.svelte
+++ b/app/web_ui/src/routes/(app)/sidebar_rail_item.svelte
@@ -1,31 +1,56 @@
 <script lang="ts">
   import SidebarRailTooltip from "./sidebar_rail_tooltip.svelte"
 
-  export let href: string
+  // Either a navigation target (`href`) or a click handler (`on_click`). When
+  // `on_click` is set the item renders as a button instead of a link (used by
+  // the Jobs entry, which opens a dialog rather than navigating).
+  export let href: string | undefined = undefined
+  export let on_click: (() => void) | undefined = undefined
   export let active: boolean = false
   export let label: string
 
   let hovered = false
   let focused = false
   $: show_tooltip = hovered || focused
+
+  $: item_class = `relative flex items-center justify-center w-10 h-8 xl:h-9 rounded-md ${
+    active ? "bg-base-300" : "hover:bg-base-300/50"
+  }`
 </script>
 
 <div class="flex justify-center">
-  <a
-    {href}
-    class="relative flex items-center justify-center w-10 h-8 xl:h-9 rounded-md {active
-      ? 'bg-base-300'
-      : 'hover:bg-base-300/50'}"
-    aria-label={label}
-    aria-current={active ? "page" : undefined}
-    on:mouseenter={() => (hovered = true)}
-    on:mouseleave={() => (hovered = false)}
-    on:focus={() => (focused = true)}
-    on:blur={() => (focused = false)}
-  >
-    <span class="w-5 h-5 block">
-      <slot name="icon" />
-    </span>
-    <SidebarRailTooltip show={show_tooltip}>{label}</SidebarRailTooltip>
-  </a>
+  {#if on_click}
+    <button
+      type="button"
+      class={item_class}
+      aria-label={label}
+      aria-current={active ? "page" : undefined}
+      on:click={on_click}
+      on:mouseenter={() => (hovered = true)}
+      on:mouseleave={() => (hovered = false)}
+      on:focus={() => (focused = true)}
+      on:blur={() => (focused = false)}
+    >
+      <span class="w-5 h-5 block">
+        <slot name="icon" />
+      </span>
+      <SidebarRailTooltip show={show_tooltip}>{label}</SidebarRailTooltip>
+    </button>
+  {:else}
+    <a
+      {href}
+      class={item_class}
+      aria-label={label}
+      aria-current={active ? "page" : undefined}
+      on:mouseenter={() => (hovered = true)}
+      on:mouseleave={() => (hovered = false)}
+      on:focus={() => (focused = true)}
+      on:blur={() => (focused = false)}
+    >
+      <span class="w-5 h-5 block">
+        <slot name="icon" />
+      </span>
+      <SidebarRailTooltip show={show_tooltip}>{label}</SidebarRailTooltip>
+    </a>
+  {/if}
 </div>
diff --git a/app/web_ui/src/routes/(app)/sidebar_rail_item.test.ts b/app/web_ui/src/routes/(app)/sidebar_rail_item.test.ts
index 427831532..fc0182ef6 100644
--- a/app/web_ui/src/routes/(app)/sidebar_rail_item.test.ts
+++ b/app/web_ui/src/routes/(app)/sidebar_rail_item.test.ts
@@ -70,6 +70,19 @@ describe("SidebarRailItem", () => {
     expect(anchor?.className).toContain("hover:bg-base-300/50")
   })
 
+  it("renders a button that fires on_click when no href is given", async () => {
+    // With only `on_click` set, the item must be a <button>, never an <a>.
+    let click_count = 0
+    const props = { on_click: () => (click_count += 1), label: "Jobs" }
+    const { container } = render(SidebarRailItem, { props })
+    const button = container.querySelector("button") as HTMLElement
+    expect(button).not.toBeNull()
+    expect(container.querySelector("a")).toBeNull()
+    expect(button.getAttribute("aria-label")).toBe("Jobs")
+    await fireEvent.click(button)
+    expect(click_count).toBe(1)
+  })
+
   it("keeps the visible tooltip pointer-events-none so clicks do not regress", async () => {
     // Regression guard: pre-portal the tooltip is a DOM descendant of the <a>,
     // so it must remain non-interactive or it could swallow clicks on the link.
diff --git a/app/web_ui/tests/e2e/act/discover/docs-library.spec.ts b/app/web_ui/tests/e2e/act/discover/docs-library.spec.ts
index e02cd3c1f..77fa9a18b 100644
--- a/app/web_ui/tests/e2e/act/discover/docs-library.spec.ts
+++ b/app/web_ui/tests/e2e/act/discover/docs-library.spec.ts
@@ -388,8 +388,10 @@ test.describe("Document library page", () => {
     const { project } = seededProjectWithTask
 
     await page.goto(`/docs/library/${project.id}`)
-    await page.waitForLoadState("networkidle")
 
+    // Note: don't wait for "networkidle" here — the app holds an always-on jobs
+    // SSE stream (/api/jobs/events) open for the lifetime of the page, so the
+    // network never goes idle. The assertion below auto-waits regardless.
     const breadcrumb = page
       .locator(".breadcrumbs")
       .getByRole("link", { name: "Docs & Search", exact: true })
diff --git a/libs/server/kiln_server/server.py b/libs/server/kiln_server/server.py
index 3ba93cde6..79dcc30e9 100644
--- a/libs/server/kiln_server/server.py
+++ b/libs/server/kiln_server/server.py
@@ -109,6 +109,10 @@ def _get_version() -> str:
         "name": "Settings & Utilities",
         "description": "Server settings, connectivity checks, and utility endpoints.",
     },
+    {
+        "name": "Jobs",
+        "description": "Run, monitor, and control background jobs, and stream their events.",
+    },
 ]
 
 
diff --git a/libs/server/kiln_server/utils/agent_checks/annotations/delete_api_jobs_id.json b/libs/server/kiln_server/utils/agent_checks/annotations/delete_api_jobs_id.json
new file mode 100644
index 000000000..32f0fd78e
--- /dev/null
+++ b/libs/server/kiln_server/utils/agent_checks/annotations/delete_api_jobs_id.json
@@ -0,0 +1,9 @@
+{
+  "method": "delete",
+  "path": "/api/jobs/{id}",
+  "agent_policy": {
+    "permission": "allow",
+    "requires_approval": true,
+    "approval_description": "Allow agent to control background jobs (pause, resume, cancel, delete)?"
+  }
+}
diff --git a/libs/server/kiln_server/utils/agent_checks/annotations/get_api_jobs.json b/libs/server/kiln_server/utils/agent_checks/annotations/get_api_jobs.json
new file mode 100644
index 000000000..e7f0de246
--- /dev/null
+++ b/libs/server/kiln_server/utils/agent_checks/annotations/get_api_jobs.json
@@ -0,0 +1,8 @@
+{
+  "method": "get",
+  "path": "/api/jobs",
+  "agent_policy": {
+    "permission": "allow",
+    "requires_approval": false
+  }
+}
diff --git a/libs/server/kiln_server/utils/agent_checks/annotations/get_api_jobs_events.json b/libs/server/kiln_server/utils/agent_checks/annotations/get_api_jobs_events.json
new file mode 100644
index 000000000..dfb42f0e7
--- /dev/null
+++ b/libs/server/kiln_server/utils/agent_checks/annotations/get_api_jobs_events.json
@@ -0,0 +1,8 @@
+{
+  "method": "get",
+  "path": "/api/jobs/events",
+  "agent_policy": {
+    "permission": "allow",
+    "requires_approval": false
+  }
+}
diff --git a/libs/server/kiln_server/utils/agent_checks/annotations/get_api_jobs_id.json b/libs/server/kiln_server/utils/agent_checks/annotations/get_api_jobs_id.json
new file mode 100644
index 000000000..858ca1150
--- /dev/null
+++ b/libs/server/kiln_server/utils/agent_checks/annotations/get_api_jobs_id.json
@@ -0,0 +1,8 @@
+{
+  "method": "get",
+  "path": "/api/jobs/{id}",
+  "agent_policy": {
+    "permission": "allow",
+    "requires_approval": false
+  }
+}
diff --git a/libs/server/kiln_server/utils/agent_checks/annotations/get_api_jobs_id_errors.json b/libs/server/kiln_server/utils/agent_checks/annotations/get_api_jobs_id_errors.json
new file mode 100644
index 000000000..9df668758
--- /dev/null
+++ b/libs/server/kiln_server/utils/agent_checks/annotations/get_api_jobs_id_errors.json
@@ -0,0 +1,8 @@
+{
+  "method": "get",
+  "path": "/api/jobs/{id}/errors",
+  "agent_policy": {
+    "permission": "allow",
+    "requires_approval": false
+  }
+}
diff --git a/libs/server/kiln_server/utils/agent_checks/annotations/get_api_jobs_id_result.json b/libs/server/kiln_server/utils/agent_checks/annotations/get_api_jobs_id_result.json
new file mode 100644
index 000000000..c9384bc76
--- /dev/null
+++ b/libs/server/kiln_server/utils/agent_checks/annotations/get_api_jobs_id_result.json
@@ -0,0 +1,8 @@
+{
+  "method": "get",
+  "path": "/api/jobs/{id}/result",
+  "agent_policy": {
+    "permission": "allow",
+    "requires_approval": false
+  }
+}
diff --git a/libs/server/kiln_server/utils/agent_checks/annotations/get_api_jobs_id_wait.json b/libs/server/kiln_server/utils/agent_checks/annotations/get_api_jobs_id_wait.json
new file mode 100644
index 000000000..f13245b8c
--- /dev/null
+++ b/libs/server/kiln_server/utils/agent_checks/annotations/get_api_jobs_id_wait.json
@@ -0,0 +1,8 @@
+{
+  "method": "get",
+  "path": "/api/jobs/{id}/wait",
+  "agent_policy": {
+    "permission": "allow",
+    "requires_approval": false
+  }
+}
diff --git a/libs/server/kiln_server/utils/agent_checks/annotations/post_api_jobs_id_cancel.json b/libs/server/kiln_server/utils/agent_checks/annotations/post_api_jobs_id_cancel.json
new file mode 100644
index 000000000..f48df2706
--- /dev/null
+++ b/libs/server/kiln_server/utils/agent_checks/annotations/post_api_jobs_id_cancel.json
@@ -0,0 +1,9 @@
+{
+  "method": "post",
+  "path": "/api/jobs/{id}/cancel",
+  "agent_policy": {
+    "permission": "allow",
+    "requires_approval": true,
+    "approval_description": "Allow agent to control background jobs (pause, resume, cancel, delete)?"
+  }
+}
diff --git a/libs/server/kiln_server/utils/agent_checks/annotations/post_api_jobs_id_pause.json b/libs/server/kiln_server/utils/agent_checks/annotations/post_api_jobs_id_pause.json
new file mode 100644
index 000000000..bbb24cb9c
--- /dev/null
+++ b/libs/server/kiln_server/utils/agent_checks/annotations/post_api_jobs_id_pause.json
@@ -0,0 +1,9 @@
+{
+  "method": "post",
+  "path": "/api/jobs/{id}/pause",
+  "agent_policy": {
+    "permission": "allow",
+    "requires_approval": true,
+    "approval_description": "Allow agent to control background jobs (pause, resume, cancel, delete)?"
+  }
+}
diff --git a/libs/server/kiln_server/utils/agent_checks/annotations/post_api_jobs_id_resume.json b/libs/server/kiln_server/utils/agent_checks/annotations/post_api_jobs_id_resume.json
new file mode 100644
index 000000000..0291b1c58
--- /dev/null
+++ b/libs/server/kiln_server/utils/agent_checks/annotations/post_api_jobs_id_resume.json
@@ -0,0 +1,9 @@
+{
+  "method": "post",
+  "path": "/api/jobs/{id}/resume",
+  "agent_policy": {
+    "permission": "allow",
+    "requires_approval": true,
+    "approval_description": "Allow agent to control background jobs (pause, resume, cancel, delete)?"
+  }
+}
diff --git a/libs/server/kiln_server/utils/agent_checks/annotations/post_api_jobs_type.json b/libs/server/kiln_server/utils/agent_checks/annotations/post_api_jobs_type.json
new file mode 100644
index 000000000..7583bb379
--- /dev/null
+++ b/libs/server/kiln_server/utils/agent_checks/annotations/post_api_jobs_type.json
@@ -0,0 +1,8 @@
+{
+  "method": "post",
+  "path": "/api/jobs/{type}",
+  "agent_policy": {
+    "permission": "allow",
+    "requires_approval": false
+  }
+}