From 8290cfe3c30ea6af5917efeb43d26d089aa21a69 Mon Sep 17 00:00:00 2001 From: "Leonard Q. Marcq" Date: Thu, 28 May 2026 18:44:15 +0800 Subject: [PATCH 01/26] feat: background job system --- app/desktop/desktop_server.py | 2 + app/desktop/git_sync/middleware.py | 6 +- app/desktop/git_sync/save_context.py | 66 ++ app/desktop/git_sync/test_save_context.py | 219 ++++++ app/desktop/studio_server/jobs/__init__.py | 0 app/desktop/studio_server/jobs/api.py | 308 ++++++++ app/desktop/studio_server/jobs/error_log.py | 67 ++ app/desktop/studio_server/jobs/events.py | 105 +++ app/desktop/studio_server/jobs/models.py | 180 +++++ app/desktop/studio_server/jobs/registry.py | 479 ++++++++++++ app/desktop/studio_server/jobs/test_api.py | 704 +++++++++++++++++ .../studio_server/jobs/test_error_log.py | 67 ++ app/desktop/studio_server/jobs/test_events.py | 90 +++ .../studio_server/jobs/test_registry.py | 723 ++++++++++++++++++ .../studio_server/jobs/workers/__init__.py | 0 .../studio_server/jobs/workers/eval.py | 136 ++++ .../studio_server/jobs/workers/noop.py | 47 ++ .../studio_server/jobs/workers/test_eval.py | 535 +++++++++++++ app/web_ui/src/lib/api_schema.d.ts | 630 +++++++++++++++ .../lib/components/SidebarJobsBadge.svelte | 32 + .../lib/components/SidebarJobsBadge.test.ts | 40 + app/web_ui/src/lib/stores/job_status.test.ts | 128 ++++ app/web_ui/src/lib/stores/job_status.ts | 109 +++ app/web_ui/src/lib/stores/jobs_api.test.ts | 150 ++++ app/web_ui/src/lib/stores/jobs_api.ts | 118 +++ app/web_ui/src/lib/stores/jobs_store.test.ts | 305 ++++++++ app/web_ui/src/lib/stores/jobs_store.ts | 244 ++++++ app/web_ui/src/lib/ui/icons/jobs_icon.svelte | 23 + app/web_ui/src/lib/ui/section.ts | 1 + app/web_ui/src/routes/(app)/+layout.svelte | 14 + app/web_ui/src/routes/(app)/jobs/+page.svelte | 362 +++++++++ .../src/routes/(app)/sidebar_rail.svelte | 9 + libs/server/kiln_server/server.py | 4 + .../background_job_system/architecture.md | 108 +++ .../background_job_system/functional_spec.md | 
350 +++++++++ .../implementation_plan.md | 14 + .../phase_plans/phase_1.md | 186 +++++ .../phase_plans/phase_2.md | 169 ++++ .../phase_plans/phase_3.md | 133 ++++ .../phase_plans/phase_4.md | 200 +++++ .../background_job_system/project_overview.md | 48 ++ 41 files changed, 7110 insertions(+), 1 deletion(-) create mode 100644 app/desktop/git_sync/save_context.py create mode 100644 app/desktop/git_sync/test_save_context.py create mode 100644 app/desktop/studio_server/jobs/__init__.py create mode 100644 app/desktop/studio_server/jobs/api.py create mode 100644 app/desktop/studio_server/jobs/error_log.py create mode 100644 app/desktop/studio_server/jobs/events.py create mode 100644 app/desktop/studio_server/jobs/models.py create mode 100644 app/desktop/studio_server/jobs/registry.py create mode 100644 app/desktop/studio_server/jobs/test_api.py create mode 100644 app/desktop/studio_server/jobs/test_error_log.py create mode 100644 app/desktop/studio_server/jobs/test_events.py create mode 100644 app/desktop/studio_server/jobs/test_registry.py create mode 100644 app/desktop/studio_server/jobs/workers/__init__.py create mode 100644 app/desktop/studio_server/jobs/workers/eval.py create mode 100644 app/desktop/studio_server/jobs/workers/noop.py create mode 100644 app/desktop/studio_server/jobs/workers/test_eval.py create mode 100644 app/web_ui/src/lib/components/SidebarJobsBadge.svelte create mode 100644 app/web_ui/src/lib/components/SidebarJobsBadge.test.ts create mode 100644 app/web_ui/src/lib/stores/job_status.test.ts create mode 100644 app/web_ui/src/lib/stores/job_status.ts create mode 100644 app/web_ui/src/lib/stores/jobs_api.test.ts create mode 100644 app/web_ui/src/lib/stores/jobs_api.ts create mode 100644 app/web_ui/src/lib/stores/jobs_store.test.ts create mode 100644 app/web_ui/src/lib/stores/jobs_store.ts create mode 100644 app/web_ui/src/lib/ui/icons/jobs_icon.svelte create mode 100644 app/web_ui/src/routes/(app)/jobs/+page.svelte create mode 100644 
specs/projects/background_job_system/architecture.md create mode 100644 specs/projects/background_job_system/functional_spec.md create mode 100644 specs/projects/background_job_system/implementation_plan.md create mode 100644 specs/projects/background_job_system/phase_plans/phase_1.md create mode 100644 specs/projects/background_job_system/phase_plans/phase_2.md create mode 100644 specs/projects/background_job_system/phase_plans/phase_3.md create mode 100644 specs/projects/background_job_system/phase_plans/phase_4.md create mode 100644 specs/projects/background_job_system/project_overview.md diff --git a/app/desktop/desktop_server.py b/app/desktop/desktop_server.py index 0163c146c..639bbe27e 100644 --- a/app/desktop/desktop_server.py +++ b/app/desktop/desktop_server.py @@ -33,6 +33,7 @@ from app.desktop.studio_server.eval_api import connect_evals_api from app.desktop.studio_server.finetune_api import connect_fine_tune_api from app.desktop.studio_server.import_api import connect_import_api +from app.desktop.studio_server.jobs.api import connect_jobs_api from app.desktop.studio_server.prompt_api import connect_prompt_api from app.desktop.studio_server.prompt_optimization_job_api import ( connect_prompt_optimization_job_api, @@ -142,6 +143,7 @@ def make_app(tk_root: tk.Tk | None = None): connect_agent_api(app) connect_dev_tools(app) connect_chat_api(app) + connect_jobs_api(app) # Important: webhost must be last, it handles all other URLs connect_webhost(app) return app diff --git a/app/desktop/git_sync/middleware.py b/app/desktop/git_sync/middleware.py index bca2b3ae6..900f89cf2 100644 --- a/app/desktop/git_sync/middleware.py +++ b/app/desktop/git_sync/middleware.py @@ -351,7 +351,11 @@ def _resolve_endpoint(self, request: Request) -> Callable[..., Any] | None: return None def _get_manager_for_request(self, request: Request) -> GitSyncManager | None: - """Extract project_id from URL, resolve to path, return manager if auto-sync enabled.""" + """Extract project_id from 
def get_manager_for_project(project_id: str) -> GitSyncManager | None:
    """Return the GitSyncManager for a project, or None when auto-sync is off.

    This is the request-free counterpart of
    GitSyncMiddleware._get_manager_for_request (no URL parsing). None is
    returned when the project id has no path, when there is no git-sync config
    for that path, when sync_mode is anything other than "auto", or when the
    config carries no clone_path.

    The config is looked up by project path, while the manager is looked up by
    clone path. The manager always comes from GitSyncRegistry.get_or_create so
    that this code path and the HTTP path share one manager per clone path.
    Exceptions raised by the config lookup are not caught here.
    """
    project_path = project_path_from_id(project_id)
    config = None if project_path is None else get_git_sync_config(project_path)
    if config is None or config["sync_mode"] != "auto":
        return None

    clone_path = config.get("clone_path")
    if clone_path is None:
        return None

    return GitSyncRegistry.get_or_create(
        repo_path=Path(clone_path),
        remote_name=config["remote_name"],
        pat_token=config.get("pat_token"),
        oauth_token=config.get("oauth_token"),
        auth_mode=config["auth_mode"],
    )


def save_context_for_project(project_id: str, context: str) -> SaveContext | None:
    """Build a SaveContext for callers that only hold a project_id.

    Returns None when get_manager_for_project() reports that git sync is not
    active for the project. Otherwise returns a zero-argument factory; each
    call to it returns manager.atomic_write(context=context). The atomic write
    is therefore created lazily, when the factory is invoked, not here.

    If a background sync is registered for the manager's repo path it is
    notified once, at build time (not on every factory call).
    """
    manager = get_manager_for_project(project_id)
    if manager is None:
        return None

    background_sync = GitSyncRegistry.get_background_sync(manager.repo_path)
    if background_sync is not None:
        background_sync.notify_request()

    def open_atomic_write():
        # Deferred on purpose: nothing is entered until the caller asks for it.
        return manager.atomic_write(context=context)

    return open_atomic_write
GitSyncProjectConfig( + sync_mode="auto", + auth_mode="system_keys", + remote_name="origin", + branch="main", + clone_path=clone_path, + git_url=None, + pat_token=None, + oauth_token=None, + ) + + +def _manual_config() -> GitSyncProjectConfig: + return GitSyncProjectConfig( + sync_mode="manual", + auth_mode="system_keys", + remote_name="origin", + branch="main", + clone_path=CLONE_PATH, + git_url=None, + pat_token=None, + oauth_token=None, + ) + + +class _FakeManager: + """Minimal AtomicWriteCapable stand-in that records atomic_write calls.""" + + def __init__(self, repo_path: Path = Path(CLONE_PATH)): + self.repo_path = repo_path + self.calls: list[str] = [] + self.entered = False + + @asynccontextmanager + async def atomic_write(self, context: str): + self.calls.append(context) + self.entered = True + yield + + +def _patch_resolution(project_path, config, manager=None, bg_sync=None): + """Patch the config + registry calls used by the helper. + + project_path_from_id and get_git_sync_config are looked up in the + save_context module namespace, so patch them there. 
+ """ + stack = ExitStack() + stack.enter_context( + patch( + "app.desktop.git_sync.save_context.project_path_from_id", + return_value=project_path, + ) + ) + stack.enter_context( + patch( + "app.desktop.git_sync.save_context.get_git_sync_config", + return_value=config, + ) + ) + stack.enter_context( + patch( + "app.desktop.git_sync.save_context.GitSyncRegistry.get_or_create", + return_value=manager, + ) + ) + stack.enter_context( + patch( + "app.desktop.git_sync.save_context.GitSyncRegistry.get_background_sync", + return_value=bg_sync, + ) + ) + return stack + + +# -- None branches ----------------------------------------------------------- + + +def test_returns_none_when_no_project_path(): + with _patch_resolution(project_path=None, config=None): + assert save_context_for_project(PROJECT_ID, context="ctx") is None + assert get_manager_for_project(PROJECT_ID) is None + + +def test_returns_none_when_no_git_sync_config(): + with _patch_resolution(project_path=PROJECT_PATH, config=None): + assert save_context_for_project(PROJECT_ID, context="ctx") is None + assert get_manager_for_project(PROJECT_ID) is None + + +def test_returns_none_when_sync_mode_not_auto(): + with _patch_resolution(project_path=PROJECT_PATH, config=_manual_config()): + assert save_context_for_project(PROJECT_ID, context="ctx") is None + assert get_manager_for_project(PROJECT_ID) is None + + +def test_returns_none_when_clone_path_missing(): + with _patch_resolution( + project_path=PROJECT_PATH, config=_auto_config(clone_path=None) + ): + assert save_context_for_project(PROJECT_ID, context="ctx") is None + assert get_manager_for_project(PROJECT_ID) is None + + +# -- active branches --------------------------------------------------------- + + +def test_get_manager_uses_registry_with_config_values(): + manager = _FakeManager() + with ( + patch( + "app.desktop.git_sync.save_context.project_path_from_id", + return_value=PROJECT_PATH, + ), + patch( + 
"app.desktop.git_sync.save_context.get_git_sync_config", + return_value=_auto_config(), + ), + patch( + "app.desktop.git_sync.save_context.GitSyncRegistry.get_or_create", + return_value=manager, + ) as mock_get_or_create, + ): + result = get_manager_for_project(PROJECT_ID) + + assert result is manager + mock_get_or_create.assert_called_once_with( + repo_path=Path(CLONE_PATH), + remote_name="origin", + pat_token=None, + oauth_token=None, + auth_mode="system_keys", + ) + + +async def test_save_context_enters_atomic_write_with_label(): + manager = _FakeManager() + with _patch_resolution( + project_path=PROJECT_PATH, config=_auto_config(), manager=manager + ): + save_context = save_context_for_project(PROJECT_ID, context="eval job e1/r1") + + assert save_context is not None + assert manager.entered is False # built lazily, not yet entered + + async with save_context(): + pass + + assert manager.calls == ["eval job e1/r1"] + + +def test_save_context_notifies_background_sync(): + manager = _FakeManager() + bg_sync = MagicMock() + with _patch_resolution( + project_path=PROJECT_PATH, + config=_auto_config(), + manager=manager, + bg_sync=bg_sync, + ): + save_context = save_context_for_project(PROJECT_ID, context="ctx") + + assert save_context is not None + bg_sync.notify_request.assert_called_once() + + +def test_save_context_no_background_sync_is_fine(): + manager = _FakeManager() + with _patch_resolution( + project_path=PROJECT_PATH, + config=_auto_config(), + manager=manager, + bg_sync=None, + ): + save_context = save_context_for_project(PROJECT_ID, context="ctx") + + assert save_context is not None + + +# -- error propagation ------------------------------------------------------- + + +def test_propagates_when_config_lookup_raises(): + # A corrupt/raising config lookup must surface (failing the job) rather than + # be swallowed to None, which would silently skip commits for an auto-sync + # project — the very bug this resolver exists to prevent. 
+ with ( + patch( + "app.desktop.git_sync.save_context.project_path_from_id", + return_value=PROJECT_PATH, + ), + patch( + "app.desktop.git_sync.save_context.get_git_sync_config", + side_effect=RuntimeError("corrupt config"), + ), + ): + with pytest.raises(RuntimeError, match="corrupt config"): + get_manager_for_project(PROJECT_ID) + with pytest.raises(RuntimeError, match="corrupt config"): + save_context_for_project(PROJECT_ID, context="ctx") diff --git a/app/desktop/studio_server/jobs/__init__.py b/app/desktop/studio_server/jobs/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/app/desktop/studio_server/jobs/api.py b/app/desktop/studio_server/jobs/api.py new file mode 100644 index 000000000..ec66c6fcd --- /dev/null +++ b/app/desktop/studio_server/jobs/api.py @@ -0,0 +1,308 @@ +from __future__ import annotations + +import asyncio +import json +from datetime import datetime +from typing import Annotated, Any, AsyncGenerator + +from fastapi import FastAPI, HTTPException, Path, Query, Response +from kiln_server.cancellable_streaming_response import CancellableStreamingResponse +from kiln_server.utils.agent_checks.policy import ( + ALLOW_AGENT, + agent_policy_require_approval, +) +from pydantic import BaseModel, Field, ValidationError + +from . import error_log +from .events import JobEvent +from .models import BackgroundJobStatus, JobRecord +from .registry import ( + JobNotFoundError, + JobOperationError, + job_registry, +) +from .workers.eval import EvalJobWorker +from .workers.noop import NoopJobWorker + +KEEPALIVE_SECONDS = 15.0 + +_JOB_MUTATION_APPROVAL = agent_policy_require_approval( + "Allow agent to control background jobs (pause, resume, cancel, delete)?" +) + + +class CreateJobRequest(BaseModel): + """Request body for creating a job. 
async def _event_stream(
    job_id: str | None,
    type_name: str | None,
    project_id: str | None,
):
    """Pure-observer SSE generator.

    Subscribes to the registry event bus with the given filters and yields each
    event formatted as an SSE frame. When no event arrives within
    KEEPALIVE_SECONDS a `": ping"` comment frame is yielded instead, and the
    generator keeps waiting for the same pending event.

    Closing this generator (for example on client disconnect) only cancels the
    pending read and closes the bus subscription; it never touches a job's
    supervising task.

    Args:
        job_id: Only forward events for this job id (None = no filter).
        type_name: Only forward events for this job type (None = no filter).
        project_id: Only forward events for this project id (None = no filter).

    Yields:
        SSE-formatted strings: event frames and keepalive comments.
    """
    subscription: AsyncGenerator[JobEvent, None] = job_registry.events.subscribe(
        job_id=job_id,
        type_name=type_name,
        project_id=project_id,
    )
    # One in-flight read of the subscription, kept alive across keepalive
    # timeouts. asyncio.wait_for() must NOT be used here: on timeout it cancels
    # the awaited __anext__(), the CancelledError is raised inside the
    # subscription generator, the generator is finalized, and the very next
    # __anext__() raises StopAsyncIteration -- ending the stream after the
    # first idle period. asyncio.wait() leaves the task running on timeout.
    pending: asyncio.Future | None = None
    try:
        while True:
            if pending is None:
                pending = asyncio.ensure_future(subscription.__anext__())
            done, _ = await asyncio.wait({pending}, timeout=KEEPALIVE_SECONDS)
            if not done:
                yield ": ping\n\n"
                continue
            finished, pending = pending, None
            try:
                event = finished.result()
            except StopAsyncIteration:
                break
            yield _format_sse(event)
    finally:
        if pending is not None:
            # The subscription generator is still running inside this task, so
            # aclose() would fail until the task has been cancelled and awaited.
            pending.cancel()
            await asyncio.gather(pending, return_exceptions=True)
        await subscription.aclose()
A pure observer: disconnecting never stops a job.""" + return CancellableStreamingResponse( + content=_event_stream(job_id, type, project_id), + media_type="text/event-stream", + ) + + @app.get( + "/api/jobs", + summary="List Jobs", + tags=["Jobs"], + openapi_extra=ALLOW_AGENT, + ) + async def list_jobs( + status: Annotated[ + BackgroundJobStatus | None, Query(description="Filter by job status.") + ] = None, + type: Annotated[str | None, Query(description="Filter by job type.")] = None, + project_id: Annotated[ + str | None, Query(description="Filter by project id.") + ] = None, + since: Annotated[ + datetime | None, + Query(description="Only jobs created at or after this ISO-8601 time."), + ] = None, + limit: Annotated[ + int | None, Query(description="Maximum number of jobs to return.") + ] = None, + ) -> list[JobRecord]: + return job_registry.list_jobs( + status=status, + type_name=type, + project_id=project_id, + since=since, + limit=limit, + ) + + @app.post( + "/api/jobs/{type}", + summary="Create Job", + tags=["Jobs"], + status_code=201, + openapi_extra=ALLOW_AGENT, + ) + async def create_job( + type: Annotated[str, Path(description="The registered job type to run.")], + request: CreateJobRequest, + ) -> CreateJobResponse: + try: + worker = job_registry.worker_for(type) + except JobOperationError: + raise HTTPException(status_code=404, detail=f"Unknown job type: {type}") + + try: + validated = worker.params_model.model_validate(request.params) + except ValidationError as exc: + raise HTTPException(status_code=422, detail=exc.errors()) + + job = await job_registry.create( + type_name=type, + params=validated, + project_id=request.project_id or _project_id_from_params(validated), + metadata=request.metadata, + ) + return CreateJobResponse(job_id=job.id, status=job.status) + + @app.get( + "/api/jobs/{id}", + summary="Get Job", + tags=["Jobs"], + openapi_extra=ALLOW_AGENT, + ) + async def get_job( + id: Annotated[str, Path(description="The job id.")], + ) -> 
JobRecord: + job = await job_registry.get(id) + if job is None: + raise HTTPException(status_code=404, detail=f"Job not found: {id}") + return job + + @app.get( + "/api/jobs/{id}/result", + summary="Get Job Result", + tags=["Jobs"], + openapi_extra=ALLOW_AGENT, + ) + async def get_job_result( + id: Annotated[str, Path(description="The job id.")], + ) -> dict[str, Any]: + job = await job_registry.get(id) + if job is None: + raise HTTPException(status_code=404, detail=f"Job not found: {id}") + if not job.status.is_terminal or job.result is None: + raise HTTPException( + status_code=404, detail="No result available for this job." + ) + return job.result + + @app.get( + "/api/jobs/{id}/errors", + summary="Get Job Errors", + tags=["Jobs"], + openapi_extra=ALLOW_AGENT, + ) + async def get_job_errors( + id: Annotated[str, Path(description="The job id.")], + run_id: Annotated[ + str | None, + Query(description="Read the error log for a specific past run id."), + ] = None, + ) -> list[dict[str, Any]]: + # Always 200, never errors (functional_spec §5). A plain non-reconciling + # lookup of the current run_id — we don't recompute state for a + # best-effort diagnostic read. 
+ resolved_run_id = run_id or job_registry.run_id_for(id) + if resolved_run_id is None: + return [] + return error_log.read_errors(resolved_run_id) + + @app.post( + "/api/jobs/{id}/pause", + summary="Pause Job", + tags=["Jobs"], + status_code=202, + openapi_extra=_JOB_MUTATION_APPROVAL, + ) + async def pause_job( + id: Annotated[str, Path(description="The job id.")], + ) -> Response: + await _run_lifecycle(job_registry.pause, id) + return Response(status_code=202) + + @app.post( + "/api/jobs/{id}/resume", + summary="Resume Job", + tags=["Jobs"], + status_code=202, + openapi_extra=_JOB_MUTATION_APPROVAL, + ) + async def resume_job( + id: Annotated[str, Path(description="The job id.")], + ) -> Response: + await _run_lifecycle(job_registry.resume, id) + return Response(status_code=202) + + @app.post( + "/api/jobs/{id}/cancel", + summary="Cancel Job", + tags=["Jobs"], + status_code=202, + openapi_extra=_JOB_MUTATION_APPROVAL, + ) + async def cancel_job( + id: Annotated[str, Path(description="The job id.")], + ) -> Response: + await _run_lifecycle(job_registry.cancel, id) + return Response(status_code=202) + + @app.delete( + "/api/jobs/{id}", + summary="Delete Job", + tags=["Jobs"], + status_code=204, + openapi_extra=_JOB_MUTATION_APPROVAL, + ) + async def delete_job( + id: Annotated[str, Path(description="The job id.")], + ) -> Response: + await _run_lifecycle(job_registry.delete, id) + return Response(status_code=204) + + +async def _run_lifecycle(operation, job_id: str) -> Any: + """Invoke a registry lifecycle op, mapping its exceptions to HTTP status. + + JobNotFoundError -> 404, JobOperationError (invalid transition / unsupported + pause / delete in-flight) -> 409. 
ERROR_LOG_DIR_NAME = "kiln_jobs"


def error_log_dir() -> Path:
    """Return the directory that holds per-run error logs.

    The directory sits under the system temp dir. It is not created here;
    append_error() creates it lazily on first write.
    """
    return Path(tempfile.gettempdir()) / ERROR_LOG_DIR_NAME


def error_log_path(run_id: str) -> Path:
    """Return the log file path for a run, refusing ids that escape the log dir.

    The file is named ``<run_id>.json`` although its content is JSON Lines (one
    object per line, see append_error()).

    Args:
        run_id: Identifier of the run. It may originate from a request query
            parameter, so it is treated as untrusted.

    Returns:
        The path ``error_log_dir() / f"{run_id}.json"``.

    Raises:
        ValueError: If run_id contains path separators, a drive, or is an
            absolute path -- anything that would make the resulting file not a
            direct child of error_log_dir().
    """
    directory = error_log_dir()
    filename = f"{run_id}.json"
    candidate = directory / filename
    # A safe id contributes exactly one path component. "../x", "a/b" and
    # "/abs/x" all change either the final name or the parent directory.
    if candidate.name != filename or candidate.parent != directory:
        raise ValueError(f"Invalid run id for error log: {run_id!r}")
    return candidate


def append_error(run_id: str, entry: dict[str, Any]) -> None:
    """Append a single error entry to this run's log (JSON Lines). Best-effort.

    Creates the directory lazily. Any failure -- an invalid run_id, an IO
    error, or an entry that cannot be serialized -- is swallowed: the error log
    is a diagnostic convenience, never a guarantee.

    Args:
        run_id: Identifier of the run whose log is appended to.
        entry: JSON-serializable object written as one line.
    """
    try:
        path = error_log_path(run_id)
        path.parent.mkdir(parents=True, exist_ok=True)
        line = json.dumps(entry, ensure_ascii=False)
        with path.open("a", encoding="utf-8") as f:
            f.write(line + "\n")
    except Exception:
        # Deliberately silent: reporting an error must never fail the job.
        pass


def read_errors(run_id: str) -> list[dict[str, Any]]:
    """Read the error log for a run as a list of objects. Best-effort.

    A missing or unreadable file, or an invalid run_id, returns []. Blank
    lines, unparsable lines and lines that are not JSON objects are skipped
    rather than failing the whole read. If reading fails part-way, the entries
    parsed so far are returned. Never raises.

    Args:
        run_id: Identifier of the run whose log is read.

    Returns:
        The parsed entries, in file order.
    """
    entries: list[dict[str, Any]] = []
    try:
        path = error_log_path(run_id)
        if not path.exists():
            return []
        with path.open("r", encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    continue
                try:
                    parsed = json.loads(line)
                except (ValueError, TypeError):
                    continue
                if isinstance(parsed, dict):
                    entries.append(parsed)
    except Exception:
        return entries
    return entries


def delete_errors(run_id: str) -> None:
    """Best-effort remove the error log file for a run. Swallows all errors.

    An invalid run_id is ignored like any other failure, so nothing outside the
    log directory is ever unlinked.
    """
    try:
        error_log_path(run_id).unlink(missing_ok=True)
    except Exception:
        pass
Per-job events carry the full record (idempotent snapshot).""" + + event: Literal["snapshot", "job", "deleted"] + data: dict[str, Any] + + +class _Subscriber: + def __init__( + self, + job_id: str | None, + type_name: str | None, + project_id: str | None, + ) -> None: + self.queue: asyncio.Queue[JobEvent] = asyncio.Queue() + self.job_id = job_id + self.type_name = type_name + self.project_id = project_id + + def matches( + self, + record_id: str | None, + record_type: str | None, + record_project_id: str | None, + ) -> bool: + if self.job_id is not None and self.job_id != record_id: + return False + if self.type_name is not None and self.type_name != record_type: + return False + if self.project_id is not None and self.project_id != record_project_id: + return False + return True + + +SnapshotProvider = Callable[[], list[JobRecord]] + + +class JobEventBus: + """In-process async pub/sub bus feeding the SSE endpoint (Phase 2). + + Subscribers receive an initial `snapshot` event, then per-job `job` events + and `deleted` tombstones, filtered by job_id / type / project_id. 
+ """ + + def __init__(self, snapshot_provider: SnapshotProvider | None = None) -> None: + self._subscribers: set[_Subscriber] = set() + self._snapshot_provider = snapshot_provider + + def set_snapshot_provider(self, provider: SnapshotProvider) -> None: + self._snapshot_provider = provider + + def _filtered_snapshot(self, subscriber: _Subscriber) -> list[JobRecord]: + if self._snapshot_provider is None: + return [] + return [ + record + for record in self._snapshot_provider() + if subscriber.matches(record.id, record.type, record.project_id) + ] + + async def subscribe( + self, + job_id: str | None = None, + type_name: str | None = None, + project_id: str | None = None, + ) -> AsyncGenerator[JobEvent, None]: + subscriber = _Subscriber(job_id, type_name, project_id) + self._subscribers.add(subscriber) + try: + snapshot = self._filtered_snapshot(subscriber) + yield JobEvent( + event="snapshot", + data={"jobs": [r.model_dump(mode="json") for r in snapshot]}, + ) + while True: + yield await subscriber.queue.get() + finally: + self._subscribers.discard(subscriber) + + def publish_job(self, record: JobRecord) -> None: + event = JobEvent(event="job", data=record.model_dump(mode="json")) + for subscriber in self._subscribers: + if subscriber.matches(record.id, record.type, record.project_id): + subscriber.queue.put_nowait(event) + + def publish_deleted( + self, + job_id: str, + type_name: str | None = None, + project_id: str | None = None, + ) -> None: + event = JobEvent(event="deleted", data={"id": job_id}) + for subscriber in self._subscribers: + if subscriber.matches(job_id, type_name, project_id): + subscriber.queue.put_nowait(event) diff --git a/app/desktop/studio_server/jobs/models.py b/app/desktop/studio_server/jobs/models.py new file mode 100644 index 000000000..7262934a4 --- /dev/null +++ b/app/desktop/studio_server/jobs/models.py @@ -0,0 +1,180 @@ +from __future__ import annotations + +from datetime import datetime, timezone +from enum import Enum +from typing 
import ( + Any, + Awaitable, + Callable, + ClassVar, + Generic, + TypeVar, +) + +from pydantic import BaseModel, Field + + +def _utc_now() -> datetime: + return datetime.now(timezone.utc) + + +class BackgroundJobStatus(str, Enum): + PENDING = "pending" + RUNNING = "running" + PAUSED = "paused" + SUCCEEDED = "succeeded" + FAILED = "failed" + CANCELLED = "cancelled" + + @property + def is_terminal(self) -> bool: + return self in TERMINAL_STATUSES + + +TERMINAL_STATUSES = frozenset( + { + BackgroundJobStatus.SUCCEEDED, + BackgroundJobStatus.FAILED, + BackgroundJobStatus.CANCELLED, + } +) + + +class JobProgress(BaseModel): + """Count-based progress for a job. + + Processed = success + error; remaining = total - success - error. The error + field is a count only — the actual messages live in the per-run error log. + """ + + total: int | None = None + success: int = 0 + error: int = 0 + message: str | None = None + updated_at: datetime = Field(default_factory=_utc_now) + + +class JobDerivedState(BaseModel): + """A worker's view of the operation's true state, read from source-of-truth entities.""" + + total: int | None = None + success: int = 0 + error: int = 0 + is_complete: bool = False + message: str | None = None + + +class JobError(BaseModel): + """Small failure summary stamped on the record. Detail lives in the error log.""" + + error: str | None = None + detail: dict[str, Any] | None = None + + +class JobRecord(BaseModel): + """Ephemeral, in-memory bookkeeping for a single job. 
Never persisted to disk.""" + + id: str + type: str + status: BackgroundJobStatus + run_id: str | None = None + progress: JobProgress = Field(default_factory=JobProgress) + params: dict[str, Any] = Field(default_factory=dict) + result: dict[str, Any] | None = None + error: JobError | None = None + metadata: dict[str, Any] = Field(default_factory=dict) + project_id: str | None = None + supports_pause: bool = False + created_at: datetime = Field(default_factory=_utc_now) + updated_at: datetime = Field(default_factory=_utc_now) + started_at: datetime | None = None + ended_at: datetime | None = None + + +ReportProgress = Callable[["JobProgressUpdate"], Awaitable[None]] +ReportError = Callable[[str, dict[str, Any]], Awaitable[None]] + + +class JobProgressUpdate(BaseModel): + success: int + error: int = 0 + total: int | None = None + message: str | None = None + + +class JobContext: + """Provided to the worker by JobRegistry during run(). + + Holds the current job_id and run_id, plus registry-injected callbacks for + reporting progress (in-memory snapshot + event) and per-item errors (error log). + """ + + def __init__( + self, + job_id: str, + run_id: str, + report_progress: ReportProgress, + report_error: ReportError, + ) -> None: + self.job_id = job_id + self.run_id = run_id + self._report_progress = report_progress + self._report_error = report_error + + async def report_progress( + self, + success: int, + error: int = 0, + total: int | None = None, + message: str | None = None, + ) -> None: + """Update the registry's in-memory progress snapshot and emit an event. + + A UI-smoothing signal only — the authoritative progress comes from + compute_state(). Cheap to call often. + """ + await self._report_progress( + JobProgressUpdate( + success=success, + error=error, + total=total, + message=message, + ) + ) + + async def report_error(self, error_message: str, **extra: Any) -> None: + """Append one structured error entry to this run's error log. 
+ + For non-fatal per-item errors that don't stop the run. Best-effort: a + failed write is swallowed, never propagated. Does not itself bump the + progress error count — report that via report_progress. + """ + await self._report_error(error_message, extra) + + +TParams = TypeVar("TParams", bound=BaseModel) +TResult = TypeVar("TResult", bound=BaseModel) + + +class JobWorker(Generic[TParams, TResult]): + type_name: ClassVar[str] + params_model: ClassVar[type[BaseModel]] + result_model: ClassVar[type[BaseModel]] + supports_pause: ClassVar[bool] = False + + async def compute_state(self, params: TParams) -> JobDerivedState | None: + """Read source-of-truth Kiln entities and return the operation's true state. + + MUST be a pure read — no side effects, idempotent, safe to call any time. + Return None only when the worker has no backing entity to consult (e.g. + the NoopJob fixture); the registry then keeps the last believed snapshot. + Real workers must override this. + """ + return None + + async def run(self, params: TParams, ctx: JobContext) -> TResult: + """MUST be idempotent. Covers both first run and resume — the registry + calls run() again to resume a paused job; the worker re-orients via + compute_state(), not a handed-in checkpoint. + """ + raise NotImplementedError diff --git a/app/desktop/studio_server/jobs/registry.py b/app/desktop/studio_server/jobs/registry.py new file mode 100644 index 000000000..e8d37d55b --- /dev/null +++ b/app/desktop/studio_server/jobs/registry.py @@ -0,0 +1,479 @@ +from __future__ import annotations + +import asyncio +import logging +import os +import secrets +import traceback +import uuid +from datetime import datetime +from typing import Any + +from pydantic import BaseModel + +from . 
class JobRegistry:
    """In-memory registry owning job lifecycle, concurrency, and reconciliation.

    Singleton per process. The in-memory index is the only store — no disk
    persistence of state. Supervising tasks are owned here and decoupled from any
    HTTP connection.
    """

    def __init__(self, max_concurrent: int | None = None) -> None:
        """Create an empty registry.

        Args:
            max_concurrent: Upper bound on simultaneously running jobs. When
                None, the value is resolved by ``_resolve_max_concurrent``.
        """
        self._max_concurrent = _resolve_max_concurrent(max_concurrent)
        self._workers: dict[str, JobWorker] = {}
        self._jobs: dict[str, JobRecord] = {}
        self._tasks: dict[str, asyncio.Task] = {}
        self._pending_ids: list[str] = []
        self._cancel_intent: set[str] = set()
        self._pause_intent: set[str] = set()
        # Job ids whose supervising task received a real (delivered-to-a-live-
        # task) cancellation. Distinguishes "worker swallowed a cancel" from
        # "worker finished before any cancel landed" when the worker returns
        # normally — the former must transition to paused/cancelled, the latter
        # must keep its succeeded result.
        self._cancel_delivered: set[str] = set()
        self._running_count = 0
        self.events = JobEventBus(snapshot_provider=self._snapshot)

    # -- registration --------------------------------------------------------

    def register_type(self, worker_cls: type[JobWorker]) -> None:
        """Instantiate ``worker_cls`` and register it under its ``type_name``.

        Registering the same type name again replaces the previous instance.
        """
        worker = worker_cls()
        self._workers[worker_cls.type_name] = worker

    def worker_for(self, type_name: str) -> JobWorker:
        """Return the registered worker for ``type_name``.

        Raises:
            JobOperationError: If no worker is registered under that name.
        """
        worker = self._workers.get(type_name)
        if worker is None:
            raise JobOperationError(f"Unknown job type: {type_name}")
        return worker

    # -- snapshots / reads ---------------------------------------------------

    def _snapshot(self) -> list[JobRecord]:
        """Return a list of all current job records (used by the event bus)."""
        return list(self._jobs.values())

    def _require(self, job_id: str) -> JobRecord:
        """Return the record for ``job_id`` or raise JobNotFoundError."""
        job = self._jobs.get(job_id)
        if job is None:
            raise JobNotFoundError(job_id)
        return job

    async def get(self, job_id: str) -> JobRecord | None:
        """Return the record for ``job_id`` after reconciling it, or None.

        Reconciliation consults the worker's compute_state() and emits an
        event if the record changed.
        """
        job = self._jobs.get(job_id)
        if job is None:
            return None
        await self._reconcile(job, emit_on_change=True)
        return job

    def run_id_for(self, job_id: str) -> str | None:
        """Current run_id for a job, or None if unknown. A plain read — no
        reconciliation (used by the best-effort errors endpoint)."""
        job = self._jobs.get(job_id)
        return job.run_id if job is not None else None

    def list_jobs(
        self,
        status: BackgroundJobStatus | None = None,
        type_name: str | None = None,
        project_id: str | None = None,
        since: datetime | None = None,
        limit: int | None = None,
    ) -> list[JobRecord]:
        """Return job records, newest first, narrowed by the given filters.

        Each filter is applied only when it is not None. ``since`` keeps
        records whose ``created_at`` is at or after the cutoff; ``limit``
        truncates the sorted result. No reconciliation is performed.
        """
        records = list(self._jobs.values())
        if status is not None:
            records = [r for r in records if r.status == status]
        if type_name is not None:
            records = [r for r in records if r.type == type_name]
        if project_id is not None:
            records = [r for r in records if r.project_id == project_id]
        if since is not None:
            records = [r for r in records if r.created_at >= since]
        records.sort(key=lambda r: r.created_at, reverse=True)
        if limit is not None:
            records = records[:limit]
        return records

    # -- create --------------------------------------------------------------

    async def create(
        self,
        type_name: str,
        params: dict[str, Any] | BaseModel,
        project_id: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> JobRecord:
        """Validate params, record a new PENDING job, and try to dispatch it.

        Raises:
            JobOperationError: If ``type_name`` is not registered.

        Validation errors from the worker's params model propagate to the
        caller unchanged.
        """
        worker = self.worker_for(type_name)
        validated = self._validate_params(worker, params)
        job_id = self._fresh_job_id()
        job = JobRecord(
            id=job_id,
            type=type_name,
            status=BackgroundJobStatus.PENDING,
            params=validated.model_dump(mode="json"),
            metadata=metadata or {},
            project_id=project_id,
            supports_pause=worker.supports_pause,
        )
        self._jobs[job_id] = job
        self._pending_ids.append(job_id)
        self._emit(job)
        self._dispatch_pending()
        return job

    def _fresh_job_id(self) -> str:
        """Generate a job id that is not already present in the index."""
        job_id = _new_job_id()
        while job_id in self._jobs:
            job_id = _new_job_id()
        return job_id

    def _validate_params(
        self, worker: JobWorker, params: dict[str, Any] | BaseModel
    ) -> BaseModel:
        """Coerce ``params`` into an instance of the worker's params model.

        An instance of the right model is returned as-is; any other model is
        dumped to a dict first and then validated like a plain dict.
        """
        if isinstance(params, worker.params_model):
            return params
        if isinstance(params, BaseModel):
            params = params.model_dump()
        return worker.params_model.model_validate(params)

    # -- dispatch / supervision ---------------------------------------------

    def _dispatch_pending(self) -> None:
        """Launch queued jobs (FIFO) while concurrency slots are free.

        Queue entries whose job is gone or no longer PENDING are dropped.
        """
        while self._running_count < self._max_concurrent and self._pending_ids:
            job_id = self._pending_ids.pop(0)
            job = self._jobs.get(job_id)
            if job is None or job.status != BackgroundJobStatus.PENDING:
                continue
            self._launch(job)

    def _launch(self, job: JobRecord) -> None:
        """Mark ``job`` RUNNING under a fresh run_id and start its supervisor task."""
        worker = self.worker_for(job.type)
        run_id = str(uuid.uuid4())
        job.run_id = run_id
        job.status = BackgroundJobStatus.RUNNING
        job.started_at = _utc_now()
        self._touch(job)
        self._running_count += 1
        self._emit(job)
        task = asyncio.create_task(self._supervise(job.id, worker, run_id))
        self._tasks[job.id] = task

    async def _supervise(self, job_id: str, worker: JobWorker, run_id: str) -> None:
        """Run one job to completion and record its final state.

        Everything — including the job lookup, params validation and context
        construction — happens inside the try/finally so the concurrency slot
        taken by ``_launch`` is always handed back, whatever path exits first.
        A params validation failure is routed to the failed state like any
        other worker exception instead of escaping the task.
        """
        try:
            job = self._jobs.get(job_id)
            if job is None:
                return
            try:
                params = worker.params_model.model_validate(job.params)
                ctx = self._build_context(job_id, run_id)
                await self._reconcile(job, emit_on_change=True)
                if job.status == BackgroundJobStatus.SUCCEEDED:
                    return
                result = await worker.run(params, ctx)
                # The cancellation transition is unconditional (functional_spec
                # §2): a worker that catches CancelledError for cleanup and then
                # returns normally — even one that calls task.uncancel() so it is
                # never re-raised — must still land in paused/cancelled, not
                # succeeded. The registry enforces this off its own delivery
                # record rather than trusting the worker to re-raise. A worker
                # that finished naturally before any cancel landed has no
                # delivery recorded, so its result stands.
                if job_id in self._cancel_delivered:
                    self._finish_cancelled_or_paused(job)
                else:
                    self._finish_succeeded(job, result)
            except asyncio.CancelledError:
                self._finish_cancelled_or_paused(job)
                raise
            except Exception as exc:
                self._finish_failed(job, run_id, exc)
        finally:
            self._release_slot(job_id)

    def _build_context(self, job_id: str, run_id: str) -> JobContext:
        """Build the JobContext (progress + error callbacks) for one run."""

        async def report_progress(update: JobProgressUpdate) -> None:
            job = self._jobs.get(job_id)
            # Ignore updates for a deleted job or from a superseded run.
            if job is None or job.run_id != run_id:
                return
            job.progress = JobProgress(
                total=update.total if update.total is not None else job.progress.total,
                success=update.success,
                error=update.error,
                message=update.message
                if update.message is not None
                else job.progress.message,
            )
            self._touch(job)
            self._emit(job)

        async def report_error(message: str, extra: dict[str, Any]) -> None:
            error_log.append_error(run_id, {"error_message": message, **extra})

        return JobContext(job_id, run_id, report_progress, report_error)

    def _finish_succeeded(self, job: JobRecord, result: BaseModel) -> None:
        """Mark ``job`` SUCCEEDED, store the JSON-dumped result, and emit."""
        job.status = BackgroundJobStatus.SUCCEEDED
        job.result = result.model_dump(mode="json")
        job.ended_at = _utc_now()
        self._touch(job)
        self._emit(job)

    def _finish_failed(self, job: JobRecord, run_id: str, exc: Exception) -> None:
        """Mark ``job`` FAILED, append a fatal entry to the run's error log, and emit."""
        job.status = BackgroundJobStatus.FAILED
        job.error = JobError(error=str(exc) or exc.__class__.__name__)
        job.ended_at = _utc_now()
        self._touch(job)
        error_log.append_error(
            run_id,
            {
                "error_message": str(exc) or exc.__class__.__name__,
                "traceback": "".join(
                    traceback.format_exception(type(exc), exc, exc.__traceback__)
                ),
                "fatal": True,
            },
        )
        self._emit(job)

    def _finish_cancelled_or_paused(self, job: JobRecord) -> None:
        """Mark ``job`` PAUSED if a pause was requested, else CANCELLED, and emit."""
        if job.id in self._pause_intent:
            job.status = BackgroundJobStatus.PAUSED
        else:
            job.status = BackgroundJobStatus.CANCELLED
        job.ended_at = _utc_now()
        self._touch(job)
        self._emit(job)

    # -- lifecycle controls --------------------------------------------------

    async def pause(self, job_id: str) -> JobRecord:
        """Pause a running job by cancelling its supervising task.

        Raises:
            JobNotFoundError: If the job is unknown.
            JobOperationError: If the job type does not support pause or the
                job is not RUNNING.
        """
        job = self._require(job_id)
        if not job.supports_pause:
            raise JobOperationError(f"Job type '{job.type}' does not support pause")
        if job.status != BackgroundJobStatus.RUNNING:
            raise JobOperationError(
                f"Cannot pause a job in status '{job.status.value}'"
            )
        self._pause_intent.add(job_id)
        await self._cancel_task(job_id)
        # If run() completed naturally during the cancel await, the job is
        # already terminal — leave that state intact rather than forcing paused.
        if job.status.is_terminal:
            return job
        if job.status != BackgroundJobStatus.PAUSED:
            job.status = BackgroundJobStatus.PAUSED
            self._touch(job)
        worker = self.worker_for(job.type)
        params = worker.params_model.model_validate(job.params)
        derived = await worker.compute_state(params)
        self._apply_derived(job, derived)
        self._emit(job)
        return job

    async def resume(self, job_id: str) -> JobRecord:
        """Resume a paused job, or finish it if its work is already complete.

        The worker's compute_state() is consulted first: if it reports the
        operation complete the job goes straight to SUCCEEDED; otherwise it is
        re-queued as PENDING and dispatched when a slot is free.

        Raises:
            JobNotFoundError: If the job is unknown.
            JobOperationError: If the job is not PAUSED.
        """
        job = self._require(job_id)
        if job.status != BackgroundJobStatus.PAUSED:
            raise JobOperationError(
                f"Cannot resume a job in status '{job.status.value}'"
            )
        worker = self.worker_for(job.type)
        params = worker.params_model.model_validate(job.params)
        derived = await worker.compute_state(params)
        if derived is not None and derived.is_complete:
            self._apply_derived(job, derived)
            job.status = BackgroundJobStatus.SUCCEEDED
            job.ended_at = _utc_now()
            self._touch(job)
            self._emit(job)
            return job
        self._apply_derived(job, derived)
        job.status = BackgroundJobStatus.PENDING
        # The pause transition stamps ended_at; the job is live again, so drop
        # the stale end time rather than report it on a pending/running job.
        job.ended_at = None
        self._touch(job)
        self._pending_ids.append(job_id)
        self._emit(job)
        self._dispatch_pending()
        return job

    async def cancel(self, job_id: str) -> JobRecord:
        """Cancel a pending, paused, or running job.

        Pending and paused jobs have no live task and are flipped to CANCELLED
        directly; a running job has its supervising task cancelled and awaited.

        Raises:
            JobNotFoundError: If the job is unknown.
            JobOperationError: If the job is already terminal.
        """
        job = self._require(job_id)
        if job.status.is_terminal:
            raise JobOperationError(
                f"Cannot cancel a job in status '{job.status.value}'"
            )
        if job.status == BackgroundJobStatus.PENDING:
            self._remove_pending(job_id)
            job.status = BackgroundJobStatus.CANCELLED
            job.ended_at = _utc_now()
            self._touch(job)
            self._emit(job)
            return job
        if job.status == BackgroundJobStatus.PAUSED:
            job.status = BackgroundJobStatus.CANCELLED
            job.ended_at = _utc_now()
            self._touch(job)
            self._emit(job)
            return job
        self._cancel_intent.add(job_id)
        await self._cancel_task(job_id)
        # The supervisor normally records the final state; cover the case
        # where it never got to (e.g. cancelled before its body ran).
        if not job.status.is_terminal:
            job.status = BackgroundJobStatus.CANCELLED
            job.ended_at = _utc_now()
            self._touch(job)
            self._emit(job)
        return self._jobs[job_id]

    async def delete(self, job_id: str) -> None:
        """Remove a terminal job, delete its current run's error log, and publish.

        Raises:
            JobNotFoundError: If the job is unknown.
            JobOperationError: If the job is not in a terminal status.
        """
        job = self._require(job_id)
        if not job.status.is_terminal:
            raise JobOperationError(
                f"Cannot delete a job in status '{job.status.value}'"
            )
        self._jobs.pop(job_id, None)
        self._remove_pending(job_id)
        if job.run_id is not None:
            error_log.delete_errors(job.run_id)
        self.events.publish_deleted(job_id, job.type, job.project_id)

    async def _cancel_task(self, job_id: str) -> None:
        """Cancel the supervising task for ``job_id`` (if any) and wait for it."""
        task = self._tasks.get(job_id)
        if task is None:
            return
        # cancel() returns True only if the request landed on a not-yet-done
        # task — i.e. the cancellation is actually delivered to the worker. If
        # it returns False the worker already finished naturally; we must not
        # override that terminal result.
        if task.cancel():
            self._cancel_delivered.add(job_id)
        try:
            await task
        except asyncio.CancelledError:
            pass
        except Exception:
            # The worker raised while we awaited its cancellation. _supervise
            # already routed this to the failed/terminal state and logged it;
            # we only debug-log here so it isn't silently discarded.
            logger.debug(
                "Worker for job %s raised during cancel await", job_id, exc_info=True
            )
        # If the task was cancelled before its coroutine body ever ran, its own
        # finally never executed, so reclaim the slot here. Idempotent: whoever
        # pops job_id from _tasks first owns the single decrement.
        self._release_slot(job_id)

    def _release_slot(self, job_id: str) -> None:
        """Free the job's concurrency slot once and dispatch the next pending job.

        A no-op when the job has no entry in ``_tasks`` (already released).
        """
        if self._tasks.pop(job_id, None) is None:
            return
        self._cancel_intent.discard(job_id)
        self._pause_intent.discard(job_id)
        self._cancel_delivered.discard(job_id)
        self._running_count -= 1
        self._dispatch_pending()

    def _remove_pending(self, job_id: str) -> None:
        """Drop ``job_id`` from the pending queue if it is there."""
        try:
            self._pending_ids.remove(job_id)
        except ValueError:
            pass

    # -- reconciliation ------------------------------------------------------

    async def _reconcile(self, job: JobRecord, emit_on_change: bool) -> bool:
        """Align ``job`` with the worker's compute_state(); return True if it changed.

        Returns False without touching the record when the job type is not
        registered or the worker reports no derived state. A derived state
        marked complete moves a non-terminal job to SUCCEEDED.
        """
        worker = self._workers.get(job.type)
        if worker is None:
            return False
        params = worker.params_model.model_validate(job.params)
        derived = await worker.compute_state(params)
        if derived is None:
            return False
        changed = self._apply_derived(job, derived)
        if derived.is_complete and not job.status.is_terminal:
            job.status = BackgroundJobStatus.SUCCEEDED
            job.ended_at = _utc_now()
            self._touch(job)
            changed = True
        if changed and emit_on_change:
            self._emit(job)
        return changed

    def _apply_derived(self, job: JobRecord, derived: JobDerivedState | None) -> bool:
        """Copy derived progress onto ``job``; return True if progress changed.

        ``total`` and ``message`` fall back to the job's current values when
        the derived state leaves them as None. ``updated_at`` is excluded from
        the comparison.
        """
        if derived is None:
            return False
        new_progress = JobProgress(
            total=derived.total if derived.total is not None else job.progress.total,
            success=derived.success,
            error=derived.error,
            message=derived.message
            if derived.message is not None
            else job.progress.message,
        )
        before = job.progress.model_dump(exclude={"updated_at"})
        after = new_progress.model_dump(exclude={"updated_at"})
        if before == after:
            return False
        job.progress = new_progress
        self._touch(job)
        return True

    # -- helpers -------------------------------------------------------------

    def _touch(self, job: JobRecord) -> None:
        """Stamp ``job.updated_at`` with the current time."""
        job.updated_at = _utc_now()

    def _emit(self, job: JobRecord) -> None:
        """Publish the job's current record on the event bus."""
        self.events.publish_job(job)
b/app/desktop/studio_server/jobs/test_api.py new file mode 100644 index 000000000..9e5429b91 --- /dev/null +++ b/app/desktop/studio_server/jobs/test_api.py @@ -0,0 +1,704 @@ +from __future__ import annotations + +import asyncio +import json +import uuid + +import httpx +import pytest +import pytest_asyncio +from app.desktop.studio_server.jobs import api as jobs_api +from app.desktop.studio_server.jobs import error_log +from app.desktop.studio_server.jobs.api import connect_jobs_api +from app.desktop.studio_server.jobs.models import ( + BackgroundJobStatus, + JobDerivedState, + JobWorker, +) +from app.desktop.studio_server.jobs.registry import JobOperationError, JobRegistry +from app.desktop.studio_server.jobs.workers.noop import NoopJobWorker +from fastapi import FastAPI +from pydantic import BaseModel + + +async def _safe_cancel(registry: JobRegistry, job_id: str) -> None: + """Best-effort cleanup cancel; ignore a job that already reached terminal.""" + try: + await registry.cancel(job_id) + except JobOperationError: + pass + + +@pytest.fixture(autouse=True) +def temp_error_log_dir(tmp_path, monkeypatch): + monkeypatch.setattr( + "app.desktop.studio_server.jobs.error_log.tempfile.gettempdir", + lambda: str(tmp_path), + ) + + +# -- supporting test workers ------------------------------------------------- + + +class _ProjectParams(BaseModel): + project_id: str + steps: int = 50 + sleep_per_step_seconds: float = 0.05 + + +class _EmptyResult(BaseModel): + pass + + +class ProjectScopedWorker(JobWorker[_ProjectParams, _EmptyResult]): + """A worker whose params carry a project_id, so the record gets one.""" + + type_name = "project_scoped" + params_model = _ProjectParams + result_model = _EmptyResult + supports_pause = True + + async def run(self, params, ctx): + await asyncio.sleep(5) + return _EmptyResult() + + +class _EmptyParams(BaseModel): + pass + + +class ReconcileCompleteWorker(JobWorker[_EmptyParams, _EmptyResult]): + """compute_state flips to complete once 
`done` is set, so a GET reconciles + the running job straight to succeeded.""" + + type_name = "reconcile_complete" + params_model = _EmptyParams + result_model = _EmptyResult + supports_pause = True + done = False + + async def compute_state(self, params): + complete = type(self).done + return JobDerivedState( + total=3, success=3 if complete else 1, error=0, is_complete=complete + ) + + async def run(self, params, ctx): + await asyncio.sleep(5) + return _EmptyResult() + + +class NonPausableWorker(JobWorker[_EmptyParams, _EmptyResult]): + type_name = "nonpausable" + params_model = _EmptyParams + result_model = _EmptyResult + supports_pause = False + + async def run(self, params, ctx): + await asyncio.sleep(5) + return _EmptyResult() + + +# -- fixtures ---------------------------------------------------------------- + + +@pytest.fixture +def registry(monkeypatch): + """Patch a fresh registry in for isolation, then register the test workers.""" + reg = JobRegistry(max_concurrent=10) + monkeypatch.setattr(jobs_api, "job_registry", reg) + reg.register_type(NoopJobWorker) + reg.register_type(ProjectScopedWorker) + reg.register_type(ReconcileCompleteWorker) + reg.register_type(NonPausableWorker) + return reg + + +@pytest.fixture +def fast_keepalive(monkeypatch): + # httpx's ASGITransport batches the SSE generator's output and only surfaces + # buffered lines once the next chunk (here, the keepalive ping) forces a + # flush. Shortening the keepalive makes that flush — and stream teardown — + # prompt in tests. Production keeps the 15s default. + monkeypatch.setattr(jobs_api, "KEEPALIVE_SECONDS", 0.1) + + +@pytest.fixture +def app(registry): + app = FastAPI() + connect_jobs_api(app) + return app + + +@pytest_asyncio.fixture +async def client(app): + # Async client over ASGI so handlers AND the registry's background tasks + # share the test's event loop — background jobs progress while we await. 
async def _wait_for_status(
    registry: JobRegistry,
    job_id: str,
    target: BackgroundJobStatus | set[BackgroundJobStatus],
    timeout: float = 3.0,
) -> None:
    """Poll the registry until ``job_id`` reaches one of the target statuses.

    Args:
        registry: Registry whose in-memory index is inspected directly.
        job_id: Id of the job to watch.
        target: A single status or a set of acceptable statuses.
        timeout: Seconds to keep polling before giving up.

    Raises:
        AssertionError: If the job is not in a target status by the deadline;
            the message reports the last observed status (or "missing").
    """
    targets = {target} if isinstance(target, BackgroundJobStatus) else target
    # We are inside a coroutine, so bind the running loop once instead of
    # going through get_event_loop() on every iteration.
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    while loop.time() < deadline:
        job = registry._jobs.get(job_id)
        if job is not None and job.status in targets:
            return
        # Yield to the loop so the registry's background tasks can progress.
        await asyncio.sleep(0.01)
    job = registry._jobs.get(job_id)
    actual = job.status if job else "missing"
    raise AssertionError(f"Job {job_id} did not reach {targets}; was {actual}")
test_create_stores_metadata_and_project_id(client, registry): + resp = await client.post( + "/api/jobs/project_scoped", + json={"params": {"project_id": "p_abc"}, "metadata": {"source": "test"}}, + ) + assert resp.status_code == 201 + job_id = resp.json()["job_id"] + record = registry._jobs[job_id] + assert record.project_id == "p_abc" + assert record.metadata == {"source": "test"} + await registry.cancel(job_id) + + +@pytest.mark.asyncio +async def test_create_noop_has_null_project_id(client, registry): + job_id = await _create_noop(client) + assert registry._jobs[job_id].project_id is None + await registry.cancel(job_id) + + +@pytest.mark.asyncio +async def test_create_explicit_project_id_scopes_typeless_job(client, registry): + # A job whose params carry no project_id (noop) still gets scoped when the + # request body sets project_id explicitly — this is what the project-filtered + # jobs panel / SSE stream rely on to show such jobs. + resp = await client.post( + "/api/jobs/noop", + json={ + "params": {"steps": 50, "sleep_per_step_seconds": 0.05}, + "project_id": "p_explicit", + }, + ) + assert resp.status_code == 201 + job_id = resp.json()["job_id"] + assert registry._jobs[job_id].project_id == "p_explicit" + rows = (await client.get("/api/jobs", params={"project_id": "p_explicit"})).json() + assert any(r["id"] == job_id for r in rows) + await registry.cancel(job_id) + + +# -- list -------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_list_empty(client): + resp = await client.get("/api/jobs") + assert resp.status_code == 200 + assert resp.json() == [] + + +@pytest.mark.asyncio +async def test_list_returns_jobs_sorted_desc(client, registry): + first = await _create_noop(client) + second = await _create_noop(client) + resp = await client.get("/api/jobs") + assert resp.status_code == 200 + ids = [r["id"] for r in resp.json()] + assert ids[0] == second + assert ids[1] == first + await registry.cancel(first) + 
await registry.cancel(second) + + +@pytest.mark.asyncio +async def test_list_filter_by_type(client, registry): + await _create_noop(client) + await client.post("/api/jobs/project_scoped", json={"params": {"project_id": "p1"}}) + resp = await client.get("/api/jobs", params={"type": "project_scoped"}) + assert resp.status_code == 200 + rows = resp.json() + assert len(rows) == 1 + assert rows[0]["type"] == "project_scoped" + + +@pytest.mark.asyncio +async def test_list_filter_by_status(client, registry): + job_id = await _create_noop(client, steps=2, sleep_per_step_seconds=0.01) + await _wait_for_status(registry, job_id, BackgroundJobStatus.SUCCEEDED) + resp = await client.get("/api/jobs", params={"status": "succeeded"}) + assert [r["id"] for r in resp.json()] == [job_id] + resp = await client.get("/api/jobs", params={"status": "running"}) + assert resp.json() == [] + + +@pytest.mark.asyncio +async def test_list_filter_by_project_id(client): + await client.post( + "/api/jobs/project_scoped", json={"params": {"project_id": "p_one"}} + ) + await client.post( + "/api/jobs/project_scoped", json={"params": {"project_id": "p_two"}} + ) + resp = await client.get("/api/jobs", params={"project_id": "p_one"}) + rows = resp.json() + assert len(rows) == 1 + assert rows[0]["project_id"] == "p_one" + + +@pytest.mark.asyncio +async def test_list_limit(client): + for _ in range(3): + await _create_noop(client) + resp = await client.get("/api/jobs", params={"limit": 2}) + assert len(resp.json()) == 2 + + +@pytest.mark.asyncio +async def test_list_since_excludes_older(client, registry): + old_id = await _create_noop(client) + newer_id = await _create_noop(client) + cutoff = registry._jobs[newer_id].created_at.isoformat() + resp = await client.get("/api/jobs", params={"since": cutoff}) + ids = [r["id"] for r in resp.json()] + assert newer_id in ids + assert old_id not in ids + + +# -- get --------------------------------------------------------------------- + + +@pytest.mark.asyncio 
+async def test_get_returns_record(client, registry): + job_id = await _create_noop(client) + resp = await client.get(f"/api/jobs/{job_id}") + assert resp.status_code == 200 + body = resp.json() + assert body["id"] == job_id + assert body["type"] == "noop" + assert "progress" in body + await registry.cancel(job_id) + + +@pytest.mark.asyncio +async def test_get_unknown_404(client): + resp = await client.get("/api/jobs/j_missing") + assert resp.status_code == 404 + + +@pytest.mark.asyncio +async def test_get_reconciles_to_succeeded(client, registry): + ReconcileCompleteWorker.done = False + resp = await client.post("/api/jobs/reconcile_complete", json={"params": {}}) + job_id = resp.json()["job_id"] + await _wait_for_status(registry, job_id, BackgroundJobStatus.RUNNING) + ReconcileCompleteWorker.done = True + got = await client.get(f"/api/jobs/{job_id}") + assert got.status_code == 200 + assert got.json()["status"] == "succeeded" + assert got.json()["progress"]["success"] == 3 + + +# -- result ------------------------------------------------------------------ + + +@pytest.mark.asyncio +async def test_result_200_when_terminal(client, registry): + job_id = await _create_noop(client, steps=3, sleep_per_step_seconds=0.01) + await _wait_for_status(registry, job_id, BackgroundJobStatus.SUCCEEDED) + resp = await client.get(f"/api/jobs/{job_id}/result") + assert resp.status_code == 200 + assert resp.json() == {"completed_steps": 3} + + +@pytest.mark.asyncio +async def test_result_404_when_not_terminal(client, registry): + job_id = await _create_noop(client) + await _wait_for_status(registry, job_id, BackgroundJobStatus.RUNNING) + resp = await client.get(f"/api/jobs/{job_id}/result") + assert resp.status_code == 404 + await registry.cancel(job_id) + + +@pytest.mark.asyncio +async def test_result_404_unknown(client): + resp = await client.get("/api/jobs/j_missing/result") + assert resp.status_code == 404 + + +# -- errors 
------------------------------------------------------------------ + + +@pytest.mark.asyncio +async def test_errors_returns_array(client, registry): + resp = await client.post( + "/api/jobs/noop", + json={ + "params": { + "steps": 4, + "sleep_per_step_seconds": 0.01, + "error_at_steps": [1, 3], + } + }, + ) + job_id = resp.json()["job_id"] + await _wait_for_status(registry, job_id, BackgroundJobStatus.SUCCEEDED) + resp = await client.get(f"/api/jobs/{job_id}/errors") + assert resp.status_code == 200 + messages = [e["error_message"] for e in resp.json()] + assert "intentional error at step 1" in messages + assert "intentional error at step 3" in messages + + +@pytest.mark.asyncio +async def test_errors_empty_when_none(client, registry): + job_id = await _create_noop(client, steps=2, sleep_per_step_seconds=0.01) + await _wait_for_status(registry, job_id, BackgroundJobStatus.SUCCEEDED) + resp = await client.get(f"/api/jobs/{job_id}/errors") + assert resp.status_code == 200 + assert resp.json() == [] + + +@pytest.mark.asyncio +async def test_errors_unknown_job_returns_empty_200(client): + resp = await client.get("/api/jobs/j_missing/errors") + assert resp.status_code == 200 + assert resp.json() == [] + + +@pytest.mark.asyncio +async def test_errors_specific_run_id(client): + run_id = str(uuid.uuid4()) + error_log.append_error(run_id, {"error_message": "from a past run"}) + resp = await client.get("/api/jobs/j_missing/errors", params={"run_id": run_id}) + assert resp.status_code == 200 + assert resp.json() == [{"error_message": "from a past run"}] + + +# -- pause / resume / cancel ------------------------------------------------- + + +@pytest.mark.asyncio +async def test_pause_then_resume(client, registry): + job_id = await _create_noop(client, steps=50, sleep_per_step_seconds=0.03) + await _wait_for_status(registry, job_id, BackgroundJobStatus.RUNNING) + + resp = await client.post(f"/api/jobs/{job_id}/pause") + assert resp.status_code == 202 + assert 
registry._jobs[job_id].status == BackgroundJobStatus.PAUSED + + resp = await client.post(f"/api/jobs/{job_id}/resume") + assert resp.status_code == 202 + assert registry._jobs[job_id].status in ( + BackgroundJobStatus.PENDING, + BackgroundJobStatus.RUNNING, + ) + + await registry.cancel(job_id) + + +@pytest.mark.asyncio +async def test_pause_409_when_not_running(client, registry): + job_id = await _create_noop(client, steps=2, sleep_per_step_seconds=0.01) + await _wait_for_status(registry, job_id, BackgroundJobStatus.SUCCEEDED) + resp = await client.post(f"/api/jobs/{job_id}/pause") + assert resp.status_code == 409 + + +@pytest.mark.asyncio +async def test_pause_409_when_unsupported(client, registry): + resp = await client.post("/api/jobs/nonpausable", json={"params": {}}) + job_id = resp.json()["job_id"] + await _wait_for_status(registry, job_id, BackgroundJobStatus.RUNNING) + resp = await client.post(f"/api/jobs/{job_id}/pause") + assert resp.status_code == 409 + await registry.cancel(job_id) + + +@pytest.mark.asyncio +async def test_pause_unknown_404(client): + resp = await client.post("/api/jobs/j_missing/pause") + assert resp.status_code == 404 + + +@pytest.mark.asyncio +async def test_resume_409_when_not_paused(client, registry): + job_id = await _create_noop(client) + await _wait_for_status(registry, job_id, BackgroundJobStatus.RUNNING) + resp = await client.post(f"/api/jobs/{job_id}/resume") + assert resp.status_code == 409 + await registry.cancel(job_id) + + +@pytest.mark.asyncio +async def test_cancel_202(client, registry): + job_id = await _create_noop(client) + await _wait_for_status(registry, job_id, BackgroundJobStatus.RUNNING) + resp = await client.post(f"/api/jobs/{job_id}/cancel") + assert resp.status_code == 202 + assert registry._jobs[job_id].status == BackgroundJobStatus.CANCELLED + + +@pytest.mark.asyncio +async def test_cancel_409_when_terminal(client, registry): + job_id = await _create_noop(client, steps=2, sleep_per_step_seconds=0.01) + 
await _wait_for_status(registry, job_id, BackgroundJobStatus.SUCCEEDED) + resp = await client.post(f"/api/jobs/{job_id}/cancel") + assert resp.status_code == 409 + + +@pytest.mark.asyncio +async def test_cancel_unknown_404(client): + resp = await client.post("/api/jobs/j_missing/cancel") + assert resp.status_code == 404 + + +# -- delete ------------------------------------------------------------------ + + +@pytest.mark.asyncio +async def test_delete_204_when_terminal(client, registry): + job_id = await _create_noop(client, steps=2, sleep_per_step_seconds=0.01) + await _wait_for_status(registry, job_id, BackgroundJobStatus.SUCCEEDED) + resp = await client.delete(f"/api/jobs/{job_id}") + assert resp.status_code == 204 + assert job_id not in registry._jobs + assert (await client.get("/api/jobs")).json() == [] + + +@pytest.mark.asyncio +async def test_delete_409_when_in_flight(client, registry): + job_id = await _create_noop(client) + await _wait_for_status(registry, job_id, BackgroundJobStatus.RUNNING) + resp = await client.delete(f"/api/jobs/{job_id}") + assert resp.status_code == 409 + await registry.cancel(job_id) + + +@pytest.mark.asyncio +async def test_delete_unknown_404(client): + resp = await client.delete("/api/jobs/j_missing") + assert resp.status_code == 404 + + +# -- wiring ------------------------------------------------------------------ + + +def test_connect_jobs_api_registers_noop_idempotently(monkeypatch): + reg = JobRegistry(max_concurrent=2) + monkeypatch.setattr(jobs_api, "job_registry", reg) + app = FastAPI() + connect_jobs_api(app) + connect_jobs_api(app) # second call must not raise + assert "noop" in reg._workers + + +# -- SSE --------------------------------------------------------------------- + + +def test_format_sse_wire_format(): + from app.desktop.studio_server.jobs.events import JobEvent + + event = JobEvent(event="job", data={"id": "j_abc", "status": "running"}) + wire = jobs_api._format_sse(event) + assert wire == 'event: job\ndata: 
{"id": "j_abc", "status": "running"}\n\n' + + +@pytest.mark.asyncio +async def test_event_stream_forwards_snapshot_then_job(registry): + # Unit-level test of the generator (independent of any HTTP transport): a + # subscriber gets the initial snapshot, and a job created afterward produces + # a `job` event. Proves pure-observer forwarding of the Phase 1 bus. + stream = jobs_api._event_stream(job_id=None, type_name=None, project_id=None) + try: + first = await asyncio.wait_for(stream.__anext__(), timeout=3.0) + assert first.startswith("event: snapshot\n") + + job = await registry.create( + "noop", {"steps": 40, "sleep_per_step_seconds": 0.05} + ) + # Drain until we see a job event for our job. + deadline = asyncio.get_event_loop().time() + 3.0 + saw_job = False + while asyncio.get_event_loop().time() < deadline: + chunk = await asyncio.wait_for(stream.__anext__(), timeout=3.0) + if chunk.startswith("event: job\n") and job.id in chunk: + saw_job = True + break + assert saw_job + await _safe_cancel(registry, job.id) + finally: + await stream.aclose() + + +def _parse_sse_block(block: str) -> tuple[str | None, dict | None]: + event_name: str | None = None + data: dict | None = None + for line in block.splitlines(): + if line.startswith("event:"): + event_name = line[len("event:") :].strip() + elif line.startswith("data:"): + data = json.loads(line[len("data:") :].strip()) + return event_name, data + + +async def _read_until_event(line_iter, target: str, timeout: float = 3.0) -> dict: + """Read SSE blocks from a shared line iterator until one matches the target + event name; return its data. 
httpx allows streaming the body only once, so a + single iterator must be threaded through all reads on a response.""" + buffer = "" + while True: + line = await asyncio.wait_for(line_iter.__anext__(), timeout=timeout) + if line == "": + event_name, data = _parse_sse_block(buffer) + buffer = "" + if event_name == target and data is not None: + return data + else: + buffer += line + "\n" + + +@pytest.mark.asyncio +async def test_sse_empty_snapshot(app, fast_keepalive): + # Connecting with no jobs yields an empty snapshot. (httpx's ASGITransport + # sends http.disconnect right after the GET body, so we only assert the + # initial snapshot here; live-event delivery is covered below with a job + # that is already running before we connect.) + transport = httpx.ASGITransport(app=app) + async with httpx.AsyncClient( + transport=transport, base_url="http://test" + ) as http_client: + async with http_client.stream("GET", "/api/jobs/events") as response: + assert response.status_code == 200 + assert response.headers["content-type"].startswith("text/event-stream") + snapshot = await _read_until_event(response.aiter_lines(), "snapshot") + assert snapshot == {"jobs": []} + + +@pytest.mark.asyncio +async def test_sse_snapshot_then_job_event(app, registry, fast_keepalive): + # Start a long-running job first, so it appears in the snapshot and keeps + # emitting live `job` progress events while we observe the stream. 
+ job = await registry.create("noop", {"steps": 40, "sleep_per_step_seconds": 0.05}) + + transport = httpx.ASGITransport(app=app) + async with httpx.AsyncClient( + transport=transport, base_url="http://test" + ) as http_client: + async with http_client.stream("GET", "/api/jobs/events") as response: + assert response.status_code == 200 + assert response.headers["content-type"].startswith("text/event-stream") + lines = response.aiter_lines() + + snapshot = await _read_until_event(lines, "snapshot") + assert [j["id"] for j in snapshot["jobs"]] == [job.id] + + data = await _read_until_event(lines, "job") + assert data["id"] == job.id + assert data["type"] == "noop" + + await _safe_cancel(registry, job.id) + + +@pytest.mark.asyncio +async def test_sse_filters_by_job_id(app, registry, fast_keepalive): + # Both jobs run; only `target`'s events should reach a job_id-filtered stream. + other = await registry.create("noop", {"steps": 40, "sleep_per_step_seconds": 0.05}) + target = await registry.create( + "noop", {"steps": 40, "sleep_per_step_seconds": 0.05} + ) + + transport = httpx.ASGITransport(app=app) + async with httpx.AsyncClient( + transport=transport, base_url="http://test" + ) as http_client: + async with http_client.stream( + "GET", "/api/jobs/events", params={"job_id": target.id} + ) as response: + lines = response.aiter_lines() + snapshot = await _read_until_event(lines, "snapshot") + snapshot_ids = {j["id"] for j in snapshot["jobs"]} + assert target.id in snapshot_ids + assert other.id not in snapshot_ids + + # The progress event that arrives is for the target, never `other`. + data = await _read_until_event(lines, "job") + assert data["id"] == target.id + + await _safe_cancel(registry, other.id) + await _safe_cancel(registry, target.id) + + +@pytest.mark.asyncio +async def test_sse_disconnect_leaves_job_running(app, registry, fast_keepalive): + """The decoupling guarantee: dropping the SSE stream mid-run must NOT stop + the job. 
Only explicit cancel/pause stops a job.""" + job = await registry.create("noop", {"steps": 6, "sleep_per_step_seconds": 0.05}) + + transport = httpx.ASGITransport(app=app) + async with httpx.AsyncClient( + transport=transport, base_url="http://test" + ) as http_client: + async with http_client.stream("GET", "/api/jobs/events") as response: + lines = response.aiter_lines() + await _read_until_event(lines, "snapshot") + # Observe at least one live job event so we know the run is underway. + await _read_until_event(lines, "job") + # Exiting the `stream` context drops the client connection, which cancels + # the SSE subscription generator (CancellableStreamingResponse). The job + # task lives in the registry and must keep running. + + assert registry._jobs[job.id].status in ( + BackgroundJobStatus.RUNNING, + BackgroundJobStatus.SUCCEEDED, + ) + await _wait_for_status(registry, job.id, BackgroundJobStatus.SUCCEEDED) + assert registry._jobs[job.id].result == {"completed_steps": 6} diff --git a/app/desktop/studio_server/jobs/test_error_log.py b/app/desktop/studio_server/jobs/test_error_log.py new file mode 100644 index 000000000..d4291c9de --- /dev/null +++ b/app/desktop/studio_server/jobs/test_error_log.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import uuid + +import pytest + +from app.desktop.studio_server.jobs import error_log + + +@pytest.fixture +def run_id(tmp_path, monkeypatch): + monkeypatch.setattr( + "app.desktop.studio_server.jobs.error_log.tempfile.gettempdir", + lambda: str(tmp_path), + ) + return str(uuid.uuid4()) + + +def test_append_and_read_round_trip(run_id): + error_log.append_error(run_id, {"error_message": "first", "step": 1}) + error_log.append_error(run_id, {"error_message": "second", "item_id": "x"}) + + entries = error_log.read_errors(run_id) + assert entries == [ + {"error_message": "first", "step": 1}, + {"error_message": "second", "item_id": "x"}, + ] + + +def test_read_missing_file_returns_empty(run_id): + assert 
error_log.read_errors(run_id) == [] + + +def test_read_skips_unparsable_lines(run_id): + error_log.append_error(run_id, {"error_message": "good"}) + with error_log.error_log_path(run_id).open("a", encoding="utf-8") as f: + f.write("not json at all\n") + f.write("\n") + error_log.append_error(run_id, {"error_message": "also good"}) + + entries = error_log.read_errors(run_id) + assert entries == [ + {"error_message": "good"}, + {"error_message": "also good"}, + ] + + +def test_delete_removes_file(run_id): + error_log.append_error(run_id, {"error_message": "x"}) + assert error_log.error_log_path(run_id).exists() + + error_log.delete_errors(run_id) + assert not error_log.error_log_path(run_id).exists() + assert error_log.read_errors(run_id) == [] + + +def test_delete_missing_file_is_noop(run_id): + error_log.delete_errors(run_id) + assert error_log.read_errors(run_id) == [] + + +def test_append_never_raises_on_bad_dir(monkeypatch, run_id): + def boom(*args, **kwargs): + raise OSError("disk full") + + monkeypatch.setattr("app.desktop.studio_server.jobs.error_log.Path.mkdir", boom) + error_log.append_error(run_id, {"error_message": "swallowed"}) diff --git a/app/desktop/studio_server/jobs/test_events.py b/app/desktop/studio_server/jobs/test_events.py new file mode 100644 index 000000000..2a60e3f2f --- /dev/null +++ b/app/desktop/studio_server/jobs/test_events.py @@ -0,0 +1,90 @@ +from __future__ import annotations + +import asyncio + +import pytest +from app.desktop.studio_server.jobs.events import JobEvent, JobEventBus +from app.desktop.studio_server.jobs.models import BackgroundJobStatus, JobRecord + + +def _record( + job_id: str = "j_aaaaaaaaaaaa", + type_name: str = "noop", + project_id: str | None = None, + status: BackgroundJobStatus = BackgroundJobStatus.RUNNING, +) -> JobRecord: + return JobRecord( + id=job_id, + type=type_name, + status=status, + project_id=project_id, + ) + + +async def _next_event(gen, timeout: float = 1.0) -> JobEvent: + return await 
asyncio.wait_for(gen.__anext__(), timeout=timeout) + + +@pytest.mark.asyncio +async def test_snapshot_then_job_event(): + existing = _record("j_existing0001") + bus = JobEventBus(snapshot_provider=lambda: [existing]) + + gen = bus.subscribe() + snapshot = await _next_event(gen) + assert snapshot.event == "snapshot" + assert [j["id"] for j in snapshot.data["jobs"]] == ["j_existing0001"] + + new = _record("j_new000000001") + bus.publish_job(new) + job_event = await _next_event(gen) + assert job_event.event == "job" + assert job_event.data["id"] == "j_new000000001" + + await gen.aclose() + + +@pytest.mark.asyncio +async def test_deleted_event(): + bus = JobEventBus(snapshot_provider=lambda: []) + gen = bus.subscribe() + await _next_event(gen) # snapshot + + bus.publish_deleted("j_gone00000001") + event = await _next_event(gen) + assert event.event == "deleted" + assert event.data == {"id": "j_gone00000001"} + + await gen.aclose() + + +@pytest.mark.asyncio +async def test_filter_by_project_id(): + matching = _record("j_match0000001", project_id="p_keep") + other = _record("j_other0000001", project_id="p_drop") + bus = JobEventBus(snapshot_provider=lambda: [matching, other]) + + gen = bus.subscribe(project_id="p_keep") + snapshot = await _next_event(gen) + assert [j["id"] for j in snapshot.data["jobs"]] == ["j_match0000001"] + + bus.publish_job(other) + bus.publish_job(matching) + event = await _next_event(gen) + assert event.data["id"] == "j_match0000001" + + await gen.aclose() + + +@pytest.mark.asyncio +async def test_filter_by_type_and_job_id(): + bus = JobEventBus(snapshot_provider=lambda: []) + gen = bus.subscribe(type_name="eval", job_id="j_target000001") + await _next_event(gen) # snapshot + + bus.publish_job(_record("j_other0000001", type_name="noop")) + bus.publish_job(_record("j_target000001", type_name="eval")) + event = await _next_event(gen) + assert event.data["id"] == "j_target000001" + + await gen.aclose() diff --git 
a/app/desktop/studio_server/jobs/test_registry.py b/app/desktop/studio_server/jobs/test_registry.py new file mode 100644 index 000000000..2dab8909c --- /dev/null +++ b/app/desktop/studio_server/jobs/test_registry.py @@ -0,0 +1,723 @@ +from __future__ import annotations + +import asyncio +import uuid + +import pytest +from pydantic import BaseModel + +from app.desktop.studio_server.jobs import error_log +from app.desktop.studio_server.jobs.models import ( + JobDerivedState, + BackgroundJobStatus, + JobWorker, +) +from app.desktop.studio_server.jobs.registry import ( + JobNotFoundError, + JobOperationError, + JobRegistry, + _new_job_id, +) +from app.desktop.studio_server.jobs.workers.noop import NoopJobWorker + + +@pytest.fixture(autouse=True) +def temp_error_log_dir(tmp_path, monkeypatch): + monkeypatch.setattr( + "app.desktop.studio_server.jobs.error_log.tempfile.gettempdir", + lambda: str(tmp_path), + ) + + +@pytest.fixture +def registry(): + reg = JobRegistry(max_concurrent=10) + reg.register_type(NoopJobWorker) + return reg + + +async def wait_for_status( + registry: JobRegistry, + job_id: str, + target: BackgroundJobStatus | set[BackgroundJobStatus], + timeout: float = 3.0, +) -> None: + targets = {target} if isinstance(target, BackgroundJobStatus) else target + deadline = asyncio.get_event_loop().time() + timeout + while asyncio.get_event_loop().time() < deadline: + job = registry._jobs.get(job_id) + if job is not None and job.status in targets: + return + await asyncio.sleep(0.01) + job = registry._jobs.get(job_id) + actual = job.status if job else "missing" + raise AssertionError(f"Job {job_id} did not reach {targets}; was {actual}") + + +# -- supporting test workers ------------------------------------------------ + + +class _EmptyParams(BaseModel): + pass + + +class _EmptyResult(BaseModel): + pass + + +class NonPausableWorker(JobWorker[_EmptyParams, _EmptyResult]): + type_name = "nonpausable" + params_model = _EmptyParams + result_model = _EmptyResult + 
supports_pause = False + + async def run(self, params, ctx): + await asyncio.sleep(5) + return _EmptyResult() + + +class AlreadyCompleteWorker(JobWorker[_EmptyParams, _EmptyResult]): + type_name = "already_complete" + params_model = _EmptyParams + result_model = _EmptyResult + supports_pause = True + run_called = False + + async def compute_state(self, params): + return JobDerivedState(total=5, success=5, error=0, is_complete=True) + + async def run(self, params, ctx): + type(self).run_called = True + return _EmptyResult() + + +class PartialProgressWorker(JobWorker[_EmptyParams, _EmptyResult]): + """First reports the full set (total + message), then a count-only update. + The later partial update must preserve the earlier total/message, not null + them. + """ + + type_name = "partial_progress" + params_model = _EmptyParams + result_model = _EmptyResult + supports_pause = False + + async def run(self, params, ctx): + await ctx.report_progress(success=1, total=50, message="starting") + await ctx.report_progress(success=5) + return _EmptyResult() + + +class RaceCompleteWorker(JobWorker[_EmptyParams, _EmptyResult]): + """run() blocks on a test-controlled gate, then returns normally without + ever observing a cancellation. The test opens the gate (so run() returns and + the supervising task drives the job to its terminal succeeded state) and only + then issues pause/cancel — reproducing the completion-vs-cancel race where + the job finished naturally during the cancel await. + """ + + type_name = "race_complete" + params_model = _EmptyParams + result_model = _EmptyResult + supports_pause = True + gate: asyncio.Event + + async def run(self, params, ctx): + await type(self).gate.wait() + return _EmptyResult() + + +class SwallowCancelWorker(JobWorker[_EmptyParams, _EmptyResult]): + """Catches CancelledError, fully clears the cancellation (uncancel) so it is + not re-raised, and returns normally — the worst-case "swallows CancelledError + and returns silently" worker. 
The cancellation transition is unconditional, + so the registry itself must land the job in paused/cancelled rather than + trusting the worker to re-raise. + + `started` is set once run() is actually suspended at its await point, so a + test can guarantee the cancellation is delivered into the worker body (not + before it runs) before issuing pause/cancel. + """ + + type_name = "swallow_cancel" + params_model = _EmptyParams + result_model = _EmptyResult + supports_pause = True + started: asyncio.Event + gate: asyncio.Event + + async def run(self, params, ctx): + type(self).started.set() + try: + await type(self).gate.wait() + except asyncio.CancelledError: + task = asyncio.current_task() + if task is not None: + task.uncancel() + return _EmptyResult() + + +class TotalThenNoneWorker(JobWorker[_EmptyParams, _EmptyResult]): + """run() reports a known total via report_progress, then compute_state at + pause returns total=None alongside success/error counts. The reconcile must + preserve the prior total rather than wiping the denominator to None. + """ + + type_name = "total_then_none" + params_model = _EmptyParams + result_model = _EmptyResult + supports_pause = True + started: asyncio.Event + gate: asyncio.Event + + async def compute_state(self, params): + return JobDerivedState(total=None, success=2, error=1, is_complete=False) + + async def run(self, params, ctx): + await ctx.report_progress(success=0, total=10, message="starting") + type(self).started.set() + try: + await type(self).gate.wait() + except asyncio.CancelledError: + task = asyncio.current_task() + if task is not None: + task.uncancel() + return _EmptyResult() + + +class ReconcileCompleteWorker(JobWorker[_EmptyParams, _EmptyResult]): + """compute_state reports complete only once the test flips `done`, so a + get() issued while the job is still running (run() is a long sleep) + reconciles it straight to succeeded mid-flight. 
+ """ + + type_name = "reconcile_complete" + params_model = _EmptyParams + result_model = _EmptyResult + supports_pause = True + done = False + + async def compute_state(self, params): + complete = type(self).done + return JobDerivedState( + total=3, success=3 if complete else 1, error=0, is_complete=complete + ) + + async def run(self, params, ctx): + await asyncio.sleep(5) + return _EmptyResult() + + +# -- job id ------------------------------------------------------------------ + + +def test_job_id_format(): + job_id = _new_job_id() + assert job_id.startswith("j_") + suffix = job_id[2:] + assert len(suffix) == 12 + assert all(c in "abcdefghijklmnopqrstuvwxyz234567" for c in suffix) + + +# -- lifecycle --------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_full_lifecycle_succeeds(registry): + job = await registry.create("noop", {"steps": 3, "sleep_per_step_seconds": 0.01}) + assert job.status in (BackgroundJobStatus.PENDING, BackgroundJobStatus.RUNNING) + assert job.supports_pause is True + + await wait_for_status(registry, job.id, BackgroundJobStatus.SUCCEEDED) + final = registry._jobs[job.id] + assert final.result == {"completed_steps": 3} + assert final.started_at is not None + assert final.ended_at is not None + assert final.run_id is not None + assert final.progress.success == 3 + + +@pytest.mark.asyncio +async def test_failure_path_captures_error_log(registry): + job = await registry.create( + "noop", + {"steps": 5, "sleep_per_step_seconds": 0.01, "fail_at_step": 2}, + ) + await wait_for_status(registry, job.id, BackgroundJobStatus.FAILED) + + final = registry._jobs[job.id] + assert final.error is not None + assert final.error.error is not None + assert "intentional fail at step 2" in final.error.error + + entries = error_log.read_errors(final.run_id) + fatal = [e for e in entries if e.get("fatal")] + assert len(fatal) == 1 + assert "intentional fail at step 2" in fatal[0]["error_message"] + + 
+@pytest.mark.asyncio +async def test_non_fatal_errors_logged_and_counted(registry): + job = await registry.create( + "noop", + { + "steps": 4, + "sleep_per_step_seconds": 0.01, + "error_at_steps": [1, 3], + }, + ) + await wait_for_status(registry, job.id, BackgroundJobStatus.SUCCEEDED) + + final = registry._jobs[job.id] + assert final.progress.error == 2 + assert final.progress.success == 2 + + entries = error_log.read_errors(final.run_id) + messages = [e["error_message"] for e in entries] + assert "intentional error at step 1" in messages + assert "intentional error at step 3" in messages + steps = sorted(e["step"] for e in entries if "step" in e) + assert steps == [1, 3] + + +@pytest.mark.asyncio +async def test_error_log_missing_returns_empty(): + assert error_log.read_errors(str(uuid.uuid4())) == [] + + +# -- cancel ------------------------------------------------------------------ + + +@pytest.mark.asyncio +async def test_cancel_pending_job_never_starts(): + reg = JobRegistry(max_concurrent=1) + reg.register_type(NoopJobWorker) + running = await reg.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.05}) + await wait_for_status(reg, running.id, BackgroundJobStatus.RUNNING) + pending = await reg.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.05}) + assert reg._jobs[pending.id].status == BackgroundJobStatus.PENDING + + await reg.cancel(pending.id) + assert reg._jobs[pending.id].status == BackgroundJobStatus.CANCELLED + assert pending.id not in reg._tasks + + await reg.cancel(running.id) + + +@pytest.mark.asyncio +async def test_cancel_from_running(registry): + job = await registry.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.05}) + await wait_for_status(registry, job.id, BackgroundJobStatus.RUNNING) + await registry.cancel(job.id) + assert registry._jobs[job.id].status == BackgroundJobStatus.CANCELLED + + +@pytest.mark.asyncio +async def test_cancel_immediately_after_create_reclaims_slot(): + # Cancelling right after create can race 
the supervising task before its + # coroutine body runs; the registry must still reclaim the concurrency slot. + reg = JobRegistry(max_concurrent=2) + reg.register_type(NoopJobWorker) + ids = [] + for _ in range(6): + job = await reg.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.02}) + ids.append(job.id) + for job_id in ids: + await reg.cancel(job_id) + await asyncio.sleep(0.05) + + assert all(reg._jobs[i].status == BackgroundJobStatus.CANCELLED for i in ids) + assert reg._running_count == 0 + assert reg._tasks == {} + assert reg._pending_ids == [] + + +@pytest.mark.asyncio +async def test_cancel_terminal_raises(registry): + job = await registry.create("noop", {"steps": 2, "sleep_per_step_seconds": 0.01}) + await wait_for_status(registry, job.id, BackgroundJobStatus.SUCCEEDED) + with pytest.raises(JobOperationError): + await registry.cancel(job.id) + + +# -- pause / resume ---------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_pause_then_resume_succeeds(registry): + job = await registry.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.03}) + await wait_for_status(registry, job.id, BackgroundJobStatus.RUNNING) + first_run_id = registry._jobs[job.id].run_id + + await registry.pause(job.id) + assert registry._jobs[job.id].status == BackgroundJobStatus.PAUSED + + # Make resume finish quickly by checking it re-runs with a fresh run_id. 
+ await registry.resume(job.id) + assert registry._jobs[job.id].status in ( + BackgroundJobStatus.PENDING, + BackgroundJobStatus.RUNNING, + ) + await wait_for_status(registry, job.id, BackgroundJobStatus.RUNNING) + second_run_id = registry._jobs[job.id].run_id + assert second_run_id is not None + assert second_run_id != first_run_id + + await registry.cancel(job.id) + + +@pytest.mark.asyncio +async def test_resume_to_succeeded_when_complete(): + reg = JobRegistry(max_concurrent=2) + reg.register_type(NoopJobWorker) + reg.register_type(AlreadyCompleteWorker) + AlreadyCompleteWorker.run_called = False + + # Start a noop that we pause so we have a paused job to resume against a + # complete worker. Simpler: create the complete worker job, it succeeds + # immediately via reconcile at launch. + job = await reg.create("already_complete", {}) + await wait_for_status(reg, job.id, BackgroundJobStatus.SUCCEEDED) + assert AlreadyCompleteWorker.run_called is False + assert reg._jobs[job.id].progress.success == 5 + + +@pytest.mark.asyncio +async def test_pause_rejected_when_not_supported(): + reg = JobRegistry(max_concurrent=2) + reg.register_type(NonPausableWorker) + job = await reg.create("nonpausable", {}) + await wait_for_status(reg, job.id, BackgroundJobStatus.RUNNING) + with pytest.raises(JobOperationError): + await reg.pause(job.id) + await reg.cancel(job.id) + + +@pytest.mark.asyncio +async def test_pause_rejected_when_not_running(registry): + job = await registry.create("noop", {"steps": 2, "sleep_per_step_seconds": 0.01}) + await wait_for_status(registry, job.id, BackgroundJobStatus.SUCCEEDED) + with pytest.raises(JobOperationError): + await registry.pause(job.id) + + +@pytest.mark.asyncio +async def test_resume_rejected_when_not_paused(registry): + job = await registry.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.05}) + await wait_for_status(registry, job.id, BackgroundJobStatus.RUNNING) + with pytest.raises(JobOperationError): + await 
registry.resume(job.id) + await registry.cancel(job.id) + + +async def _drive_completion_race(operation: str) -> JobRegistry: + # Reproduce the completion-vs-cancel race deterministically: the worker's + # run() is gated; we open the gate at the exact moment the lifecycle op + # begins its cancel await, so the supervising task finishes naturally + # (job -> succeeded, task done) before/while task.cancel() lands. The job + # was running at the op's entry check, so it gets past the guard, but the + # terminal succeeded state must survive. + reg = JobRegistry(max_concurrent=2) + reg.register_type(RaceCompleteWorker) + RaceCompleteWorker.gate = asyncio.Event() + job = await reg.create("race_complete", {}) + await wait_for_status(reg, job.id, BackgroundJobStatus.RUNNING) + + original_cancel_task = reg._cancel_task + + async def open_gate_then_cancel(job_id: str) -> None: + # Let run() return and the supervising task drive to terminal first. + RaceCompleteWorker.gate.set() + task = reg._tasks.get(job_id) + if task is not None: + try: + await task + except asyncio.CancelledError: + pass + await original_cancel_task(job_id) + + reg._cancel_task = open_gate_then_cancel # type: ignore[method-assign] + + if operation == "pause": + await reg.pause(job.id) + else: + await reg.cancel(job.id) + return reg + + +@pytest.mark.asyncio +async def test_pause_loses_race_to_natural_completion_keeps_succeeded(): + # Regression: if run() completes naturally during pause()'s cancel-await, + # the job is already terminal (succeeded) and pause() must not clobber it + # back to paused (which would drop the result and allow a resume re-run). + reg = await _drive_completion_race("pause") + job_id = next(iter(reg._jobs)) + assert reg._jobs[job_id].status == BackgroundJobStatus.SUCCEEDED + assert reg._jobs[job_id].result is not None + + +@pytest.mark.asyncio +async def test_cancel_loses_race_to_natural_completion_keeps_succeeded(): + # The cancel() path already guards on is_terminal; lock it in. 
+ reg = await _drive_completion_race("cancel") + job_id = next(iter(reg._jobs)) + assert reg._jobs[job_id].status == BackgroundJobStatus.SUCCEEDED + assert reg._jobs[job_id].result is not None + + +@pytest.mark.asyncio +async def test_pause_enforced_when_worker_swallows_cancel(): + # A worker that catches CancelledError (and uncancels it) then returns + # normally must still be paused, not succeeded — the cancellation transition + # is unconditional and enforced by the registry, not the worker. + reg = JobRegistry(max_concurrent=2) + reg.register_type(SwallowCancelWorker) + SwallowCancelWorker.started = asyncio.Event() + SwallowCancelWorker.gate = asyncio.Event() + job = await reg.create("swallow_cancel", {}) + await wait_for_status(reg, job.id, BackgroundJobStatus.RUNNING) + await asyncio.wait_for(SwallowCancelWorker.started.wait(), timeout=3.0) + + result = await reg.pause(job.id) + assert result.status == BackgroundJobStatus.PAUSED + assert reg._jobs[job.id].result is None + + +@pytest.mark.asyncio +async def test_cancel_enforced_when_worker_swallows_cancel(): + reg = JobRegistry(max_concurrent=2) + reg.register_type(SwallowCancelWorker) + SwallowCancelWorker.started = asyncio.Event() + SwallowCancelWorker.gate = asyncio.Event() + job = await reg.create("swallow_cancel", {}) + await wait_for_status(reg, job.id, BackgroundJobStatus.RUNNING) + await asyncio.wait_for(SwallowCancelWorker.started.wait(), timeout=3.0) + + result = await reg.cancel(job.id) + assert result.status == BackgroundJobStatus.CANCELLED + assert reg._jobs[job.id].result is None + + +@pytest.mark.asyncio +async def test_cancel_from_paused(): + reg = JobRegistry(max_concurrent=2) + reg.register_type(NoopJobWorker) + job = await reg.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.03}) + await wait_for_status(reg, job.id, BackgroundJobStatus.RUNNING) + await reg.pause(job.id) + assert reg._jobs[job.id].status == BackgroundJobStatus.PAUSED + + result = await reg.cancel(job.id) + assert 
result.status == BackgroundJobStatus.CANCELLED + assert reg._jobs[job.id].status == BackgroundJobStatus.CANCELLED + assert reg._jobs[job.id].ended_at is not None + + +# -- delete ------------------------------------------------------------------ + + +@pytest.mark.asyncio +async def test_delete_terminal_emits_deleted(registry): + job = await registry.create("noop", {"steps": 2, "sleep_per_step_seconds": 0.01}) + await wait_for_status(registry, job.id, BackgroundJobStatus.SUCCEEDED) + + events = [] + gen = registry.events.subscribe() + await asyncio.wait_for(gen.__anext__(), timeout=1.0) # snapshot + + async def collect(): + async for event in gen: + events.append(event) + + collector = asyncio.create_task(collect()) + await registry.delete(job.id) + await asyncio.sleep(0.05) + collector.cancel() + try: + await collector + except asyncio.CancelledError: + pass + + assert job.id not in registry._jobs + assert any(e.event == "deleted" and e.data["id"] == job.id for e in events) + + +@pytest.mark.asyncio +async def test_delete_running_raises(registry): + job = await registry.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.05}) + await wait_for_status(registry, job.id, BackgroundJobStatus.RUNNING) + with pytest.raises(JobOperationError): + await registry.delete(job.id) + await registry.cancel(job.id) + + +@pytest.mark.asyncio +async def test_delete_pending_raises(): + reg = JobRegistry(max_concurrent=1) + reg.register_type(NoopJobWorker) + running = await reg.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.05}) + await wait_for_status(reg, running.id, BackgroundJobStatus.RUNNING) + pending = await reg.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.05}) + assert reg._jobs[pending.id].status == BackgroundJobStatus.PENDING + with pytest.raises(JobOperationError): + await reg.delete(pending.id) + await reg.cancel(running.id) + await reg.cancel(pending.id) + + +# -- reconciliation ---------------------------------------------------------- + + 
+@pytest.mark.asyncio
+async def test_compute_state_none_keeps_snapshot(registry):
+    # Noop's compute_state returns None, so the believed snapshot from
+    # report_progress is preserved and never flipped to complete early.
+    job = await registry.create("noop", {"steps": 4, "sleep_per_step_seconds": 0.02})
+    await wait_for_status(registry, job.id, BackgroundJobStatus.RUNNING)
+    # get() triggers reconcile; with None it must not change progress/status.
+    got = await registry.get(job.id)
+    assert got is not None
+    assert got.status in (BackgroundJobStatus.RUNNING, BackgroundJobStatus.SUCCEEDED)
+    await wait_for_status(registry, job.id, BackgroundJobStatus.SUCCEEDED)
+    assert registry._jobs[job.id].progress.success == 4
+
+
+@pytest.mark.asyncio
+async def test_report_progress_preserves_total_and_message_when_omitted():
+    # A count-only report_progress call must not wipe a total/message set by an
+    # earlier call.
+    reg = JobRegistry(max_concurrent=2)
+    reg.register_type(PartialProgressWorker)
+    job = await reg.create("partial_progress", {})
+    await wait_for_status(reg, job.id, BackgroundJobStatus.SUCCEEDED)
+
+    final = reg._jobs[job.id]
+    assert final.progress.success == 5
+    assert final.progress.total == 50
+    assert final.progress.message == "starting"
+
+
+@pytest.mark.asyncio
+async def test_apply_derived_preserves_total_when_compute_state_returns_none():
+    # A compute_state that returns total=None (unknown denominator) alongside
+    # success/error counts must not wipe a total set earlier via report_progress.
+    # total=None means "unknown, keep what we had", mirroring message handling.
+    reg = JobRegistry(max_concurrent=2)
+    reg.register_type(TotalThenNoneWorker)
+    TotalThenNoneWorker.started = asyncio.Event()
+    TotalThenNoneWorker.gate = asyncio.Event()
+    job = await reg.create("total_then_none", {})
+    await wait_for_status(reg, job.id, BackgroundJobStatus.RUNNING)
+    await asyncio.wait_for(TotalThenNoneWorker.started.wait(), timeout=3.0)
+    assert reg._jobs[job.id].progress.total == 10
+
+    # pause() runs compute_state (total=None, success=2, error=1) through
+    # _apply_derived; the prior total of 10 must survive.
+    result = await reg.pause(job.id)
+    assert result.status == BackgroundJobStatus.PAUSED
+    assert result.progress.total == 10
+    assert result.progress.success == 2
+    assert result.progress.error == 1
+
+
+@pytest.mark.asyncio
+async def test_get_reconciles_running_job_to_succeeded_mid_flight():
+    # A long-running job whose source-of-truth state flips to complete should be
+    # reconciled straight to succeeded by get() (the running/get() reconcile
+    # path), not only at launch time.
+    reg = JobRegistry(max_concurrent=2)
+    reg.register_type(ReconcileCompleteWorker)
+    ReconcileCompleteWorker.done = False
+    job = await reg.create("reconcile_complete", {})
+    await wait_for_status(reg, job.id, BackgroundJobStatus.RUNNING)
+    # Still running here (run() is a 5s sleep); now flip the source of truth.
+    assert reg._jobs[job.id].status == BackgroundJobStatus.RUNNING
+    ReconcileCompleteWorker.done = True
+
+    got = await reg.get(job.id)
+    assert got is not None
+    assert got.status == BackgroundJobStatus.SUCCEEDED
+    assert got.progress.success == 3
+    assert got.ended_at is not None
+
+
+# -- concurrency -------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_semaphore_caps_concurrency_fifo():
+    # Four long noop jobs against max_concurrent=2: exactly two may run while
+    # the other two stay pending, in creation order.
+    reg = JobRegistry(max_concurrent=2)
+    reg.register_type(NoopJobWorker)
+
+    jobs = []
+    for _ in range(4):
+        jobs.append(
+            await reg.create("noop", {"steps": 50, "sleep_per_step_seconds": 0.05})
+        )
+
+    # Short pause before sampling statuses — presumably gives the registry time
+    # to launch the first jobs; NOTE(review): timing-sensitive, confirm it is
+    # not flaky on slow CI.
+    await asyncio.sleep(0.05)
+    statuses = [reg._jobs[j.id].status for j in jobs]
+    running = [s for s in statuses if s == BackgroundJobStatus.RUNNING]
+    pending = [s for s in statuses if s == BackgroundJobStatus.PENDING]
+    assert len(running) == 2
+    assert len(pending) == 2
+    # FIFO: the first two created are the running ones.
+    assert statuses[0] == BackgroundJobStatus.RUNNING
+    assert statuses[1] == BackgroundJobStatus.RUNNING
+    assert statuses[2] == BackgroundJobStatus.PENDING
+    assert statuses[3] == BackgroundJobStatus.PENDING
+
+    # Cancel the running ones; pending should be promoted.
+    await reg.cancel(jobs[0].id)
+    await reg.cancel(jobs[1].id)
+    await wait_for_status(reg, jobs[2].id, BackgroundJobStatus.RUNNING)
+    await wait_for_status(reg, jobs[3].id, BackgroundJobStatus.RUNNING)
+
+    # Cancel the promoted jobs too so nothing is left running after the test.
+    await reg.cancel(jobs[2].id)
+    await reg.cancel(jobs[3].id)
+
+
+# -- events ------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_registry_emits_snapshot_and_job_events(registry):
+    # The first event on a new subscription must be a snapshot; no job has been
+    # created yet, so its job list is empty.
+    gen = registry.events.subscribe()
+    snapshot = await asyncio.wait_for(gen.__anext__(), timeout=1.0)
+    assert snapshot.event == "snapshot"
+    assert snapshot.data["jobs"] == []
+
+    events = []
+
+    # Drain the rest of the subscription in a background task while a job runs.
+    async def collect():
+        async for event in gen:
+            events.append(event)
+
+    collector = asyncio.create_task(collect())
+    job = await registry.create("noop", {"steps": 2, "sleep_per_step_seconds": 0.01})
+    await wait_for_status(registry, job.id, BackgroundJobStatus.SUCCEEDED)
+    # Short grace period before stopping the collector — presumably lets any
+    # trailing events be delivered; TODO confirm.
+    await asyncio.sleep(0.02)
+    collector.cancel()
+    # Awaiting the cancelled task raises CancelledError; that is the expected
+    # way for the collector to stop.
+    try:
+        await collector
+    except asyncio.CancelledError:
+        pass
+
+    job_events = [e for e in events if e.event == "job"]
+    assert len(job_events) >= 2
+    assert any(e.data["status"] == "running" for e in job_events)
+    assert any(e.data["status"] == "succeeded" for e in job_events)
+
+
+# -- not found ---------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_get_unknown_returns_none(registry):
+    # get() reports an unknown id as None rather than raising.
+    assert await registry.get("j_doesnotexist") is None
+
+
+@pytest.mark.asyncio
+async def test_lifecycle_op_unknown_raises(registry):
+    # A lifecycle operation (cancel here) on an unknown id raises
+    # JobNotFoundError.
+    with pytest.raises(JobNotFoundError):
+        await registry.cancel("j_doesnotexist")
diff --git a/app/desktop/studio_server/jobs/workers/__init__.py b/app/desktop/studio_server/jobs/workers/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/app/desktop/studio_server/jobs/workers/eval.py b/app/desktop/studio_server/jobs/workers/eval.py
new file mode 100644
index 000000000..89f540fa6
--- /dev/null
+++
b/app/desktop/studio_server/jobs/workers/eval.py
@@ -0,0 +1,160 @@
+from __future__ import annotations
+
+from app.desktop.git_sync.save_context import save_context_for_project
+from kiln_ai.adapters.eval.eval_runner import EvalRunner
+from kiln_ai.datamodel.dataset_filters import dataset_filter_from_id
+from kiln_ai.datamodel.eval import Eval, EvalConfig
+from kiln_ai.datamodel.task import Task
+from pydantic import BaseModel
+
+from ...eval_api import eval_config_from_id, task_run_config_from_id
+from ..models import JobContext, JobDerivedState, JobWorker
+
+
+class EvalJobParams(BaseModel):
+    """Identifiers selecting the eval config and run config an eval job scores."""
+
+    project_id: str
+    task_id: str
+    eval_id: str
+    eval_config_id: str
+    run_config_id: str
+
+
+class EvalJobResult(BaseModel):
+    """Final counts from EvalJobWorker.run, measured against the full eval set."""
+
+    total: int
+    success: int
+    error: int
+
+
+class EvalJobWorker(JobWorker[EvalJobParams, EvalJobResult]):
+    """Background worker that runs an eval against a single run config.
+
+    Wraps the existing EvalRunner unchanged. Idempotent: EvalRunner excludes
+    already-run (eval_config, run_config, dataset) triples, so a paused-then-
+    resumed (or re-triggered) job skips completed items and writes no duplicate
+    EvalRun entities — hence supports_pause = True.
+    """
+
+    type_name = "eval"
+    params_model = EvalJobParams
+    result_model = EvalJobResult
+    supports_pause = True
+
+    async def compute_state(self, params: EvalJobParams) -> JobDerivedState:
+        """Derive progress from the runs already stored under the eval config.
+
+        ``total`` is the number of task runs passing the eval-set filter and
+        ``success`` is how many of those already have a run recorded for
+        ``params.run_config_id``. ``error`` is always 0 here, and
+        ``is_complete`` is true once ``success >= total``.
+        """
+        eval_config = eval_config_from_id(
+            params.project_id,
+            params.task_id,
+            params.eval_id,
+            params.eval_config_id,
+        )
+        parent_eval, task = self._eval_and_task(eval_config)
+
+        # The eval-set filter defines the universe of dataset items in scope.
+        # EvalRunner only works items that BOTH pass this filter AND lack a
+        # matching EvalRun, so progress must be measured against this set.
+        dataset_filter = dataset_filter_from_id(parent_eval.eval_set_filter_id)
+        in_filter_ids = {
+            task_run.id
+            for task_run in task.runs(readonly=True)
+            if dataset_filter(task_run)
+        }
+        total = len(in_filter_ids)
+
+        # Count only scored items that are still in the filter set. Items that
+        # were scored but later drifted out of the filter must not be counted,
+        # or success/is_complete would overcount and a resume could short-circuit
+        # to succeeded while real work remains.
+        scored_ids = {
+            run.dataset_id
+            for run in eval_config.runs(readonly=True)
+            if run.task_run_config_id == params.run_config_id
+        }
+        success = len(scored_ids & in_filter_ids)
+
+        return JobDerivedState(
+            total=total,
+            success=success,
+            error=0,
+            is_complete=success >= total,
+        )
+
+    async def run(self, params: EvalJobParams, ctx: JobContext) -> EvalJobResult:
+        """Run the unfinished part of the eval, reporting full-set progress.
+
+        Each Progress yielded by EvalRunner.run() is offset by the baseline from
+        compute_state() and forwarded to ``ctx.report_progress``. Returns the
+        final counts, which are just the baseline if the runner yields nothing.
+        """
+        # Baseline: items already scored (and still in-filter) before this run.
+        # EvalRunner only works the unfinished remainder, so its Progress counts
+        # are relative to that remainder. We add the baseline back so progress
+        # and the returned result are reported against the FULL eval-set size,
+        # not just the work left for this run.
+        baseline = await self.compute_state(params)
+        baseline_success = baseline.success
+
+        eval_runner = self._build_eval_runner(params)
+
+        success = baseline_success
+        total = baseline.total if baseline.total is not None else baseline_success
+        error = 0
+        async for progress in eval_runner.run():
+            # progress.total = full - baseline_success (the unfinished remainder),
+            # so baseline_success + progress.total = the full eval-set size.
+            success = baseline_success + progress.complete
+            total = baseline_success + progress.total
+            error = progress.errors
+            await ctx.report_progress(
+                success=success,
+                error=error,
+                total=total,
+            )
+
+        return EvalJobResult(total=total, success=success, error=error)
+
+    def _build_eval_runner(self, params: EvalJobParams) -> EvalRunner:
+        """Build the EvalRunner for this job's eval config and run config."""
+        eval_config = eval_config_from_id(
+            params.project_id,
+            params.task_id,
+            params.eval_id,
+            params.eval_config_id,
+        )
+        run_config = task_run_config_from_id(
+            params.project_id,
+            params.task_id,
+            params.run_config_id,
+        )
+        save_context = save_context_for_project(
+            params.project_id,
+            context=f"eval job {params.eval_id}/{params.run_config_id}",
+        )
+        return EvalRunner(
+            eval_configs=[eval_config],
+            run_configs=[run_config],
+            eval_run_type="task_run_eval",
+            save_context=save_context,
+        )
+
+    def _eval_and_task(self, eval_config: EvalConfig) -> tuple[Eval, Task]:
+        """Return the eval config's parent Eval and that eval's parent Task.
+
+        Raises ValueError when either parent is missing.
+        """
+        parent_eval = eval_config.parent_eval()
+        if parent_eval is None:
+            raise ValueError("Eval config has no parent eval")
+        task = parent_eval.parent_task()
+        if task is None:
+            raise ValueError("Eval has no parent task")
+        return parent_eval, task
diff --git a/app/desktop/studio_server/jobs/workers/noop.py b/app/desktop/studio_server/jobs/workers/noop.py
new file mode 100644
index 000000000..23cc8d04a
--- /dev/null
+++ b/app/desktop/studio_server/jobs/workers/noop.py
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+import asyncio
+
+from pydantic import BaseModel
+
+from ..models import JobContext, JobDerivedState, JobWorker
+
+
+class NoopJobParams(BaseModel):
+    """Step count, per-step delay, and injected failures for a no-op job."""
+
+    steps: int = 10
+    sleep_per_step_seconds: float = 0.5
+    fail_at_step: int | None = None
+    error_at_steps: list[int] = []
+
+
+class NoopJobResult(BaseModel):
+    """Number of steps the no-op job processed (successes plus errors)."""
+
+    completed_steps: int
+
+
+class NoopJobWorker(JobWorker[NoopJobParams, NoopJobResult]):
+    """Worker that sleeps through a fixed number of steps, reporting progress."""
+
+    type_name = "noop"
+    params_model = NoopJobParams
+    result_model = NoopJobResult
+    supports_pause = True
+
+    async def compute_state(self, params: NoopJobParams) -> JobDerivedState | None:
+        """Return None; this worker derives no state from its params."""
+        return None
+
+    async def run(self, params: NoopJobParams, ctx: JobContext) -> NoopJobResult:
+        """Sleep once per step, then count the step as a success or an error.
+
+        Raises RuntimeError at ``fail_at_step``. Steps listed in
+        ``error_at_steps`` go to ``ctx.report_error`` and count as errors.
+        Progress is reported after every step that does not raise.
+        """
+        success = error = 0
+        for i in range(params.steps):
+            await asyncio.sleep(params.sleep_per_step_seconds)
+            if params.fail_at_step == i:
+                raise RuntimeError(f"intentional fail at step {i}")
+            if i in params.error_at_steps:
+                error += 1
+                await ctx.report_error(f"intentional error at step {i}", step=i)
+            else:
+                success += 1
+            await ctx.report_progress(
+                success=success,
+                error=error,
+                total=params.steps,
+                message=f"step {i + 1}/{params.steps}",
+            )
+        return NoopJobResult(completed_steps=success + error)
diff --git a/app/desktop/studio_server/jobs/workers/test_eval.py b/app/desktop/studio_server/jobs/workers/test_eval.py
new file mode 100644
index 000000000..0715344f9
--- /dev/null
+++ b/app/desktop/studio_server/jobs/workers/test_eval.py
@@ -0,0 +1,535 @@
+from __future__ import annotations
+
+from contextlib import contextmanager
+from typing import AsyncIterator
+from unittest.mock import patch
+
+import pytest
+from app.desktop.studio_server.jobs.models import BackgroundJobStatus
+from app.desktop.studio_server.jobs.registry import JobRegistry
+from app.desktop.studio_server.jobs.workers.eval import (
+    EvalJobParams,
+    EvalJobResult,
+    EvalJobWorker,
+)
+from kiln_ai.adapters.ml_model_list import ModelProviderName
+from kiln_ai.datamodel import (
+    DataSource,
+    DataSourceType,
+    Project,
+    Task,
+    TaskOutput,
+    TaskOutputRatingType,
+    TaskRun,
+)
+from kiln_ai.datamodel.eval import (
+    Eval,
+    EvalConfig,
+    EvalOutputScore,
+    EvalRun,
+)
+from kiln_ai.datamodel.run_config import KilnAgentRunConfigProperties
+from kiln_ai.datamodel.task import StructuredOutputMode, TaskRunConfig
+from kiln_ai.utils.async_job_runner import Progress
+
+
+@pytest.fixture
+def project(tmp_path):
+    project = Project(
+        id="project1", name="Test Project", path=tmp_path / "project.kiln"
+    )
+    project.save_to_file()
+    return project
+
+
+@pytest.fixture
+def
task(project): + task = Task( + id="task1", + name="Test Task", + description="test", + instruction="do the thing", + parent=project, + ) + task.save_to_file() + return task + + +@pytest.fixture +def eval(task): + eval = Eval( + id="eval1", + name="Test Eval", + description="test", + eval_set_filter_id="tag::eval_set", + eval_configs_filter_id="tag::golden", + output_scores=[ + EvalOutputScore( + name="Accuracy", + instruction="Check accuracy", + type=TaskOutputRatingType.pass_fail, + ), + ], + parent=task, + ) + eval.save_to_file() + return eval + + +@pytest.fixture +def eval_config(eval): + eval_config = EvalConfig( + id="eval_config1", + name="Test Eval Config", + model_name="gpt-4", + model_provider="openai", + properties={"eval_steps": ["step1", "step2"]}, + parent=eval, + ) + eval_config.save_to_file() + return eval_config + + +@pytest.fixture +def run_config(task): + run_config = TaskRunConfig( + id="run_config1", + name="Test Run Config", + description="test", + run_config_properties=KilnAgentRunConfigProperties( + model_name="gpt-4", + model_provider_name=ModelProviderName.openai, + prompt_id="simple_prompt_builder", + structured_output_mode=StructuredOutputMode.json_schema, + ), + parent=task, + ) + run_config.save_to_file() + return run_config + + +@pytest.fixture +def data_source(): + return DataSource( + type=DataSourceType.synthetic, + properties={ + "model_name": "gpt-4", + "model_provider": "openai", + "adapter_name": "test_adapter", + }, + ) + + +@pytest.fixture +def params(): + return EvalJobParams( + project_id="project1", + task_id="task1", + eval_id="eval1", + eval_config_id="eval_config1", + run_config_id="run_config1", + ) + + +@pytest.fixture +def resolve_project(project): + """Make the eval_api entity helpers resolve the on-disk project by id. + + task_from_id binds project_from_id into kiln_server.task_api, so we patch it + there (the name as looked up), not at its definition site. 
+ """ + with patch("kiln_server.task_api.project_from_id", return_value=project): + yield project + + +def _make_task_run(task, data_source, tag: str) -> TaskRun: + task_run = TaskRun( + parent=task, + input="test", + input_source=data_source, + tags=[tag], + output=TaskOutput(output="test"), + ) + task_run.save_to_file() + return task_run + + +def _make_eval_run(eval_config, dataset_id, run_config_id) -> EvalRun: + eval_run = EvalRun( + parent=eval_config, + dataset_id=dataset_id, + task_run_config_id=run_config_id, + input="test", + output="test", + scores={"accuracy": 1.0}, + ) + eval_run.save_to_file() + return eval_run + + +@contextmanager +def _stub_eval_runner_run(progresses: list[Progress]): + async def fake_run(self, concurrency: int = 25) -> AsyncIterator[Progress]: + for progress in progresses: + yield progress + + with patch( + "kiln_ai.adapters.eval.eval_runner.EvalRunner.run", + new=fake_run, + ): + yield + + +# -- compute_state ----------------------------------------------------------- + + +async def test_compute_state_no_eval_runs( + resolve_project, task, eval_config, run_config, data_source, params +): + for _ in range(3): + _make_task_run(task, data_source, "eval_set") + # A task run outside the eval-set filter must not be counted toward total. 
+ _make_task_run(task, data_source, "other") + + state = await EvalJobWorker().compute_state(params) + + assert state.total == 3 + assert state.success == 0 + assert state.error == 0 + assert state.is_complete is False + + +async def test_compute_state_counts_already_scored( + resolve_project, task, eval_config, run_config, data_source, params +): + task_runs = [_make_task_run(task, data_source, "eval_set") for _ in range(3)] + _make_eval_run(eval_config, task_runs[0].id, run_config.id) + _make_eval_run(eval_config, task_runs[1].id, run_config.id) + + state = await EvalJobWorker().compute_state(params) + + assert state.total == 3 + assert state.success == 2 + assert state.is_complete is False + + +async def test_compute_state_is_complete( + resolve_project, task, eval_config, run_config, data_source, params +): + task_runs = [_make_task_run(task, data_source, "eval_set") for _ in range(2)] + for task_run in task_runs: + _make_eval_run(eval_config, task_run.id, run_config.id) + + state = await EvalJobWorker().compute_state(params) + + assert state.total == 2 + assert state.success == 2 + assert state.is_complete is True + + +async def test_compute_state_ignores_other_run_config( + resolve_project, task, eval_config, run_config, data_source, params +): + task_runs = [_make_task_run(task, data_source, "eval_set") for _ in range(2)] + # Scored under a different run config — must not be counted. + _make_eval_run(eval_config, task_runs[0].id, "some_other_run_config") + + state = await EvalJobWorker().compute_state(params) + + assert state.total == 2 + assert state.success == 0 + assert state.is_complete is False + + +async def test_compute_state_ignores_scored_items_out_of_filter( + resolve_project, task, eval_config, run_config, data_source, params +): + # Two items in the eval-set filter, both scored. 
+ in_filter = [_make_task_run(task, data_source, "eval_set") for _ in range(2)] + for task_run in in_filter: + _make_eval_run(eval_config, task_run.id, run_config.id) + + # An item that was scored under this run config but is NOT in the eval-set + # filter (e.g. it drifted out / was tagged differently). EvalRunner would + # never work it, so it must not count toward success or flip is_complete. + out_of_filter = _make_task_run(task, data_source, "other") + _make_eval_run(eval_config, out_of_filter.id, run_config.id) + + state = await EvalJobWorker().compute_state(params) + + # total reflects only in-filter items; the out-of-filter scored item is + # neither counted in total nor in success. + assert state.total == 2 + assert state.success == 2 + assert state.is_complete is True + + +async def test_compute_state_out_of_filter_does_not_short_circuit( + resolve_project, task, eval_config, run_config, data_source, params +): + # Three in-filter items; only one scored. Two remain to be worked. + in_filter = [_make_task_run(task, data_source, "eval_set") for _ in range(3)] + _make_eval_run(eval_config, in_filter[0].id, run_config.id) + + # Extra scored items that are out-of-filter. A naive count would inflate + # success to 3 and falsely report is_complete, short-circuiting a resume. + for _ in range(5): + out_of_filter = _make_task_run(task, data_source, "other") + _make_eval_run(eval_config, out_of_filter.id, run_config.id) + + state = await EvalJobWorker().compute_state(params) + + assert state.total == 3 + assert state.success == 1 + assert state.is_complete is False + + +async def test_compute_state_missing_eval_config_raises( + resolve_project, task, run_config, data_source +): + # No EvalConfig (or Eval) with this id exists on disk: the entity loader + # raises rather than silently reporting "no progress", so the failure is + # visible to the registry during reconciliation. 
+ bad_params = EvalJobParams( + project_id="project1", + task_id="task1", + eval_id="missing_eval", + eval_config_id="missing_eval_config", + run_config_id="run_config1", + ) + + with pytest.raises(Exception): + await EvalJobWorker().compute_state(bad_params) + + +# -- run --------------------------------------------------------------------- + + +async def test_run_maps_progress_and_returns_result( + resolve_project, task, eval_config, run_config, data_source, params +): + progresses = [ + Progress(complete=0, total=3, errors=0), + Progress(complete=1, total=3, errors=0), + Progress(complete=2, total=3, errors=1), + ] + + reported: list[tuple[int, int, int | None]] = [] + + class FakeCtx: + job_id = "j_test" + run_id = "run_test" + + async def report_progress(self, success, error=0, total=None, message=None): + reported.append((success, error, total)) + + async def report_error(self, error_message, **extra): + pass + + with _stub_eval_runner_run(progresses): + result = await EvalJobWorker().run(params, FakeCtx()) + + assert reported == [(0, 0, 3), (1, 0, 3), (2, 1, 3)] + assert result == EvalJobResult(total=3, success=2, error=1) + + +async def test_run_no_items_returns_zero_summary( + resolve_project, task, eval_config, run_config, data_source, params +): + class FakeCtx: + job_id = "j_test" + run_id = "run_test" + + async def report_progress(self, success, error=0, total=None, message=None): + pass + + async def report_error(self, error_message, **extra): + pass + + # Real EvalRunner with an empty dataset yields only the initial Progress(0,0,0). + result = await EvalJobWorker().run(params, FakeCtx()) + + assert result == EvalJobResult(total=0, success=0, error=0) + + +async def test_run_idempotent_skips_already_scored( + resolve_project, task, eval_config, run_config, data_source, params +): + task_runs = [_make_task_run(task, data_source, "eval_set") for _ in range(3)] + # Two of three already scored. 
+ _make_eval_run(eval_config, task_runs[0].id, run_config.id) + _make_eval_run(eval_config, task_runs[1].id, run_config.id) + + processed_dataset_ids: list = [] + + async def fake_run_job(self, job) -> bool: + processed_dataset_ids.append(job.item.id) + EvalRun( + parent=job.eval_config, + dataset_id=job.item.id, + task_run_config_id=job.task_run_config.id, + input="test", + output="test", + scores={"accuracy": 1.0}, + ).save_to_file() + return True + + class FakeCtx: + job_id = "j_test" + run_id = "run_test" + + async def report_progress(self, success, error=0, total=None, message=None): + pass + + async def report_error(self, error_message, **extra): + pass + + with patch( + "kiln_ai.adapters.eval.eval_runner.EvalRunner.run_job", + new=fake_run_job, + ): + result = await EvalJobWorker().run(params, FakeCtx()) + + # Only the single not-yet-scored item should have been processed. + assert processed_dataset_ids == [task_runs[2].id] + # Totals are reported against the FULL eval-set size (3), not just the work + # remaining for this run. Two were already scored (baseline), one processed. + assert result.total == 3 + assert result.success == 3 + + # No duplicate EvalRuns: three task runs, three EvalRuns total. + assert len(eval_config.runs(readonly=True)) == 3 + + +async def test_run_reports_full_set_totals_on_partial_resume( + resolve_project, task, eval_config, run_config, data_source, params +): + # 5-item eval set, 2 already scored (baseline). The stubbed runner only sees + # the remaining 3 items, so its Progress.total is 3 — but the worker must add + # the baseline back and report against the full set of 5. + task_runs = [_make_task_run(task, data_source, "eval_set") for _ in range(5)] + _make_eval_run(eval_config, task_runs[0].id, run_config.id) + _make_eval_run(eval_config, task_runs[1].id, run_config.id) + + # EvalRunner.run() yields counts relative to the unfinished remainder (3). 
+ progresses = [ + Progress(complete=0, total=3, errors=0), + Progress(complete=1, total=3, errors=0), + Progress(complete=2, total=3, errors=0), + Progress(complete=3, total=3, errors=0), + ] + + reported: list[tuple[int, int, int | None]] = [] + + class FakeCtx: + job_id = "j_test" + run_id = "run_test" + + async def report_progress(self, success, error=0, total=None, message=None): + reported.append((success, error, total)) + + async def report_error(self, error_message, **extra): + pass + + with _stub_eval_runner_run(progresses): + result = await EvalJobWorker().run(params, FakeCtx()) + + # Reported success = baseline (2) + complete; total = baseline (2) + 3 = 5. + # The snapshot must not regress below the baseline of 2 already-scored items. + assert reported == [(2, 0, 5), (3, 0, 5), (4, 0, 5), (5, 0, 5)] + assert result == EvalJobResult(total=5, success=5, error=0) + + +# -- save_context wiring ----------------------------------------------------- + + +def test_build_eval_runner_passes_save_context_when_git_sync_enabled( + resolve_project, task, eval_config, run_config, params +): + sentinel = object() + + with patch( + "app.desktop.studio_server.jobs.workers.eval.save_context_for_project", + return_value=sentinel, + ) as mock_helper: + runner = EvalJobWorker()._build_eval_runner(params) + + mock_helper.assert_called_once_with( + params.project_id, + context=f"eval job {params.eval_id}/{params.run_config_id}", + ) + # The helper's SaveContext is threaded straight into the runner. 
+ assert runner._save_context is sentinel + + +def test_build_eval_runner_defaults_to_noop_when_not_git_sync( + resolve_project, task, eval_config, run_config, params +): + from kiln_ai.utils.git_sync_protocols import default_save_context + + with patch( + "app.desktop.studio_server.jobs.workers.eval.save_context_for_project", + return_value=None, + ) as mock_helper: + runner = EvalJobWorker()._build_eval_runner(params) + + mock_helper.assert_called_once() + # EvalRunner coalesces None to the no-op default_save_context. + assert runner._save_context is default_save_context + + +# -- end-to-end via registry ------------------------------------------------- + + +async def test_eval_job_through_registry( + resolve_project, task, eval_config, run_config, data_source, params +): + for _ in range(2): + _make_task_run(task, data_source, "eval_set") + + progresses = [ + Progress(complete=0, total=2, errors=0), + Progress(complete=1, total=2, errors=0), + Progress(complete=2, total=2, errors=0), + ] + + registry = JobRegistry() + registry.register_type(EvalJobWorker) + + with _stub_eval_runner_run(progresses): + job = await registry.create("eval", params, project_id=params.project_id) + task_handle = registry._tasks[job.id] + await task_handle + + final = registry._jobs[job.id] + assert final.status == BackgroundJobStatus.SUCCEEDED + assert final.result == {"total": 2, "success": 2, "error": 0} + assert final.progress.success == 2 + assert final.progress.total == 2 + assert final.project_id == "project1" + + +async def test_eval_job_missing_entity_marks_failed( + resolve_project, task, run_config, data_source +): + # A job whose eval/eval_config does not exist: compute_state (run during + # reconciliation) raises, and the registry marks the job failed rather than + # treating the missing entity as "no progress". 
+ bad_params = EvalJobParams( + project_id="project1", + task_id="task1", + eval_id="missing_eval", + eval_config_id="missing_eval_config", + run_config_id="run_config1", + ) + + registry = JobRegistry() + registry.register_type(EvalJobWorker) + + job = await registry.create("eval", bad_params, project_id="project1") + task_handle = registry._tasks[job.id] + await task_handle + + final = registry._jobs[job.id] + assert final.status == BackgroundJobStatus.FAILED + assert final.error is not None diff --git a/app/web_ui/src/lib/api_schema.d.ts b/app/web_ui/src/lib/api_schema.d.ts index e123cdc80..2828ba19f 100644 --- a/app/web_ui/src/lib/api_schema.d.ts +++ b/app/web_ui/src/lib/api_schema.d.ts @@ -3076,6 +3076,164 @@ export interface paths { patch?: never; trace?: never; }; + "/api/jobs/events": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** + * Stream Job Events + * @description Server-sent events for jobs. Emits an initial `snapshot`, then per-job + * `job` and `deleted` events. A pure observer: disconnecting never stops a job. 
+ */ + get: operations["stream_job_events_api_jobs_events_get"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/jobs": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** List Jobs */ + get: operations["list_jobs_api_jobs_get"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/jobs/{type}": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + /** Create Job */ + post: operations["create_job_api_jobs__type__post"]; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/jobs/{id}": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** Get Job */ + get: operations["get_job_api_jobs__id__get"]; + put?: never; + post?: never; + /** Delete Job */ + delete: operations["delete_job_api_jobs__id__delete"]; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/jobs/{id}/result": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** Get Job Result */ + get: operations["get_job_result_api_jobs__id__result_get"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/jobs/{id}/errors": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** Get Job Errors */ + get: operations["get_job_errors_api_jobs__id__errors_get"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/jobs/{id}/pause": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + /** Pause Job */ + post: 
operations["pause_job_api_jobs__id__pause_post"]; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/jobs/{id}/resume": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + /** Resume Job */ + post: operations["resume_job_api_jobs__id__resume_post"]; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/jobs/{id}/cancel": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + /** Cancel Job */ + post: operations["cancel_job_api_jobs__id__cancel_post"]; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; } export type webhooks = Record; export interface components { @@ -3539,6 +3697,11 @@ export interface components { */ provider_type: "builtin" | "custom"; }; + /** + * BackgroundJobStatus + * @enum {string} + */ + BackgroundJobStatus: "pending" | "running" | "paused" | "succeeded" | "failed" | "cancelled"; /** * BasePrompt * @description A prompt for a task. This is the basic data storage format which can be used throughout a project. @@ -4348,6 +4511,44 @@ export interface components { data_strategy: components["schemas"]["ChatStrategy"]; run_config_properties?: components["schemas"]["KilnAgentRunConfigProperties"] | null; }; + /** + * CreateJobRequest + * @description Request body for creating a job. Params are validated per job type. + */ + CreateJobRequest: { + /** + * Params + * @description Type-specific job parameters, validated against the type's params model. + */ + params?: { + [key: string]: unknown; + }; + /** + * Project Id + * @description Project to scope this job to (for filtering/visibility). Falls back to the params' project_id when omitted. + */ + project_id?: string | null; + /** + * Metadata + * @description Free-form pass-through attribution, stored verbatim. 
+ */ + metadata?: { + [key: string]: unknown; + } | null; + }; + /** + * CreateJobResponse + * @description Response returned when a job is created. + */ + CreateJobResponse: { + /** + * Job Id + * @description The id of the newly created job. + */ + job_id: string; + /** @description The job's status immediately after creation. */ + status: components["schemas"]["BackgroundJobStatus"]; + }; /** CreateKilnCopilotApiKeyRequest */ CreateKilnCopilotApiKeyRequest: { /** @@ -6744,6 +6945,94 @@ export interface components { /** Jailbroken Examples */ jailbroken_examples: string; }; + /** + * JobError + * @description Small failure summary stamped on the record. Detail lives in the error log. + */ + JobError: { + /** Error */ + error?: string | null; + /** Detail */ + detail?: { + [key: string]: unknown; + } | null; + }; + /** + * JobProgress + * @description Count-based progress for a job. + * + * Processed = success + error; remaining = total - success - error. The error + * field is a count only — the actual messages live in the per-run error log. + */ + JobProgress: { + /** Total */ + total?: number | null; + /** + * Success + * @default 0 + */ + success: number; + /** + * Error + * @default 0 + */ + error: number; + /** Message */ + message?: string | null; + /** + * Updated At + * Format: date-time + */ + updated_at?: string; + }; + /** + * JobRecord + * @description Ephemeral, in-memory bookkeeping for a single job. Never persisted to disk. 
+ */ + JobRecord: { + /** Id */ + id: string; + /** Type */ + type: string; + status: components["schemas"]["BackgroundJobStatus"]; + /** Run Id */ + run_id?: string | null; + progress?: components["schemas"]["JobProgress"]; + /** Params */ + params?: { + [key: string]: unknown; + }; + /** Result */ + result?: { + [key: string]: unknown; + } | null; + error?: components["schemas"]["JobError"] | null; + /** Metadata */ + metadata?: { + [key: string]: unknown; + }; + /** Project Id */ + project_id?: string | null; + /** + * Supports Pause + * @default false + */ + supports_pause: boolean; + /** + * Created At + * Format: date-time + */ + created_at?: string; + /** + * Updated At + * Format: date-time + */ + updated_at?: string; + /** Started At */ + started_at?: string | null; + /** Ended At */ + ended_at?: string | null; + }; /** * JobStatus * @enum {string} @@ -17508,4 +17797,345 @@ export interface operations { }; }; }; + stream_job_events_api_jobs_events_get: { + parameters: { + query?: { + /** @description Only stream events for this job id. */ + job_id?: string | null; + /** @description Only stream events for this job type. */ + type?: string | null; + /** @description Only stream events for this project id. */ + project_id?: string | null; + }; + header?: never; + path?: never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": unknown; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + list_jobs_api_jobs_get: { + parameters: { + query?: { + /** @description Filter by job status. */ + status?: components["schemas"]["BackgroundJobStatus"] | null; + /** @description Filter by job type. */ + type?: string | null; + /** @description Filter by project id. 
*/ + project_id?: string | null; + /** @description Only jobs created at or after this ISO-8601 time. */ + since?: string | null; + /** @description Maximum number of jobs to return. */ + limit?: number | null; + }; + header?: never; + path?: never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["JobRecord"][]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + create_job_api_jobs__type__post: { + parameters: { + query?: never; + header?: never; + path: { + /** @description The registered job type to run. */ + type: string; + }; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["CreateJobRequest"]; + }; + }; + responses: { + /** @description Successful Response */ + 201: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["CreateJobResponse"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + get_job_api_jobs__id__get: { + parameters: { + query?: never; + header?: never; + path: { + /** @description The job id. 
*/ + id: string; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["JobRecord"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + delete_job_api_jobs__id__delete: { + parameters: { + query?: never; + header?: never; + path: { + /** @description The job id. */ + id: string; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 204: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + get_job_result_api_jobs__id__result_get: { + parameters: { + query?: never; + header?: never; + path: { + /** @description The job id. */ + id: string; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + [key: string]: unknown; + }; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + get_job_errors_api_jobs__id__errors_get: { + parameters: { + query?: { + /** @description Read the error log for a specific past run id. */ + run_id?: string | null; + }; + header?: never; + path: { + /** @description The job id. 
*/ + id: string; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + [key: string]: unknown; + }[]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + pause_job_api_jobs__id__pause_post: { + parameters: { + query?: never; + header?: never; + path: { + /** @description The job id. */ + id: string; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 202: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": unknown; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + resume_job_api_jobs__id__resume_post: { + parameters: { + query?: never; + header?: never; + path: { + /** @description The job id. */ + id: string; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 202: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": unknown; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + cancel_job_api_jobs__id__cancel_post: { + parameters: { + query?: never; + header?: never; + path: { + /** @description The job id. 
*/ + id: string; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 202: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": unknown; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; } diff --git a/app/web_ui/src/lib/components/SidebarJobsBadge.svelte b/app/web_ui/src/lib/components/SidebarJobsBadge.svelte new file mode 100644 index 000000000..af843392e --- /dev/null +++ b/app/web_ui/src/lib/components/SidebarJobsBadge.svelte @@ -0,0 +1,32 @@ + + +{#if resolved > 0} + {#if variant === "rail"} + + {label} + + {:else} + + {label} + + {/if} +{/if} diff --git a/app/web_ui/src/lib/components/SidebarJobsBadge.test.ts b/app/web_ui/src/lib/components/SidebarJobsBadge.test.ts new file mode 100644 index 000000000..7873285ec --- /dev/null +++ b/app/web_ui/src/lib/components/SidebarJobsBadge.test.ts @@ -0,0 +1,40 @@ +// @vitest-environment jsdom +import { describe, it, expect, vi } from "vitest" +import { render } from "@testing-library/svelte" +import { writable } from "svelte/store" + +vi.mock("$lib/api_client", () => ({ + base_url: "http://localhost:8757", + client: {}, +})) + +vi.mock("$lib/stores", () => ({ + ui_state: writable({ current_project_id: null }), +})) + +const SidebarJobsBadge = (await import("./SidebarJobsBadge.svelte")).default + +describe("SidebarJobsBadge", () => { + it("renders the count when greater than zero", () => { + const { getByText } = render(SidebarJobsBadge, { props: { count: 3 } }) + expect(getByText("3")).not.toBeNull() + }) + + it("renders nothing when count is zero", () => { + const { container } = render(SidebarJobsBadge, { props: { count: 0 } }) + expect(container.textContent?.trim()).toBe("") + }) + + it("caps the displayed count at 99+", () => { + const { getByText } = render(SidebarJobsBadge, { 
props: { count: 150 } }) + expect(getByText("99+")).not.toBeNull() + }) + + it("uses the rail variant styling when requested", () => { + const { container } = render(SidebarJobsBadge, { + props: { count: 2, variant: "rail" }, + }) + const span = container.querySelector("span") + expect(span?.className).toContain("absolute") + }) +}) diff --git a/app/web_ui/src/lib/stores/job_status.test.ts b/app/web_ui/src/lib/stores/job_status.test.ts new file mode 100644 index 000000000..4e6f91ce4 --- /dev/null +++ b/app/web_ui/src/lib/stores/job_status.test.ts @@ -0,0 +1,128 @@ +import { describe, it, expect } from "vitest" +import { + available_actions, + is_active, + is_terminal, + job_status_badge_class, + job_status_display, + progress_label, + progress_percent, +} from "./job_status" +import type { BackgroundJobStatus, JobRecord } from "./jobs_api" + +function makeJob(overrides: Partial = {}): JobRecord { + return { + id: "j_1", + type: "noop", + status: "running", + supports_pause: false, + ...overrides, + } +} + +describe("is_active / is_terminal", () => { + it("treats pending, running, paused as active", () => { + expect(is_active("pending")).toBe(true) + expect(is_active("running")).toBe(true) + expect(is_active("paused")).toBe(true) + }) + + it("treats terminal statuses as not active", () => { + expect(is_active("succeeded")).toBe(false) + expect(is_active("failed")).toBe(false) + expect(is_active("cancelled")).toBe(false) + }) + + it("identifies terminal statuses", () => { + expect(is_terminal("succeeded")).toBe(true) + expect(is_terminal("failed")).toBe(true) + expect(is_terminal("cancelled")).toBe(true) + expect(is_terminal("running")).toBe(false) + }) +}) + +describe("available_actions", () => { + it("running without pause support: cancel only", () => { + expect(available_actions(makeJob({ status: "running" }))).toEqual([ + "cancel", + ]) + }) + + it("running with pause support: pause then cancel", () => { + expect( + available_actions(makeJob({ status: "running", 
supports_pause: true })), + ).toEqual(["pause", "cancel"]) + }) + + it("paused: resume and cancel", () => { + expect( + available_actions(makeJob({ status: "paused", supports_pause: true })), + ).toEqual(["resume", "cancel"]) + }) + + it("pending: cancel only", () => { + expect(available_actions(makeJob({ status: "pending" }))).toEqual([ + "cancel", + ]) + }) + + it("terminal states: delete only", () => { + for (const status of [ + "succeeded", + "failed", + "cancelled", + ] as BackgroundJobStatus[]) { + expect(available_actions(makeJob({ status }))).toEqual(["delete"]) + } + }) +}) + +describe("job_status_display / job_status_badge_class", () => { + const cases: [BackgroundJobStatus, string, string][] = [ + ["pending", "Pending", "badge-ghost"], + ["running", "Running", "badge-info"], + ["paused", "Paused", "badge-warning"], + ["succeeded", "Succeeded", "badge-success"], + ["failed", "Failed", "badge-error"], + ["cancelled", "Cancelled", "badge-ghost"], + ] + it.each(cases)("maps %s", (status, label, badge) => { + expect(job_status_display(status)).toBe(label) + expect(job_status_badge_class(status)).toBe(badge) + }) +}) + +describe("progress_label", () => { + it("shows count only when total is null", () => { + expect(progress_label({ success: 3, error: 0 })).toBe("3") + }) + + it("shows success / total", () => { + expect(progress_label({ success: 3, error: 0, total: 10 })).toBe("3 / 10") + }) + + it("appends errored count when present", () => { + expect(progress_label({ success: 3, error: 2, total: 10 })).toBe( + "3 / 10 (2 errored)", + ) + }) + + it("handles undefined progress", () => { + expect(progress_label(undefined)).toBe("0") + }) +}) + +describe("progress_percent", () => { + it("returns 0 when total is null or zero", () => { + expect(progress_percent({ success: 1, error: 0 })).toBe(0) + expect(progress_percent({ success: 1, error: 0, total: 0 })).toBe(0) + }) + + it("computes processed / total as a percent", () => { + expect(progress_percent({ success: 2, 
import type { BackgroundJobStatus, JobProgress, JobRecord } from "./jobs_api"

// Statuses reported as "active" by is_active().
export const ACTIVE_STATUSES: readonly BackgroundJobStatus[] = [
  "pending",
  "running",
  "paused",
]

// Statuses reported as "terminal" by is_terminal().
export const TERMINAL_STATUSES: readonly BackgroundJobStatus[] = [
  "succeeded",
  "failed",
  "cancelled",
]

// Display label for each status. Declared as a full Record so a status added
// to the schema without a label fails to compile.
const STATUS_LABELS: Record<BackgroundJobStatus, string> = {
  pending: "Pending",
  running: "Running",
  paused: "Paused",
  succeeded: "Succeeded",
  failed: "Failed",
  cancelled: "Cancelled",
}

// Badge CSS class for each status; same exhaustiveness guarantee as above.
const STATUS_BADGE_CLASSES: Record<BackgroundJobStatus, string> = {
  pending: "badge-ghost",
  running: "badge-info",
  paused: "badge-warning",
  succeeded: "badge-success",
  failed: "badge-error",
  cancelled: "badge-ghost",
}

// Look `status` up in `table`. A value that is not one of the table's own keys
// (only possible if the runtime value escapes the type) is returned unchanged.
function lookup_status(
  table: Readonly<Record<BackgroundJobStatus, string>>,
  status: BackgroundJobStatus,
): string {
  const known = Object.prototype.hasOwnProperty.call(table, status)
  return known ? table[status] : status
}

// True when `status` is one of ACTIVE_STATUSES.
export function is_active(status: BackgroundJobStatus): boolean {
  return ACTIVE_STATUSES.some((candidate) => candidate === status)
}

// True when `status` is one of TERMINAL_STATUSES.
export function is_terminal(status: BackgroundJobStatus): boolean {
  return TERMINAL_STATUSES.some((candidate) => candidate === status)
}

// Human-readable label for a job status.
export function job_status_display(status: BackgroundJobStatus): string {
  return lookup_status(STATUS_LABELS, status)
}

// Badge CSS class used to render a job status.
export function job_status_badge_class(status: BackgroundJobStatus): string {
  return lookup_status(STATUS_BADGE_CLASSES, status)
}

export type JobAction = "pause" | "resume" | "cancel" | "delete"

// The set of lifecycle actions valid for a job given its status and whether
// its worker supports pause.
// Mirrors the state machine (functional_spec §3) and the delete policy
// (architecture open item #7: delete only on terminal state).
//
// Returns the actions in display order. A status this client does not know
// (for example one sent by a newer server) yields no actions rather than
// leaking the raw status string to callers that expect an array.
export function available_actions(job: JobRecord): JobAction[] {
  switch (job.status) {
    case "running":
      // Pause is offered first, and only when the worker supports it.
      return job.supports_pause ? ["pause", "cancel"] : ["cancel"]
    case "paused":
      return ["resume", "cancel"]
    case "pending":
      return ["cancel"]
    case "succeeded":
    case "failed":
    case "cancelled":
      return ["delete"]
    default: {
      // Compile-time exhaustiveness check only; the runtime fallback is an
      // empty action list.
      const unhandled: never = job.status
      void unhandled
      return []
    }
  }
}

// Short textual progress: "<success>" while the total is unknown, otherwise
// "<success> / <total>", with " (<n> errored)" appended when errors occurred.
// Missing progress is treated as zero successes and zero errors.
export function progress_label(progress: JobProgress | undefined): string {
  const success = progress?.success ?? 0
  const total = progress?.total
  const base = total == null ? `${success}` : `${success} / ${total}`
  const error = progress?.error ?? 0
  return error > 0 ? `${base} (${error} errored)` : base
}

// Processed share (success + error) of the total as a whole-number percent,
// clamped to 0..100. Returns 0 when the total is missing, zero or negative.
export function progress_percent(progress: JobProgress | undefined): number {
  const total = progress?.total
  if (!total || total <= 0) {
    return 0
  }
  const processed = (progress?.success ?? 0) + (progress?.error ?? 0)
  return Math.max(0, Math.min(100, Math.round((processed / total) * 100)))
}
mockPOST.mockResolvedValue({ + data: { job_id: "j_3", status: "pending" }, + error: undefined, + }) + const result = await create_job("eval", { eval_id: "e_1" }, { src: "ui" }) + expect(mockPOST).toHaveBeenCalledWith("/api/jobs/{type}", { + params: { path: { type: "eval" } }, + body: { + params: { eval_id: "e_1" }, + metadata: { src: "ui" }, + project_id: null, + }, + }) + expect(result).toEqual({ job_id: "j_3", status: "pending" }) + }) + + it("create_job passes an explicit project_id in the body", async () => { + mockPOST.mockResolvedValue({ + data: { job_id: "j_3b", status: "pending" }, + error: undefined, + }) + await create_job("noop", { steps: 5 }, null, "p_current") + expect(mockPOST).toHaveBeenCalledWith("/api/jobs/{type}", { + params: { path: { type: "noop" } }, + body: { params: { steps: 5 }, metadata: null, project_id: "p_current" }, + }) + }) + + it("get_job_result calls GET /api/jobs/{id}/result", async () => { + mockGET.mockResolvedValue({ data: { total: 5 }, error: undefined }) + const result = await get_job_result("j_4") + expect(mockGET).toHaveBeenCalledWith("/api/jobs/{id}/result", { + params: { path: { id: "j_4" } }, + }) + expect(result).toEqual({ total: 5 }) + }) + + it("get_job_errors calls GET /api/jobs/{id}/errors with optional run_id", async () => { + mockGET.mockResolvedValue({ + data: [{ error_message: "oops" }], + error: undefined, + }) + const result = await get_job_errors("j_5", "run_xyz") + expect(mockGET).toHaveBeenCalledWith("/api/jobs/{id}/errors", { + params: { path: { id: "j_5" }, query: { run_id: "run_xyz" } }, + }) + expect(result).toEqual([{ error_message: "oops" }]) + }) + + it("get_job_errors omits run_id query when not provided", async () => { + mockGET.mockResolvedValue({ data: [], error: undefined }) + await get_job_errors("j_6") + expect(mockGET).toHaveBeenCalledWith("/api/jobs/{id}/errors", { + params: { path: { id: "j_6" }, query: {} }, + }) + }) + + it("pause_job calls POST /api/jobs/{id}/pause", async () => { + 
import { client } from "$lib/api_client"
import type { components } from "$lib/api_schema"

export type JobRecord = components["schemas"]["JobRecord"]
export type JobProgress = components["schemas"]["JobProgress"]
export type JobError = components["schemas"]["JobError"]
export type BackgroundJobStatus = components["schemas"]["BackgroundJobStatus"]
export type CreateJobResponse = components["schemas"]["CreateJobResponse"]

// One entry of a job's error log. The schema types entries as open objects;
// `error_message` is the only field this client names explicitly.
export type JobErrorEntry = {
  error_message?: string
} & Record<string, unknown>

// Optional filters accepted by GET /api/jobs.
export type ListJobsQuery = {
  status?: BackgroundJobStatus
  type?: string
  project_id?: string
  since?: string
  limit?: number
}

// Every wrapper below follows the same contract: it resolves with the response
// body on success and rejects with the client's `error` payload otherwise.

// List jobs, optionally filtered by `query`.
export async function list_jobs(
  query: ListJobsQuery = {},
): Promise<JobRecord[]> {
  const { data, error } = await client.GET("/api/jobs", {
    params: { query },
  })
  if (error) {
    throw error
  }
  return data
}

// Fetch a single job record by id.
export async function get_job(id: string): Promise<JobRecord> {
  const { data, error } = await client.GET("/api/jobs/{id}", {
    params: { path: { id } },
  })
  if (error) {
    throw error
  }
  return data
}

// Create a job of the registered `type`. `metadata` and `project_id` are sent
// as explicit nulls when omitted.
export async function create_job(
  type: string,
  params: Record<string, unknown> = {},
  metadata: Record<string, unknown> | null = null,
  project_id: string | null = null,
): Promise<CreateJobResponse> {
  const { data, error } = await client.POST("/api/jobs/{type}", {
    params: { path: { type } },
    body: { params, metadata, project_id },
  })
  if (error) {
    throw error
  }
  return data
}

// Fetch a job's result payload (an open object in the schema).
export async function get_job_result(
  id: string,
): Promise<Record<string, unknown>> {
  const { data, error } = await client.GET("/api/jobs/{id}/result", {
    params: { path: { id } },
  })
  if (error) {
    throw error
  }
  return data
}

// Fetch a job's error log. `run_id` selects a specific past run; when omitted
// an empty query object is sent.
export async function get_job_errors(
  id: string,
  run_id?: string,
): Promise<JobErrorEntry[]> {
  const { data, error } = await client.GET("/api/jobs/{id}/errors", {
    params: { path: { id }, query: run_id ? { run_id } : {} },
  })
  if (error) {
    throw error
  }
  // The schema types entries as open objects; narrow to the local entry type.
  return data as JobErrorEntry[]
}

// Request that a job be paused.
export async function pause_job(id: string): Promise<void> {
  const { error } = await client.POST("/api/jobs/{id}/pause", {
    params: { path: { id } },
  })
  if (error) {
    throw error
  }
}

// Request that a paused job be resumed.
export async function resume_job(id: string): Promise<void> {
  const { error } = await client.POST("/api/jobs/{id}/resume", {
    params: { path: { id } },
  })
  if (error) {
    throw error
  }
}

// Request that a job be cancelled.
export async function cancel_job(id: string): Promise<void> {
  const { error } = await client.POST("/api/jobs/{id}/cancel", {
    params: { path: { id } },
  })
  if (error) {
    throw error
  }
}

// Delete a job record.
export async function delete_job(id: string): Promise<void> {
  const { error } = await client.DELETE("/api/jobs/{id}", {
    params: { path: { id } },
  })
  if (error) {
    throw error
  }
}
const mutationSpies = {
  pause_job: vi.fn(),
  resume_job: vi.fn(),
  cancel_job: vi.fn(),
  delete_job: vi.fn(),
  create_job: vi.fn(),
}
vi.mock("./jobs_api", () => mutationSpies)

// A controllable fake EventSource installed on globalThis. Records construction
// URLs and close() calls so tests can assert the pure-observer / reconnect
// behavior without a real network connection.
type Listener = (event: MessageEvent) => void

class FakeEventSource {
  // Every instance constructed since the last reset(), in creation order.
  static instances: FakeEventSource[] = []
  url: string
  closed = false
  onerror: ((this: EventSource, ev: Event) => void) | null = null
  // Registered listeners, keyed by SSE event name.
  private listeners: Record<string, Listener[]> = {}

  constructor(url: string) {
    this.url = url
    FakeEventSource.instances.push(this)
  }

  addEventListener(type: string, listener: Listener) {
    ;(this.listeners[type] ||= []).push(listener)
  }

  close() {
    this.closed = true
  }

  // Deliver a named event to its listeners; `data` is JSON-encoded the way the
  // store expects to parse it.
  emit(type: string, data: unknown) {
    const event = { data: JSON.stringify(data) } as MessageEvent
    for (const listener of this.listeners[type] || []) {
      listener(event)
    }
  }

  // Simulate a connection failure by invoking the onerror handler, if set.
  fail() {
    this.onerror?.call(this as unknown as EventSource, new Event("error"))
  }

  // The most recently constructed instance.
  static latest(): FakeEventSource {
    return FakeEventSource.instances[FakeEventSource.instances.length - 1]
  }

  static reset() {
    FakeEventSource.instances = []
  }
}

// Build a JobRecord fixture; `overrides` replaces individual fields.
function makeJob(overrides: Partial<JobRecord> = {}): JobRecord {
  return {
    id: "j_1",
    type: "noop",
    status: "running",
    supports_pause: true,
    created_at: "2026-05-28T12:00:00Z",
    ...overrides,
  }
}

// Import the module fresh per test so the ref-counted connection and the
// module-level ui_state subscription start clean.
+async function loadStore() { + vi.resetModules() + ui_state.set({ current_project_id: null }) + FakeEventSource.reset() + return await import("./jobs_store") +} + +describe("jobs_store", () => { + beforeEach(() => { + vi.useFakeTimers() + // @ts-expect-error install fake on global + globalThis.EventSource = FakeEventSource + for (const spy of Object.values(mutationSpies)) { + spy.mockClear() + } + }) + + afterEach(() => { + vi.useRealTimers() + vi.restoreAllMocks() + }) + + it("snapshot replaces the whole map", async () => { + const { jobs } = await loadStore() + const unsub = jobs.subscribe(() => {}) + const source = FakeEventSource.latest() + + source.emit("snapshot", { + jobs: [makeJob({ id: "j_1" }), makeJob({ id: "j_2" })], + }) + expect( + get(jobs) + .map((j) => j.id) + .sort(), + ).toEqual(["j_1", "j_2"]) + + // A second snapshot fully replaces the prior contents. + source.emit("snapshot", { jobs: [makeJob({ id: "j_3" })] }) + expect(get(jobs).map((j) => j.id)).toEqual(["j_3"]) + unsub() + }) + + it("job event inserts a new job", async () => { + const { jobs } = await loadStore() + const unsub = jobs.subscribe(() => {}) + const source = FakeEventSource.latest() + source.emit("snapshot", { jobs: [] }) + source.emit("job", makeJob({ id: "j_new" })) + expect(get(jobs).map((j) => j.id)).toEqual(["j_new"]) + unsub() + }) + + it("job event upserts status + progress for an existing job", async () => { + const { jobs } = await loadStore() + const unsub = jobs.subscribe(() => {}) + const source = FakeEventSource.latest() + source.emit("snapshot", { + jobs: [ + makeJob({ + id: "j_1", + status: "running", + progress: { success: 1, error: 0, total: 10 }, + }), + ], + }) + source.emit( + "job", + makeJob({ + id: "j_1", + status: "succeeded", + progress: { success: 10, error: 0, total: 10 }, + }), + ) + const job = get(jobs)[0] + expect(job.status).toBe("succeeded") + expect(job.progress?.success).toBe(10) + unsub() + }) + + it("deleted event removes a job; unknown id 
is a no-op", async () => { + const { jobs } = await loadStore() + const unsub = jobs.subscribe(() => {}) + const source = FakeEventSource.latest() + source.emit("snapshot", { + jobs: [makeJob({ id: "j_1" }), makeJob({ id: "j_2" })], + }) + source.emit("deleted", { id: "j_1" }) + expect(get(jobs).map((j) => j.id)).toEqual(["j_2"]) + source.emit("deleted", { id: "does_not_exist" }) + expect(get(jobs).map((j) => j.id)).toEqual(["j_2"]) + unsub() + }) + + it("reconnects on error and re-syncs from the fresh snapshot", async () => { + const { jobs } = await loadStore() + const unsub = jobs.subscribe(() => {}) + const first = FakeEventSource.latest() + first.emit("snapshot", { jobs: [makeJob({ id: "stale" })] }) + expect(get(jobs).map((j) => j.id)).toEqual(["stale"]) + + first.fail() + expect(first.closed).toBe(true) + + // After the backoff a new EventSource is constructed. + vi.advanceTimersByTime(2000) + expect(FakeEventSource.instances.length).toBe(2) + const second = FakeEventSource.latest() + expect(second).not.toBe(first) + + second.emit("snapshot", { jobs: [makeJob({ id: "fresh" })] }) + expect(get(jobs).map((j) => j.id)).toEqual(["fresh"]) + unsub() + }) + + it("active_jobs_count counts only pending/running/paused", async () => { + const { jobs, active_jobs_count } = await loadStore() + const unsubJobs = jobs.subscribe(() => {}) + const unsub = active_jobs_count.subscribe(() => {}) + const source = FakeEventSource.latest() + source.emit("snapshot", { + jobs: [ + makeJob({ id: "a", status: "pending" }), + makeJob({ id: "b", status: "running" }), + makeJob({ id: "c", status: "paused" }), + makeJob({ id: "d", status: "succeeded" }), + makeJob({ id: "e", status: "failed" }), + ], + }) + expect(get(active_jobs_count)).toBe(3) + unsub() + unsubJobs() + }) + + it("closes the EventSource when the last subscriber unsubscribes (pure observer)", async () => { + const { jobs } = await loadStore() + const unsub1 = jobs.subscribe(() => {}) + const unsub2 = jobs.subscribe(() => 
{}) + const source = FakeEventSource.latest() + // Only one EventSource is opened regardless of subscriber count. + expect(FakeEventSource.instances.length).toBe(1) + + unsub1() + expect(source.closed).toBe(false) + unsub2() + expect(source.closed).toBe(true) + }) + + it("opens with the project filter and re-opens when the project changes", async () => { + const { jobs } = await loadStore() + ui_state.set({ current_project_id: "p_1" }) + const unsub = jobs.subscribe(() => {}) + const first = FakeEventSource.latest() + expect(first.url).toContain("project_id=p_1") + + ui_state.set({ current_project_id: "p_2" }) + expect(first.closed).toBe(true) + const second = FakeEventSource.latest() + expect(second).not.toBe(first) + expect(second.url).toContain("project_id=p_2") + unsub() + }) + + it("ignores ui_state changes that don't touch current_project_id", async () => { + const { jobs } = await loadStore() + ui_state.set({ current_project_id: "p_1" }) + const unsub = jobs.subscribe(() => {}) + const first = FakeEventSource.latest() + expect(FakeEventSource.instances.length).toBe(1) + + // An unrelated ui_state update with the same project id must not re-open. 
+ ui_state.set({ current_project_id: "p_1", other: "x" } as { + current_project_id: string | null + }) + expect(FakeEventSource.instances.length).toBe(1) + expect(first.closed).toBe(false) + unsub() + }) + + it("reports an errored connection when the stream fails before syncing", async () => { + const { jobs, connection } = await loadStore() + const unsub = jobs.subscribe(() => {}) + expect(get(connection)).toBe("connecting") + + FakeEventSource.latest().fail() + expect(get(connection)).toBe("errored") + unsub() + }) + + it("connection becomes open once a snapshot arrives", async () => { + const { jobs, connection } = await loadStore() + const unsub = jobs.subscribe(() => {}) + FakeEventSource.latest().emit("snapshot", { jobs: [] }) + expect(get(connection)).toBe("open") + unsub() + }) + + it("never calls a mutation endpoint (pure observer) across its full lifecycle", async () => { + const { jobs } = await loadStore() + const unsub = jobs.subscribe(() => {}) + const source = FakeEventSource.latest() + + // Drive every observable path: snapshot, job upsert, deletion, an error + + // reconnect, a project switch, and finally teardown. 
+    source.emit("snapshot", { jobs: [makeJob({ id: "j_1" })] })
+    source.emit("job", makeJob({ id: "j_1", status: "succeeded" }))
+    source.emit("deleted", { id: "j_1" })
+    source.fail()
+    vi.advanceTimersByTime(2000)
+    ui_state.set({ current_project_id: "p_switch" })
+    unsub()
+
+    for (const spy of Object.values(mutationSpies)) {
+      expect(spy).not.toHaveBeenCalled()
+    }
+  })
+})
diff --git a/app/web_ui/src/lib/stores/jobs_store.ts b/app/web_ui/src/lib/stores/jobs_store.ts
new file mode 100644
index 000000000..1718f95bf
--- /dev/null
+++ b/app/web_ui/src/lib/stores/jobs_store.ts
@@ -0,0 +1,265 @@
+import { derived, get, writable, type Readable } from "svelte/store"
+import { base_url } from "$lib/api_client"
+import { ui_state } from "$lib/stores"
+import type { JobRecord } from "./jobs_api"
+import { is_active } from "./job_status"
+
+// Delay (ms) before re-opening the event stream after it errors.
+const RECONNECT_DELAY_MS = 2000
+
+// Job records keyed by job id.
+type JobsMap = Map<string, JobRecord>
+
+// Connection state surfaced to the UI so the panel can distinguish "still
+// connecting" from "can't connect". Stays a pure observer: this only reports
+// the EventSource lifecycle, it never triggers a job mutation.
+export type JobsConnection = "idle" | "connecting" | "open" | "errored"
+
+// Builds the jobs store: a readable map of job records kept in sync with the
+// `/api/jobs/events` EventSource stream. The stream is opened when the first
+// subscriber attaches and closed when the last one detaches.
+function createJobsStore() {
+  const jobs_map = writable<JobsMap>(new Map())
+
+  // True once the first `snapshot` event for the current connection has been
+  // processed. Lets the panel show a loading state until the stream syncs.
+  const synced = writable(false)
+
+  // Lifecycle of the underlying EventSource. The panel pairs this with `synced`
+  // to show a "can't connect / retrying" affordance instead of spinning forever
+  // when the stream errors before its first snapshot.
+  const connection = writable<JobsConnection>("idle")
+
+  let event_source: EventSource | null = null
+  let reconnect_timer: ReturnType<typeof setTimeout> | null = null
+  let subscriber_count = 0
+  let current_project_id: string | null = null
+
+  // URL of the events stream, filtered to the current project when one is set.
+  function build_url(): string {
+    const url = new URL(`${base_url}/api/jobs/events`)
+    if (current_project_id) {
+      url.searchParams.set("project_id", current_project_id)
+    }
+    return url.toString()
+  }
+
+  // Insert or replace one record, writing into a fresh copy of the map.
+  function upsert(record: JobRecord) {
+    jobs_map.update((map) => {
+      const next = new Map(map)
+      next.set(record.id, record)
+      return next
+    })
+  }
+
+  // Drop one record by id; returns the same Map when the id is not present.
+  function remove(id: string) {
+    jobs_map.update((map) => {
+      if (!map.has(id)) {
+        return map
+      }
+      const next = new Map(map)
+      next.delete(id)
+      return next
+    })
+  }
+
+  // Replace the whole map with the given records (used for snapshots).
+  function replace_all(records: JobRecord[]) {
+    const next: JobsMap = new Map()
+    for (const record of records) {
+      next.set(record.id, record)
+    }
+    jobs_map.set(next)
+  }
+
+  // `snapshot` event: full re-sync; also marks the stream synced and open.
+  function handle_snapshot(event: MessageEvent) {
+    try {
+      const parsed = JSON.parse(event.data) as { jobs?: JobRecord[] }
+      replace_all(parsed.jobs ?? [])
+      synced.set(true)
+      connection.set("open")
+    } catch {
+      // Ignore malformed payloads; the next snapshot will re-sync.
+    }
+  }
+
+  // `job` event: upsert a single record.
+  function handle_job(event: MessageEvent) {
+    try {
+      const record = JSON.parse(event.data) as JobRecord
+      upsert(record)
+    } catch {
+      // Ignore malformed payloads.
+    }
+  }
+
+  // `deleted` event: remove a single record by id.
+  function handle_deleted(event: MessageEvent) {
+    try {
+      const parsed = JSON.parse(event.data) as { id?: string }
+      if (parsed.id) {
+        remove(parsed.id)
+      }
+    } catch {
+      // Ignore malformed payloads.
+    }
+  }
+
+  // Cancel a pending reconnect, if any.
+  function clear_reconnect() {
+    if (reconnect_timer !== null) {
+      clearTimeout(reconnect_timer)
+      reconnect_timer = null
+    }
+  }
+
+  // Schedule a single reconnect attempt; no-op when one is already pending or
+  // nobody is subscribed.
+  function schedule_reconnect() {
+    if (reconnect_timer !== null || subscriber_count === 0) {
+      return
+    }
+    reconnect_timer = setTimeout(() => {
+      reconnect_timer = null
+      if (subscriber_count > 0) {
+        connect()
+      }
+    }, RECONNECT_DELAY_MS)
+  }
+
+  // Close and forget the current EventSource, if any.
+  function close_source() {
+    if (event_source) {
+      event_source.close()
+      event_source = null
+    }
+  }
+
+  function connect() {
+    // Pure observer: opening or closing this stream never affects a job. A
+    // dropped connection is recovered by reconnecting; the fresh `snapshot`
+    // re-syncs the map (no Last-Event-ID needed).
+    const EventSourceCtor = globalThis.EventSource
+    if (!EventSourceCtor) {
+      return
+    }
+    close_source()
+    clear_reconnect()
+    synced.set(false)
+    connection.set("connecting")
+
+    const source = new EventSourceCtor(build_url())
+    event_source = source
+
+    source.addEventListener("snapshot", handle_snapshot as EventListener)
+    source.addEventListener("job", handle_job as EventListener)
+    source.addEventListener("deleted", handle_deleted as EventListener)
+    source.onerror = () => {
+      // Only reconnect if this is still the active source (avoids racing a
+      // teardown or a project switch).
+      if (event_source !== source) {
+        return
+      }
+      close_source()
+      connection.set("errored")
+      schedule_reconnect()
+    }
+  }
+
+  // Tear the stream down and reset the connection state to idle.
+  function disconnect() {
+    close_source()
+    clear_reconnect()
+    synced.set(false)
+    connection.set("idle")
+  }
+
+  // Re-open the stream against a new project filter. Called by the ui_state
+  // subscription below and exposed for tests.
+  function set_project(project_id: string | null) {
+    if (project_id === current_project_id) {
+      return
+    }
+    current_project_id = project_id
+    if (subscriber_count > 0) {
+      connect()
+    }
+  }
+
+  // Track the active project from UI state so the badge/panel stay scoped to
+  // the project the user is viewing. `ui_state` fires on any field change, so
+  // we react only when `current_project_id` actually differs from what we last
+  // saw — keeping rapid project switches correct (the old source is closed by
+  // `connect()` before the new one opens, so there's no leak).
+  current_project_id = get(ui_state).current_project_id ?? null
+  let last_seen_project_id = current_project_id
+  ui_state.subscribe((state) => {
+    const next = state.current_project_id ?? null
+    if (next === last_seen_project_id) {
+      return
+    }
+    last_seen_project_id = next
+    set_project(next)
+  })
+
+  // Store contract: the first subscriber opens the stream, the last one to
+  // leave closes it.
+  const subscribe: Readable<JobsMap>["subscribe"] = (run, invalidate) => {
+    if (subscriber_count === 0) {
+      connect()
+    }
+    subscriber_count += 1
+    const unsubscribe = jobs_map.subscribe(run, invalidate)
+    return () => {
+      unsubscribe()
+      subscriber_count -= 1
+      if (subscriber_count <= 0) {
+        subscriber_count = 0
+        disconnect()
+      }
+    }
+  }
+
+  return {
+    subscribe,
+    synced: { subscribe: synced.subscribe } as Readable<boolean>,
+    connection: {
+      subscribe: connection.subscribe,
+    } as Readable<JobsConnection>,
+    set_project,
+    // Exposed for tests / explicit teardown; not part of normal usage.
+    _disconnect: disconnect,
+  }
+}
+
+export const jobs_store = createJobsStore()
+
+export const synced: Readable<boolean> = jobs_store.synced
+
+export const connection: Readable<JobsConnection> = jobs_store.connection
+
+// All jobs, newest first by `created_at`.
+export const jobs: Readable<JobRecord[]> = derived(jobs_store, ($map) =>
+  Array.from($map.values()).sort(
+    (a, b) =>
+      new Date(b.created_at ?? 0).getTime() -
+      new Date(a.created_at ?? 0).getTime(),
+  ),
+)
+
+// Number of jobs whose status `is_active` reports as active.
+export const active_jobs_count: Readable<number> = derived(
+  jobs_store,
+  ($map) => {
+    let count = 0
+    for (const job of $map.values()) {
+      if (is_active(job.status)) {
+        count += 1
+      }
+    }
+    return count
+  },
+)
diff --git a/app/web_ui/src/lib/ui/icons/jobs_icon.svelte b/app/web_ui/src/lib/ui/icons/jobs_icon.svelte new file mode 100644 index 000000000..065ddaac4 --- /dev/null +++ b/app/web_ui/src/lib/ui/icons/jobs_icon.svelte @@ -0,0 +1,23 @@ + + + + + diff --git a/app/web_ui/src/lib/ui/section.ts b/app/web_ui/src/lib/ui/section.ts index 0dd772847..3fbeccf63 100644 --- a/app/web_ui/src/lib/ui/section.ts +++ b/app/web_ui/src/lib/ui/section.ts @@ -12,5 +12,6 @@ export enum Section { Skills, Optimize, Assistant, + Jobs, None, } diff --git a/app/web_ui/src/routes/(app)/+layout.svelte b/app/web_ui/src/routes/(app)/+layout.svelte index ac367455b..3c1c66e92 100644 --- a/app/web_ui/src/routes/(app)/+layout.svelte +++ b/app/web_ui/src/routes/(app)/+layout.svelte @@ -18,6 +18,8 @@ import ToolsIcon from "$lib/ui/icons/tools_icon.svelte" import ChatBar from "./chat_bar.svelte" import ChatIcon from "$lib/ui/icons/chat_icon.svelte" + import JobsIcon from "$lib/ui/icons/jobs_icon.svelte" + import SidebarJobsBadge from "$lib/components/SidebarJobsBadge.svelte" import { Section } from "$lib/ui/section" import Dialog from "$lib/ui/dialog.svelte" import SidebarRail from "./sidebar_rail.svelte" @@ -108,6 +110,8 @@ section = Section.Specs } else if (path_start("/optimize", $page.url.pathname)) { section = Section.Optimize + } else if (path_start("/jobs", $page.url.pathname)) { + section = Section.Jobs } else if (path_start("/assistant", $page.url.pathname)) { section = Section.Assistant } else { @@ -276,6 +280,16 @@ > + +