"""The transport-agnostic brain of the ``/goal`` loop.

``GoalController`` decides -- after each agent run finishes -- whether to
continue (with a followup message) or stop (with a ``GoalOutcome``). It performs
NO I/O: a *driver* (the sync ``run_goal``, or an async agent-server task) owns
sending messages and running the agent; the controller only judges and decides.
That split lets the sync and async drivers share identical decision logic.
"""
| 9 | + |
| 10 | +from collections.abc import Sequence |
| 11 | +from typing import Literal |
| 12 | + |
| 13 | +from pydantic import BaseModel, Field |
| 14 | + |
| 15 | +from openhands.sdk.conversation.goal.judge import GoalVerdict, judge_goal |
| 16 | +from openhands.sdk.conversation.goal.prompts import FOLLOWUP_PROMPT |
| 17 | +from openhands.sdk.event import Event |
| 18 | +from openhands.sdk.llm import LLM |
| 19 | +from openhands.sdk.logger import get_logger |
| 20 | + |
| 21 | + |
# Module-level logger, named after this module via ``__name__``.
logger = get_logger(__name__)
| 23 | + |
| 24 | + |
class GoalOutcome(BaseModel):
    """Terminal result handed back when a ``/goal`` loop stops.

    The ``status`` field tells a real completion (``"complete"``) apart from
    running into the iteration cap (``"capped"``), so a driver does not have
    to infer success from the mere fact that the loop ended.
    """

    # How the loop ended: judged complete, or stopped by the iteration cap.
    status: Literal["complete", "capped"]
    # Count of audit rounds; constrained to be at least 1.
    iterations: int = Field(
        ge=1,
        description="Number of audit rounds performed.",
    )
    # The judge verdict attached to this outcome.
    verdict: GoalVerdict
| 35 | + |
| 36 | + |
# Closed set of state names used for the ``status`` field of ``GoalStatus``.
GoalStatusName = Literal["running", "complete", "capped", "interrupted"]
"""Lifecycle state of a ``/goal`` loop."""
| 39 | + |
| 40 | + |
class GoalStatus(BaseModel):
    """Snapshot of a ``/goal`` loop's progress, meant for a UI progress chip.

    The agent server publishes this as the ``value`` of a
    ``ConversationStateUpdateEvent`` whose ``key`` is ``"goal"``. It does so at
    every lifecycle point: when the loop starts, after each round, and when it
    reaches its terminal or interrupted state.
    """

    active: bool = Field(
        description="Whether the goal loop is still running.",
    )
    # One of the ``GoalStatusName`` lifecycle states.
    status: GoalStatusName
    iteration: int = Field(
        ge=0,
        description="Audit rounds completed so far.",
    )
    # Upper bound on audit rounds; constrained to be at least 1.
    max_iterations: int = Field(ge=1)
    # The objective text the loop is working towards.
    objective: str
    verdict: GoalVerdict | None = Field(
        default=None,
        description="Last judge verdict; set once the loop ends.",
    )
| 57 | + |
| 58 | + |
class GoalContinue(BaseModel):
    """Non-terminal decision: the driver should send ``followup`` and run again."""

    # Message text to send before the next agent run.
    followup: str
| 63 | + |
| 64 | + |
class GoalDone(BaseModel):
    """Terminal decision: the loop is over and ``outcome`` describes how it ended."""

    # Final result of the loop (status, round count, verdict).
    outcome: GoalOutcome
| 69 | + |
| 70 | + |
# Union of the two possible decisions: keep going, or stop with an outcome.
GoalStep = GoalContinue | GoalDone
"""One decision returned by :meth:`GoalController.on_run_finished`."""
| 73 | + |
| 74 | + |
class GoalController:
    """Decide, after every agent run, whether a ``/goal`` loop goes on or ends.

    The controller never sends messages or runs the agent itself; that is the
    driver's job. A driver sends the message returned by :meth:`start`, runs
    the agent, and passes the resulting events to :meth:`on_run_finished`,
    repeating for as long as it gets back a :class:`GoalContinue`. The round
    counter and the ``max_iterations`` cap are tracked here, not in the driver.
    """

    def __init__(
        self, objective: str, judge_llm: LLM, *, max_iterations: int = 10
    ) -> None:
        """Validate the configuration and start the round counter at zero.

        Args:
            objective: The goal text; must contain non-whitespace characters.
            judge_llm: LLM handed to ``judge_goal`` on every audit.
            max_iterations: Maximum number of audit rounds (keyword-only).

        Raises:
            ValueError: If ``objective`` is empty or whitespace-only, or if
                ``max_iterations`` is below 1.
        """
        has_objective = bool(objective.strip())
        if not has_objective:
            raise ValueError("Goal objective must not be empty.")
        if max_iterations < 1:
            raise ValueError("max_iterations must be >= 1.")

        # The objective is stored exactly as given (not stripped).
        self.objective = objective
        self.judge_llm = judge_llm
        self.max_iterations = max_iterations
        self.iteration = 0

    def start(self) -> str:
        """Return the opening message for the driver to send: the objective."""
        return self.objective

    def _finish(
        self, status: Literal["complete", "capped"], verdict: GoalVerdict
    ) -> GoalDone:
        """Wrap ``verdict`` and the current round count in a terminal decision."""
        outcome = GoalOutcome(
            status=status, iterations=self.iteration, verdict=verdict
        )
        return GoalDone(outcome=outcome)

    def on_run_finished(self, events: Sequence[Event]) -> GoalStep:
        """Audit the finished run and pick the next step.

        Bumps the round counter, asks the judge LLM for a verdict on
        ``events``, and logs the result. Returns :class:`GoalDone` when the
        verdict says the objective is complete, or when the round counter has
        reached ``max_iterations``; otherwise returns :class:`GoalContinue`
        carrying a followup built from ``FOLLOWUP_PROMPT``.
        """
        self.iteration += 1
        verdict = judge_goal(self.judge_llm, self.objective, events)
        logger.info(
            "Goal audit %d/%d: score=%.2f complete=%s",
            self.iteration,
            self.max_iterations,
            verdict.score,
            verdict.complete,
        )

        # Completion is checked first, so a goal met on the last permitted
        # round is reported as "complete" rather than "capped".
        if verdict.complete:
            return self._finish("complete", verdict)
        if self.iteration >= self.max_iterations:
            return self._finish("capped", verdict)

        # Use a generic hint when the judge reported nothing specific.
        missing = verdict.missing
        if not missing:
            missing = "Some requirements are not yet verified."
        return GoalContinue(
            followup=FOLLOWUP_PROMPT.format(
                iteration=self.iteration, missing=missing
            )
        )
0 commit comments