Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,95 @@ import { createOpencodeClient } from "@opencode-ai/sdk"
import type { AssistantMessage, Session } from "@opencode-ai/sdk"
import type { BoulderState } from "../../features/boulder-state"
import { clearBoulderState, writeBoulderState } from "../../features/boulder-state"
import { classifyFinalWaveVerdict } from "./final-wave-approval-gate"
import { createAtlasHook } from "./index"

type AtlasHookContext = Parameters<typeof createAtlasHook>[0]
type PromptMock = ReturnType<typeof mock>

describe("classifyFinalWaveVerdict", () => {
  // Table of scenarios: the test title, the reviewer output handed to the
  // classifier (given), and the verdict it is expected to produce (then).
  const scenarios: ReadonlyArray<{
    title: string
    reviewerOutput: string
    expected: ReturnType<typeof classifyFinalWaveVerdict>
  }> = [
    {
      title: "returns approve when the output carries an APPROVE verdict",
      reviewerOutput: "Tasks [4/4 compliant] | VERDICT: APPROVE",
      expected: "approve",
    },
    {
      title: "returns reject when the output carries a REJECT verdict",
      reviewerOutput: "Tasks [2/4 compliant] | VERDICT: REJECT",
      expected: "reject",
    },
    {
      title: "returns missing when the output has no verdict token",
      reviewerOutput: "Implementation finished successfully with all checks green",
      expected: "missing",
    },
    {
      title: "returns missing when the output ends on a bash call with no verdict",
      reviewerOutput:
        "Ran the test suite\n\n```bash\nbun test packages/omo-opencode/src/hooks/atlas/final-wave-approval-gate.test.ts\n```",
      expected: "missing",
    },
    {
      title: "matches the approve verdict case-insensitively",
      reviewerOutput: "summary line\nverdict: approve",
      expected: "approve",
    },
    {
      title: "matches the reject verdict case-insensitively",
      reviewerOutput: "summary line\nVeRdIcT: ReJeCt",
      expected: "reject",
    },
    {
      title: "prefers approve over reject when both tokens appear",
      reviewerOutput: "VERDICT: REJECT then revised to VERDICT: APPROVE",
      expected: "approve",
    },
  ]

  // Register one test per scenario, in table order.
  for (const { title, reviewerOutput, expected } of scenarios) {
    test(title, () => {
      // when
      const classified = classifyFinalWaveVerdict(reviewerOutput)

      // then
      expect(classified).toBe(expected)
    })
  }
})

describe("Atlas final verification approval gate", () => {
let testDirectory = ""

Expand Down
13 changes: 13 additions & 0 deletions packages/omo-opencode/src/hooks/atlas/final-wave-approval-gate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,19 @@ import type { SessionState } from "./types"
import { readFinalWavePlanState } from "./final-wave-plan-state"

const APPROVE_VERDICT_PATTERN = /\bVERDICT:\s*APPROVE\b/i
const REJECT_VERDICT_PATTERN = /\bVERDICT:\s*REJECT\b/i

/**
 * Classifies reviewer output by the verdict token it contains.
 *
 * Matching is case-insensitive, allows any amount of whitespace (including
 * none) after the colon, and requires word boundaries around the token.
 *
 * @param output - Raw text to scan for a verdict token.
 * @returns `"approve"` if an approve token is present (this wins even when a
 *   reject token is also present), `"reject"` if only a reject token is
 *   present, otherwise `"missing"`.
 */
export function classifyFinalWaveVerdict(output: string): "approve" | "reject" | "missing" {
  // Ordered by precedence: the approve pattern is consulted before reject.
  const byPrecedence: ReadonlyArray<readonly ["approve" | "reject", RegExp]> = [
    ["approve", APPROVE_VERDICT_PATTERN],
    ["reject", REJECT_VERDICT_PATTERN],
  ]

  const firstHit = byPrecedence.find(([, pattern]) => pattern.test(output))
  return firstHit ? firstHit[0] : "missing"
}

function clearFinalWaveApprovalTracking(sessionState: SessionState): void {
sessionState.pendingFinalWaveTaskCount = undefined
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,18 @@ import {
import { collectGitDiffStats, formatFileChanges } from "../../shared/git-worktree"
import { log } from "../../shared/logger"
import { syncBackgroundLaunchSessionTracking } from "./background-launch-session-tracking"
import { shouldPauseForFinalWaveApproval } from "./final-wave-approval-gate"
import { classifyFinalWaveVerdict, shouldPauseForFinalWaveApproval } from "./final-wave-approval-gate"
import { readFinalWavePlanState } from "./final-wave-plan-state"
import { HOOK_NAME } from "./hook-name"
import { extractSessionIdFromOutput, validateSubagentSessionId } from "./subagent-session-id"
import { resolvePreferredSessionId, resolveTaskContext } from "./task-context"
import { isTrackedTaskChecked } from "./tool-execute-after-plan-tasks"
import type { PendingTaskRef, SessionState, ToolExecuteAfterInput, ToolExecuteAfterOutput } from "./types"
import {
buildAdvanceDirective,
buildCompletionGate,
buildFinalWaveApprovalReminder,
buildMissingVerdictEscalation,
buildOrchestratorReminder,
buildStandaloneVerificationReminder,
} from "./verification-reminders"
Expand All @@ -42,6 +45,8 @@ export async function handleSubagentCompletionAfter(input: {
toolInput: ToolExecuteAfterInput
toolOutput: ToolExecuteAfterOutput
metadataSessionId: string | undefined
/** True when the gate fired from a non-`task` plugin tool retrieval (e.g. `background_output`), not a fresh `task` completion. */
isPluginToolRetrieval?: boolean
}): Promise<void> {
const {
ctx,
Expand All @@ -53,6 +58,7 @@ export async function handleSubagentCompletionAfter(input: {
toolInput,
toolOutput,
metadataSessionId,
isPluginToolRetrieval = false,
} = input
const outputStr = typeof toolOutput.output === "string" ? toolOutput.output : ""
const pendingTaskRef = toolInput.callID ? pendingTaskRefs.get(toolInput.callID) : undefined
Expand Down Expand Up @@ -184,21 +190,74 @@ export async function handleSubagentCompletionAfter(input: {
})
: false

const finalWavePlanState = readFinalWavePlanState(planPath)
const isFinalWaveTask = currentTask?.key.startsWith("final-wave:") === true
|| ((finalWavePlanState?.pendingImplementationTaskCount ?? 1) === 0
&& (finalWavePlanState?.pendingFinalWaveTaskCount ?? 0) > 0)
const isMissingFinalWaveVerdict = isFinalWaveTask
&& classifyFinalWaveVerdict(originalResponse) === "missing"

if (sessionState) {
sessionState.waitingForFinalWaveApproval = shouldPauseForApproval
if (sessionState.activeContinuationPlanPath !== undefined
&& sessionState.activeContinuationPlanPath !== planPath) {
sessionState.verifiedTaskKeys = undefined
}

sessionState.waitingForFinalWaveApproval = shouldPauseForApproval || isMissingFinalWaveVerdict

if (shouldPauseForApproval && sessionState.pendingRetryTimer) {
if ((shouldPauseForApproval || isMissingFinalWaveVerdict) && sessionState.pendingRetryTimer) {
clearTimeout(sessionState.pendingRetryTimer)
sessionState.pendingRetryTimer = undefined
}
}

const leadReminder = shouldPauseForApproval
? buildFinalWaveApprovalReminder(workScopedBoulderState.plan_name, progress, preferredSessionId)
: buildCompletionGate(workScopedBoulderState.plan_name, preferredSessionId)
const followupReminder = shouldPauseForApproval
? null
: buildOrchestratorReminder(workScopedBoulderState.plan_name, progress, preferredSessionId, autoCommit, false)
const isAlreadyVerified = currentTask && !isFinalWaveTask
? isTrackedTaskChecked(planPath, currentTask.key)
|| sessionState?.verifiedTaskKeys?.has(currentTask.key) === true
|| (isPluginToolRetrieval && trackedTaskSession !== null)
: false

let leadReminder: string
let followupReminder: string | null
if (isMissingFinalWaveVerdict) {
await ctx.client.tui
.showToast({
body: {
title: "Final review incomplete",
message: "A reviewer returned no clear verdict. Boulder paused - confirm or re-run the review.",
variant: "warning" as const,
duration: 10000,
},
})
.catch(() => {})
leadReminder = buildMissingVerdictEscalation(
workScopedBoulderState.plan_name,
currentTask?.label ?? "the final-wave task",
preferredSessionId,
)
followupReminder = null
} else if (shouldPauseForApproval) {
leadReminder = buildFinalWaveApprovalReminder(workScopedBoulderState.plan_name, progress, preferredSessionId)
followupReminder = null
} else if (isAlreadyVerified) {
leadReminder = buildAdvanceDirective(workScopedBoulderState.plan_name)
followupReminder = null
} else {
if (currentTask && sessionState && !isFinalWaveTask) {
if (!sessionState.verifiedTaskKeys) {
sessionState.verifiedTaskKeys = new Set<string>()
}
sessionState.verifiedTaskKeys.add(currentTask.key)
}
leadReminder = buildCompletionGate(workScopedBoulderState.plan_name, preferredSessionId)
followupReminder = buildOrchestratorReminder(
workScopedBoulderState.plan_name,
progress,
preferredSessionId,
autoCommit,
false,
)
}

toolOutput.output = `
<system-reminder>
Expand All @@ -225,6 +284,8 @@ ${
progress: `${progress.completed}/${progress.total}`,
fileCount: gitStats.length,
preferredSessionId,
waitingForFinalWaveApproval: shouldPauseForApproval,
waitingForFinalWaveApproval: shouldPauseForApproval || isMissingFinalWaveVerdict,
isMissingFinalWaveVerdict,
isAlreadyVerified,
})
}
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ export function createToolExecuteAfterHandler(input: {
toolInput,
toolOutput,
metadataSessionId,
isPluginToolRetrieval: isPluginToolWithSession,
})
}
}
2 changes: 2 additions & 0 deletions packages/omo-opencode/src/hooks/atlas/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,6 @@ export interface SessionState {
stalledContinuationPlanPath?: string
/** The plan path the in-progress no-tool-progress counter is keyed to. Changes here reset the counter. */
activeContinuationPlanPath?: string
/** Top-level task keys whose completion gate already fired, used to suppress repeat 4-phase reminders. */
verifiedTaskKeys?: Set<string>
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import { describe, expect, it } from "bun:test"
import { buildOrchestratorReminder, buildCompletionGate } from "./verification-reminders"
import {
buildOrchestratorReminder,
buildCompletionGate,
buildMissingVerdictEscalation,
buildAdvanceDirective,
} from "./verification-reminders"

// Test helpers for given/when/then pattern
const given = describe
Expand Down Expand Up @@ -92,3 +97,72 @@ describe("buildOrchestratorReminder", () => {
})
})
})

describe("buildMissingVerdictEscalation", () => {
  given("a plan name, task label, and session id", () => {
    // Fixture values that the escalation text is expected to echo back.
    const plan = "atlas-loop-compaction-bg-fixes"
    const label = "T13: add builders"
    const reviewSession = "ses_review_abc"

    when("buildMissingVerdictEscalation is called", () => {
      const escalation = buildMissingVerdictEscalation(plan, label, reviewSession)
      // Lower-casing is deferred so it still happens inside each individual check.
      const lowered = (): string => escalation.toLowerCase()

      then("output names the task label", () => {
        expect(escalation).toContain(label)
      })

      then("output names the plan", () => {
        expect(escalation).toContain(plan)
      })

      then("output says the boulder is paused", () => {
        expect(lowered()).toContain("paused")
      })

      then("output includes a reuse hint for the session", () => {
        expect(escalation).toContain(reviewSession)
      })

      then("output asks to confirm or re-run the review", () => {
        expect(lowered()).toContain("re-run the review")
        // Both verdict tokens must be spelled out verbatim.
        for (const verdictToken of ["VERDICT: APPROVE", "VERDICT: REJECT"]) {
          expect(escalation).toContain(verdictToken)
        }
      })
    })
  })
})

describe("buildAdvanceDirective", () => {
  given("a plan name", () => {
    const plan = "atlas-loop-compaction-bg-fixes"

    when("buildAdvanceDirective is called", () => {
      const advanceText = buildAdvanceDirective(plan)

      then("output names the next unchecked task", () => {
        const lowered = advanceText.toLowerCase()
        expect(lowered).toContain("next unchecked")
      })

      then("output names the plan file path", () => {
        const planFilePath = `.omo/plans/${plan}.md`
        expect(advanceText).toContain(planFilePath)
      })

      then("output says do NOT re-verify finished work", () => {
        const lowered = advanceText.toLowerCase()
        expect(lowered).toContain("do not re-verify")
      })

      then("output is short", () => {
        expect(advanceText.length).toBeLessThan(600)
      })

      then("output does NOT contain 4-phase PROBABLY LYING content", () => {
        // Phrases that must never appear in the short advance directive.
        const forbiddenPhrases = [
          "PROBABLY LYING",
          "PHASE 1",
          "PHASE 2",
          "PHASE 3",
          "PHASE 4",
          "VERIFICATION_REMINDER",
        ]
        for (const phrase of forbiddenPhrases) {
          expect(advanceText).not.toContain(phrase)
        }
      })
    })
  })
})
24 changes: 24 additions & 0 deletions packages/omo-opencode/src/hooks/atlas/verification-reminders.ts
Original file line number Diff line number Diff line change
Expand Up @@ -195,3 +195,27 @@ If QA tasks exist in your todo list:

**NO TODO = NO TRACKING = INCOMPLETE WORK. Use todowrite aggressively.**`
}

/**
 * Builds the escalation text for a review that returned neither
 * `VERDICT: APPROVE` nor `VERDICT: REJECT`.
 *
 * @param planName - Plan name, echoed in backticks.
 * @param taskLabel - Label of the reviewed task, echoed in backticks.
 * @param sessionId - Identifier interpolated into the suggested `task(task_id="…")` re-run call.
 * @returns A multi-line message with a leading newline and no trailing newline.
 */
export function buildMissingVerdictEscalation(planName: string, taskLabel: string, sessionId: string): string {
  const rerunCall = `task(task_id="${sessionId}", prompt="Re-run the final review and emit VERDICT: APPROVE or VERDICT: REJECT")`

  // The leading empty entry yields the newline that opens the message.
  const messageLines = [
    "",
    "**FINAL REVIEW INCOMPLETE - BOULDER PAUSED**",
    "",
    `A reviewer for task \`${taskLabel}\` in plan \`${planName}\` returned no clear VERDICT: APPROVE or VERDICT: REJECT.`,
    "",
    "The boulder has paused. Please either:",
    "1. Confirm the work is acceptable and manually mark the task complete",
    `2. Re-run the review: \`${rerunCall}\``,
    "",
    "Do NOT auto-continue until you have a clear verdict.",
  ]

  return messageLines.join("\n")
}

/**
 * Builds the short directive that tells the reader a task is already complete
 * and to move on to the next unchecked item in the plan file.
 *
 * @param planName - Plan name; used to form the `.omo/plans/<planName>.md` path in the text.
 * @returns A multi-line message with a leading newline and no trailing newline.
 */
export function buildAdvanceDirective(planName: string): string {
  const planFile = `.omo/plans/${planName}.md`

  // The leading empty entry yields the newline that opens the message.
  const directiveLines = [
    "",
    "**TASK ALREADY COMPLETE - ADVANCE TO NEXT**",
    "",
    `This task is already verified and marked complete in \`${planFile}\`.`,
    "Do NOT re-verify finished work.",
    "",
    "Read the plan file now and proceed to the next unchecked `- [ ]` task.",
    "If no unchecked tasks remain, the plan is complete - run the Final Verification Wave.",
  ]

  return directiveLines.join("\n")
}