Skip to content

Commit 1e1277a

Browse files
committed
fix: block tasks on failing verify gate
1 parent fa7fc8f commit 1e1277a

5 files changed

Lines changed: 143 additions & 2 deletions

File tree

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Blocking verify gate retried indefinitely
2+
3+
**Date:** 2026-06-30 11:27
4+
**Files:** packages/agent/src/stageErrorHandler.ts, packages/agent/src/subagents/verifier.ts, packages/agent/src/__tests__/stageErrorHandler.test.ts, packages/agent/src/__tests__/coordinator.test.ts
5+
**Severity:** medium
6+
7+
## Problem
8+
9+
When `aif-verify` emitted a blocking `aif-gate-result`, `runVerifier` threw a plain `Error`.
10+
The coordinator classified plain stage errors as `revert`, which left the task in `verify`.
11+
The next poll selected the same task again, creating an implicit retry loop.
12+
13+
## Root Cause
14+
15+
The verifier needed to distinguish a deliberate gate block from an unexpected runtime failure.
16+
Without a structured error type, the coordinator had no safe signal to move the task to
17+
`blocked_external` and avoid retrying every poll cycle.
18+
19+
## Solution
20+
21+
Added `StageManualBlockError` with a structured `blockedReason` field and taught
22+
`classifyStageError` to map it to `blocked_external` with `retryAfter=null`. The verifier now
23+
throws this error for blocking gate results after persisting the verification output. Regression
24+
tests cover both the classifier and the coordinator behavior across a second poll.
25+
26+
## Prevention
27+
28+
- Use structured error classes or fields for intentional stage-control outcomes.
29+
- Do not rely on generic `Error` when a coordinator needs different recovery behavior.
30+
- Add coordinator-level regression tests for any stage error that must not be retried.
31+
32+
## Tags
33+
34+
`#coordinator` `#verify` `#structured-errors` `#retry-loop` `#typescript`

packages/agent/src/__tests__/coordinator.test.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ const { runImplementer } = await import("../subagents/implementer.js");
9494
const { runReviewer } = await import("../subagents/reviewer.js");
9595
const { runVerifier } = await import("../subagents/verifier.js");
9696
const { handleAutoReviewGate } = await import("../autoReviewHandler.js");
97+
const { StageManualBlockError } = await import("../stageErrorHandler.js");
9798

9899
describe("coordinator", () => {
99100
beforeEach(() => {
@@ -310,6 +311,43 @@ describe("coordinator", () => {
310311
expect(task!.status).toBe("done");
311312
});
312313

314+
it("should block verify tasks instead of retrying when verify gate blocks", async () => {
315+
const db = testDb.current;
316+
db.insert(tasks)
317+
.values({
318+
id: "task-blocking-verify",
319+
projectId: "test-project",
320+
title: "Blocking verify",
321+
status: "verify",
322+
useSubagents: false,
323+
runPostVerify: true,
324+
})
325+
.run();
326+
327+
vi.mocked(runVerifier).mockRejectedValueOnce(
328+
new StageManualBlockError(
329+
"Verify stage returned a blocking gate result. Review the Verification section for details.",
330+
),
331+
);
332+
333+
await pollAndProcess();
334+
335+
expect(runVerifier).toHaveBeenCalledTimes(1);
336+
expect(runReviewer).not.toHaveBeenCalled();
337+
338+
const task = db.select().from(tasks).where(eq(tasks.id, "task-blocking-verify")).get();
339+
expect(task!.status).toBe("blocked_external");
340+
expect(task!.blockedFromStatus).toBe("verify");
341+
expect(task!.blockedReason).toBe(
342+
"Verify stage returned a blocking gate result. Review the Verification section for details.",
343+
);
344+
expect(task!.retryAfter).toBeNull();
345+
346+
await pollAndProcess();
347+
348+
expect(runVerifier).toHaveBeenCalledTimes(1);
349+
});
350+
313351
it("should not auto-implement plan_ready tasks when autoMode=false", async () => {
314352
const db = testDb.current;
315353
db.insert(tasks)

packages/agent/src/__tests__/stageErrorHandler.test.ts

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ vi.mock("@aif/shared", async (importOriginal) => {
3434
};
3535
});
3636

37-
const { classifyStageError } = await import("../stageErrorHandler.js");
37+
const { classifyStageError, StageManualBlockError } = await import("../stageErrorHandler.js");
3838
import { appendTaskActivityLog } from "@aif/data";
3939

4040
function makeInput(overrides: Record<string, unknown> = {}) {
@@ -92,6 +92,26 @@ describe("classifyStageError", () => {
9292
expect(result.kind).toBe("blocked_external");
9393
});
9494

95+
it("returns manual blocked_external for explicit stage gate blocks", () => {
96+
const result = classifyStageError(
97+
makeInput({
98+
stageLabel: "verifier",
99+
sourceStatus: "verify",
100+
retryCount: 2,
101+
err: new StageManualBlockError("Verify gate blocked this task."),
102+
}),
103+
);
104+
105+
expect(result).toEqual({
106+
kind: "blocked_external",
107+
blockedReason: "Verify gate blocked this task.",
108+
retryAfter: null,
109+
retryAfterSource: "none",
110+
retryCount: 2,
111+
limitSnapshot: null,
112+
});
113+
});
114+
95115
it("includes retryAfter ISO string for external failures", () => {
96116
const before = Date.now();
97117
const result = classifyStageError(

packages/agent/src/stageErrorHandler.ts

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,16 @@ const NON_RETRYABLE_RUNTIME_CATEGORIES = new Set([
3232
"content_filter",
3333
]);
3434

35+
export class StageManualBlockError extends Error {
36+
readonly blockedReason: string;
37+
38+
constructor(blockedReason: string, message = blockedReason) {
39+
super(message);
40+
this.name = "StageManualBlockError";
41+
this.blockedReason = blockedReason;
42+
}
43+
}
44+
3545
export type ErrorRecovery =
3646
| { kind: "fast_retry" }
3747
| {
@@ -52,6 +62,14 @@ interface StageErrorInput {
5262
err: unknown;
5363
}
5464

65+
function findStageManualBlockError(err: unknown): StageManualBlockError | null {
66+
if (err instanceof StageManualBlockError) return err;
67+
if (err instanceof Error && "cause" in err && err.cause) {
68+
return findStageManualBlockError(err.cause);
69+
}
70+
return null;
71+
}
72+
5573
function buildUserSafeExternalReason(err: unknown): string {
5674
const runtimeError = findRuntimeExecutionError(err);
5775
if (!runtimeError) {
@@ -130,6 +148,33 @@ function resolveRetryAfter(err: unknown): {
130148
export function classifyStageError(input: StageErrorInput): ErrorRecovery {
131149
const { taskId, stageLabel, sourceStatus, err } = input;
132150

151+
const manualBlockErr = findStageManualBlockError(err);
152+
if (manualBlockErr) {
153+
logActivity(
154+
taskId,
155+
"Agent",
156+
`coordinator moved to blocked_external from ${sourceStatus} at ${stageLabel}; retryAfter=manual; source=none; reason=${truncateReason(manualBlockErr.blockedReason)}`,
157+
);
158+
159+
log.warn(
160+
{
161+
taskId,
162+
stage: stageLabel,
163+
errorName: manualBlockErr.name,
164+
},
165+
"Subagent stage requested manual block",
166+
);
167+
168+
return {
169+
kind: "blocked_external",
170+
blockedReason: manualBlockErr.blockedReason,
171+
retryAfter: null,
172+
retryAfterSource: "none",
173+
retryCount: input.retryCount ?? 0,
174+
limitSnapshot: null,
175+
};
176+
}
177+
133178
// Branch / worktree isolation failures must NEVER fall into the generic
134179
// revert path — generic revert triggers unbounded re-planning on a
135180
// corrupted work tree. Pin the task to blocked_external with no retry so

packages/agent/src/subagents/verifier.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { createRuntimeWorkflowSpec } from "@aif/runtime";
33
import { logger } from "@aif/shared";
44
import { assertCurrentBranch, restorePersistedBranch } from "../gitBranch.js";
55
import { logActivity } from "../hooks.js";
6+
import { StageManualBlockError } from "../stageErrorHandler.js";
67
import { executeSubagentQuery } from "../subagentQuery.js";
78

89
const log = logger("verifier");
@@ -110,7 +111,10 @@ Task description: ${task.description}`;
110111
const gate = extractVerifyGateResult(resultText);
111112
if (gate?.status === "fail" || gate?.blocking === true) {
112113
log.warn({ taskId, blockers: gate.blockers ?? [] }, "Verify stage returned blocking result");
113-
throw new Error("Verify stage returned a blocking gate result");
114+
throw new StageManualBlockError(
115+
"Verify stage returned a blocking gate result. Review the Verification section for details.",
116+
"Verify stage returned a blocking gate result",
117+
);
114118
}
115119

116120
logActivity(taskId, "Agent", "verify stage complete (aif-verify)");

0 commit comments

Comments
 (0)