Skip to content

Commit 7017e09

Browse files
committed
docs: specify skills-mode flag matrix
1 parent 1e1277a commit 7017e09

2 files changed

Lines changed: 191 additions & 1 deletion

File tree

docs/architecture.md

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,24 @@ Auto-review strategy is controlled globally by `AGENT_AUTO_REVIEW_STRATEGY`:
185185

186186
Tasks also have a `skipReview` flag (default `false`). When `true`, the coordinator bypasses the review stage entirely — after successful implementation the task moves directly to `done`, skipping the `review-sidecar` and `security-sidecar` runs. This is useful for small changes or tasks where code review is unnecessary.
187187

188-
Skills-mode tasks (`useSubagents=false`) also have two opt-in flags. `runPlanImprove` inserts `/aif-improve` after the initial plan and before `plan_ready`. `runPostVerify` inserts `/aif-verify` after implementation and before review; if `skipReview=true`, verification moves directly to `done`. Both flags default to `false` and are ignored for subagent tasks.
188+
Skills-mode tasks (`useSubagents=false`) also have two opt-in flags. `runPlanImprove` inserts `/aif-improve` after the initial plan and before `plan_ready`. This is plan refinement: it may replace the stored plan only when the improver returns a complete plan-shaped update. `runPostVerify` inserts `/aif-verify` after implementation and before review. This is an execution validation gate: it stores verification output, passes through to review/done on pass or warn, and moves to `blocked_external` for a blocking gate result. Both flags default to `false` and are ignored for subagent tasks.
189+
190+
Flag interaction table:
191+
192+
| `useSubagents` | `skipReview` | `runPlanImprove` | `runPostVerify` | Effective pipeline after planning starts |
193+
| -------------- | ------------ | ---------------- | --------------- | ----------------------------------------------------------------------- |
194+
| `true` | `false` | ignored | ignored | Planning → Plan Ready → Implementing → Review → Done |
195+
| `true` | `true` | ignored | ignored | Planning → Plan Ready → Implementing → Done |
196+
| `false` | `false` | `false` | `false` | Planning → Plan Ready → Implementing → Review → Done |
197+
| `false` | `true` | `false` | `false` | Planning → Plan Ready → Implementing → Done |
198+
| `false` | `false` | `true` | `false` | Planning → Improve → Plan Ready → Implementing → Review → Done |
199+
| `false` | `true` | `true` | `false` | Planning → Improve → Plan Ready → Implementing → Done |
200+
| `false` | `false` | `false` | `true` | Planning → Plan Ready → Implementing → Verify → Review → Done |
201+
| `false` | `true` | `false` | `true` | Planning → Plan Ready → Implementing → Verify → Done |
202+
| `false` | `false` | `true` | `true` | Planning → Improve → Plan Ready → Implementing → Verify → Review → Done |
203+
| `false` | `true` | `true` | `true` | Planning → Improve → Plan Ready → Implementing → Verify → Done |
204+
205+
`verify` remains a coordinator stage, not a human action. That keeps it covered by the same claim, timeout, watchdog, runtime-profile, and activity-log machinery as other autonomous work. The semantic contract is narrower than review: verify validates the implementation against the accepted plan, while review/security sidecars evaluate code quality and risk.
189206

190207
### QA Pipeline
191208

packages/agent/src/__tests__/coordinator.test.ts

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,179 @@ describe("coordinator", () => {
348348
expect(runVerifier).toHaveBeenCalledTimes(1);
349349
});
350350

351+
it.each([
352+
{
353+
name: "subagent mode ignores improve/verify and runs review",
354+
useSubagents: true,
355+
skipReview: false,
356+
runPlanImprove: true,
357+
runPostVerify: true,
358+
expectImprover: false,
359+
expectVerifier: false,
360+
expectReviewer: true,
361+
},
362+
{
363+
name: "subagent mode with skipReview ignores verify and goes done",
364+
useSubagents: true,
365+
skipReview: true,
366+
runPlanImprove: true,
367+
runPostVerify: true,
368+
expectImprover: false,
369+
expectVerifier: false,
370+
expectReviewer: false,
371+
},
372+
{
373+
name: "skills mode baseline runs review",
374+
useSubagents: false,
375+
skipReview: false,
376+
runPlanImprove: false,
377+
runPostVerify: false,
378+
expectImprover: false,
379+
expectVerifier: false,
380+
expectReviewer: true,
381+
},
382+
{
383+
name: "skills mode skipReview goes done",
384+
useSubagents: false,
385+
skipReview: true,
386+
runPlanImprove: false,
387+
runPostVerify: false,
388+
expectImprover: false,
389+
expectVerifier: false,
390+
expectReviewer: false,
391+
},
392+
{
393+
name: "skills mode improve then review",
394+
useSubagents: false,
395+
skipReview: false,
396+
runPlanImprove: true,
397+
runPostVerify: false,
398+
expectImprover: true,
399+
expectVerifier: false,
400+
expectReviewer: true,
401+
},
402+
{
403+
name: "skills mode improve with skipReview goes done",
404+
useSubagents: false,
405+
skipReview: true,
406+
runPlanImprove: true,
407+
runPostVerify: false,
408+
expectImprover: true,
409+
expectVerifier: false,
410+
expectReviewer: false,
411+
},
412+
{
413+
name: "skills mode verify then review",
414+
useSubagents: false,
415+
skipReview: false,
416+
runPlanImprove: false,
417+
runPostVerify: true,
418+
expectImprover: false,
419+
expectVerifier: true,
420+
expectReviewer: true,
421+
},
422+
{
423+
name: "skills mode verify with skipReview goes done",
424+
useSubagents: false,
425+
skipReview: true,
426+
runPlanImprove: false,
427+
runPostVerify: true,
428+
expectImprover: false,
429+
expectVerifier: true,
430+
expectReviewer: false,
431+
},
432+
{
433+
name: "skills mode improve and verify then review",
434+
useSubagents: false,
435+
skipReview: false,
436+
runPlanImprove: true,
437+
runPostVerify: true,
438+
expectImprover: true,
439+
expectVerifier: true,
440+
expectReviewer: true,
441+
},
442+
{
443+
name: "skills mode improve and verify with skipReview goes done",
444+
useSubagents: false,
445+
skipReview: true,
446+
runPlanImprove: true,
447+
runPostVerify: true,
448+
expectImprover: true,
449+
expectVerifier: true,
450+
expectReviewer: false,
451+
},
452+
])(
453+
"should follow the skills-mode flag truth table: $name",
454+
async ({
455+
name,
456+
useSubagents,
457+
skipReview,
458+
runPlanImprove,
459+
runPostVerify,
460+
expectImprover,
461+
expectVerifier,
462+
expectReviewer,
463+
}) => {
464+
const db = testDb.current;
465+
const taskId = `task-flag-table-${name.replace(/[^a-zA-Z0-9]+/g, "-").toLowerCase()}`;
466+
db.insert(tasks)
467+
.values({
468+
id: taskId,
469+
projectId: "test-project",
470+
title: name,
471+
status: "planning",
472+
autoMode: true,
473+
useSubagents,
474+
skipReview,
475+
runPlanImprove,
476+
runPostVerify,
477+
})
478+
.run();
479+
480+
await pollAndProcess();
481+
482+
expect(runPlanner).toHaveBeenCalledWith(taskId, "/tmp/test");
483+
expect(runPlanChecker).toHaveBeenCalledWith(taskId, "/tmp/test");
484+
expect(runImplementer).toHaveBeenCalledWith(taskId, "/tmp/test");
485+
486+
if (expectImprover) {
487+
expect(runImprover).toHaveBeenCalledWith(taskId, "/tmp/test");
488+
expect(vi.mocked(runPlanner).mock.invocationCallOrder[0]).toBeLessThan(
489+
vi.mocked(runImprover).mock.invocationCallOrder[0] ?? 0,
490+
);
491+
expect(vi.mocked(runImprover).mock.invocationCallOrder[0]).toBeLessThan(
492+
vi.mocked(runPlanChecker).mock.invocationCallOrder[0] ?? 0,
493+
);
494+
} else {
495+
expect(runImprover).not.toHaveBeenCalled();
496+
}
497+
498+
if (expectVerifier) {
499+
expect(runVerifier).toHaveBeenCalledWith(taskId, "/tmp/test");
500+
expect(vi.mocked(runImplementer).mock.invocationCallOrder[0]).toBeLessThan(
501+
vi.mocked(runVerifier).mock.invocationCallOrder[0] ?? 0,
502+
);
503+
} else {
504+
expect(runVerifier).not.toHaveBeenCalled();
505+
}
506+
507+
if (expectReviewer) {
508+
expect(runReviewer).toHaveBeenCalledWith(taskId, "/tmp/test");
509+
const previousStageOrder = expectVerifier
510+
? (vi.mocked(runVerifier).mock.invocationCallOrder[0] ?? 0)
511+
: (vi.mocked(runImplementer).mock.invocationCallOrder[0] ?? 0);
512+
expect(previousStageOrder).toBeLessThan(
513+
vi.mocked(runReviewer).mock.invocationCallOrder[0] ?? 0,
514+
);
515+
} else {
516+
expect(runReviewer).not.toHaveBeenCalled();
517+
}
518+
519+
const task = db.select().from(tasks).where(eq(tasks.id, taskId)).get();
520+
expect(task!.status).toBe("done");
521+
},
522+
);
523+
351524
it("should not auto-implement plan_ready tasks when autoMode=false", async () => {
352525
const db = testDb.current;
353526
db.insert(tasks)

0 commit comments

Comments
 (0)