lee-to · lee-to · May 14, 2026 · May 13, 2026 · May 14, 2026
diff --git a/.env.example b/.env.example
@@ -98,6 +98,10 @@ API_RUNTIME_RUN_TIMEOUT_MS=120000
 # Leave disabled until warmup callers are enabled and monitored.
 # AIF_RUNTIME_SESSION_FORK_ENABLED=false
 
+# Opt-in rollout for stage-scoped runtime selection pins.
+# When disabled (the default), same-status retries re-resolve the effective runtime profile instead of reusing a pin.
+# AIF_STAGE_RUNTIME_PIN_ENABLED=false
+
 # Opt-in OpenCode API transport workaround for long model generations.
 # When enabled, /session/:id/message uses an undici dispatcher with disabled
 # header/body idle timeouts while AGENT_STAGE_RUN_TIMEOUT_MS remains authoritative.

diff --git a/docs/architecture.md b/docs/architecture.md
@@ -300,7 +300,7 @@ SQLite via `better-sqlite3` with `drizzle-orm` for type-safe queries. Schema is
 
 Key tables:
 
-- **tasks** — task data, status, plan/logs, heartbeat metadata, runtime override fields (`runtime_profile_id`, `model_override`, `runtime_options_json`), runtime session id (`session_id`), auto-review convergence state (`manual_review_required`, `auto_review_state_json`), and task-level runtime-limit copy (`runtime_limit_snapshot_json`, `runtime_limit_updated_at`)
+- **tasks** — task data, status, plan/logs, heartbeat metadata, runtime override fields (`runtime_profile_id`, `model_override`, `runtime_options_json`), runtime session id (`session_id`), internal stage-scoped runtime retry pin (`active_runtime_status`, `active_runtime_selection_json`), auto-review convergence state (`manual_review_required`, `auto_review_state_json`), and task-level runtime-limit copy (`runtime_limit_snapshot_json`, `runtime_limit_updated_at`). The `active_runtime_*` fields are distinct from `session_id` and `runtime_limit_snapshot_json`: they store the runtime/profile/model/options chosen at the start of a task status so that same-status retries reuse the same selection, and they are cleared on every stage or human transition except `retry_from_blocked`.
 - **runtime_profiles** — project-scoped or global runtime/provider profiles with non-secret transport/model config plus authoritative runtime-limit state (`runtime_limit_snapshot_json`, `runtime_limit_updated_at`)
 - **projects** — project metadata plus default runtime profile ids for tasks and chat
 - **chat_sessions / chat_messages** — persisted chat state with runtime profile/session linkage

diff --git a/docs/configuration.md b/docs/configuration.md
@@ -52,6 +52,7 @@ Node packages (`@aif/api`, `@aif/agent`, `@aif/data`, `@aif/shared`) auto-load e
 | `COORDINATOR_MAX_CONCURRENT_TASKS`                     | number  | `3`                            | Max concurrent tasks per stage for parallel-enabled projects. Non-parallel projects always process 1 task at a time regardless of this value. Range 1–10                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     |
 | `AIF_TASK_WORKTREES_ENABLED`                           | boolean | `false`                        | Off-by-default rollout flag for per-task git worktrees. When `false`, branch-isolated projects (`git.create_branches=true`) stay serial and the API rejects parallel auto-queue for those projects. When `true`, full-mode planning for parallel branch-isolated projects provisions an isolated sibling git worktree and stores it on `tasks.worktree_path`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 |
 | `AIF_RUNTIME_SESSION_FORK_ENABLED`                     | boolean | `false`                        | Off-by-default rollout flag for runtime session forks. When `false`, adapters keep `supportsSessionFork=false` in descriptor/effective capabilities even if the transport implementation exists. When `true`, fork-capable transports expose `supportsSessionFork=true` so warmup callers can opt into `forkSession()`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       |
+| `AIF_STAGE_RUNTIME_PIN_ENABLED`                        | boolean | `false`                        | Off-by-default rollout flag for stage-scoped task runtime-selection pins. When `false`, every subagent execution resolves the effective runtime profile normally and skips task pin lookup/persistence. When `true`, the agent stores the runtime/profile/model/options chosen at the start of a task status and reuses them for same-status retries until the task advances or a human transition clears the pin                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
 | `AIF_RUNTIME_CODEX_NATIVE_SUBAGENTS_ENABLED`           | boolean | `false`                        | Off-by-default rollout flag for Codex SDK native subagents. When `false`, Codex `native_subagents` workflows fall back to isolated `$aif-*` skill-session execution even if the profile requests `codexSubagentStrategy: "native"`. When `true`, Codex may use native subagents only for SDK profiles with required `.codex/agents/*.toml` and `.codex/config.toml` assets present; missing assets still fall back to isolated mode                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
 | `AIF_RUNTIME_OPENCODE_LONG_RUNNING_DISPATCHER_ENABLED` | boolean | `false`                        | Off-by-default rollout flag for long-running OpenCode API session messages. When `false`, OpenCode `/session/:id/message` uses the default fetch behavior. When `true`, that message POST uses an undici dispatcher with disabled header/body idle timeouts while the adapter AbortController timeout remains authoritative                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
 | `AGENT_BYPASS_PERMISSIONS`                             | boolean | `true`                         | Provider-neutral bypass flag. When `true`, subagents run without approval prompts and without any OS-level sandbox. Each adapter translates per its native mechanism — Claude: `--dangerously-skip-permissions`; Codex: `approval_policy=never` + `sandbox_mode=danger-full-access`. When `false`, each adapter falls back to its safer default (Claude: `.claude/settings.json` allow rules; Codex: `approval_policy=on-request` + `sandbox_mode=workspace-write`). See `docs/providers.md` § Bypass semantics                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              |

diff --git a/packages/agent/src/__tests__/coordinator.test.ts b/packages/agent/src/__tests__/coordinator.test.ts
@@ -256,8 +256,15 @@ describe("coordinator", () => {
         projectId: "test-project",
         title: "Resume impl",
         status: "implementing",
+        activeRuntimeStatus: "implementing",
+        activeRuntimeSelectionJson: JSON.stringify({ status: "implementing" }),
       })
       .run();
+    vi.mocked(runImplementer).mockImplementationOnce(async () => {
+      const task = db.select().from(tasks).where(eq(tasks.id, "task-impl")).get();
+      expect(task!.activeRuntimeStatus).toBe("implementing");
+      expect(task!.activeRuntimeSelectionJson).toBe(JSON.stringify({ status: "implementing" }));
+    });
 
     await pollAndProcess();
 
@@ -266,6 +273,8 @@ describe("coordinator", () => {
     expect(runReviewer).toHaveBeenCalledWith("task-impl", "/tmp/test");
     const task = db.select().from(tasks).where(eq(tasks.id, "task-impl")).get();
     expect(task!.status).toBe("done");
+    expect(task!.activeRuntimeStatus).toBeNull();
+    expect(task!.activeRuntimeSelectionJson).toBeNull();
   });
 
   it("should pick up review tasks and dispatch reviewer", async () => {

diff --git a/packages/agent/src/__tests__/hooks.test.ts b/packages/agent/src/__tests__/hooks.test.ts
@@ -68,6 +68,7 @@ function makeEnv(overrides: Record<string, unknown> = {}) {
     AGENT_USE_SUBAGENTS: true,
     AGENT_FIRST_ACTIVITY_TIMEOUT_MS: 60_000,
     AIF_USAGE_LIMITS_ENABLED: false,
+    AIF_STAGE_RUNTIME_PIN_ENABLED: false,
     AIF_WARMUP_ENABLED: false,
     AIF_RUNTIME_CODEX_NATIVE_SUBAGENTS_ENABLED: false,
     AIF_TASK_WORKTREES_ENABLED: false,

diff --git a/packages/agent/src/__tests__/subagentQuery.test.ts b/packages/agent/src/__tests__/subagentQuery.test.ts
@@ -10,6 +10,8 @@ const clearRuntimeProfileLimitSnapshotMock = vi.fn();
 const notifyProjectRuntimeLimitBroadcastMock = vi.fn();
 const saveTaskSessionIdMock = vi.fn();
 const getTaskSessionIdMock = vi.fn<(taskId: string) => string | null>(() => null);
+const saveTaskActiveRuntimeSelectionMock = vi.fn();
+const getTaskActiveRuntimeSelectionMock = vi.fn<() => Record<string, unknown> | null>(() => null);
 const codexStartThreadMock = vi.fn();
 const codexResumeThreadMock = vi.fn();
 const expireStaleRuntimeWarmupSessionsMock = vi.fn(() => 0);
@@ -41,6 +43,7 @@ const getAppDefaultRuntimeProfileIdMock = vi.fn<
 interface MockTaskRow {
   id: string;
   projectId: string;
+  status?: string;
   runtimeOptionsJson: string | null;
   modelOverride: string | null;
   branchName?: string | null;
@@ -103,6 +106,8 @@ vi.mock("@aif/data", async (importOriginal) => {
     updateTaskHeartbeat: vi.fn(),
     renewTaskClaim: vi.fn(),
     persistRuntimeProfileLimitSnapshot: persistRuntimeProfileLimitSnapshotMock,
+    saveTaskActiveRuntimeSelection: saveTaskActiveRuntimeSelectionMock,
+    getTaskActiveRuntimeSelection: getTaskActiveRuntimeSelectionMock,
     saveTaskSessionId: saveTaskSessionIdMock,
     getTaskSessionId: getTaskSessionIdMock,
     expireStaleRuntimeWarmupSessions: expireStaleRuntimeWarmupSessionsMock,
@@ -147,6 +152,7 @@ const baseMockEnv = {
   AGENT_USE_SUBAGENTS: true,
   AGENT_FIRST_ACTIVITY_TIMEOUT_MS: 60_000,
   AIF_USAGE_LIMITS_ENABLED: true,
+  AIF_STAGE_RUNTIME_PIN_ENABLED: false,
   AIF_WARMUP_ENABLED: false,
   AIF_RUNTIME_CODEX_NATIVE_SUBAGENTS_ENABLED: false,
   TELEGRAM_BOT_TOKEN: undefined,
@@ -194,6 +200,12 @@ const { RuntimeExecutionError, createRuntimeWorkflowSpec } = await import("@aif/
 const { executeSubagentQuery, resolveAdapterForTask } = await import("../subagentQuery.js");
 
 beforeEach(() => {
+  for (const key of Object.keys(mockEnvOverrides)) {
+    delete mockEnvOverrides[key];
+  }
+  saveTaskActiveRuntimeSelectionMock.mockReset();
+  getTaskActiveRuntimeSelectionMock.mockReset();
+  getTaskActiveRuntimeSelectionMock.mockReturnValue(null);
   expireStaleRuntimeWarmupSessionsMock.mockReset();
   expireStaleRuntimeWarmupSessionsMock.mockReturnValue(0);
   findActiveReadyRuntimeWarmupSessionMock.mockReset();
@@ -1439,10 +1451,52 @@ describe("executeSubagentQuery model fallback policy", () => {
     expect(callOptions.model).toBe("task-model");
   });
 
-  it("uses profile defaultModel when no task override", async () => {
+  it("skips active runtime pin lookup and persistence when the rollout flag is disabled", async () => {
+    findTaskByIdMock.mockReturnValue({
+      id: "task-1",
+      projectId: "project-1",
+      status: "implementing",
+      runtimeOptionsJson: null,
+      modelOverride: null,
+    });
+    getTaskActiveRuntimeSelectionMock.mockReturnValue({
+      status: "implementing",
+      profileMode: "task",
+      source: "project_default",
+      profileId: "profile-old",
+      runtimeId: "claude",
+      providerId: "anthropic",
+      transport: "sdk",
+      model: "pinned-model",
+      baseUrl: null,
+      apiKeyEnvVar: "ANTHROPIC_API_KEY",
+      headers: {},
+      options: { effort: "medium" },
+      pinnedAt: "2026-05-13T00:00:00.000Z",
+    });
+    queryMock.mockImplementation(makeDelayedSuccess(0, "ok"));
+
+    await executeSubagentQuery({
+      taskId: "task-1",
+      projectRoot: "/tmp/project",
+      agentName: "review-gate",
+      prompt: "check",
+      workflowKind: "review-gate",
+    });
+
+    const callOptions = queryMock.mock.calls[0][0].options as Record<string, unknown>;
+    expect(callOptions.model).toBe("profile-model");
+    expect(resolveEffectiveRuntimeProfileMock).toHaveBeenCalled();
+    expect(getTaskActiveRuntimeSelectionMock).not.toHaveBeenCalled();
+    expect(saveTaskActiveRuntimeSelectionMock).not.toHaveBeenCalled();
+  });
+
+  it("persists active runtime selection when the rollout flag is enabled", async () => {
+    mockEnvOverrides.AIF_STAGE_RUNTIME_PIN_ENABLED = true;
     findTaskByIdMock.mockReturnValue({
       id: "task-1",
       projectId: "project-1",
+      status: "implementing",
       runtimeOptionsJson: null,
       modelOverride: null,
     });
@@ -1458,6 +1512,72 @@ describe("executeSubagentQuery model fallback policy", () => {
 
     const callOptions = queryMock.mock.calls[0][0].options as Record<string, unknown>;
     expect(callOptions.model).toBe("profile-model");
+    expect(saveTaskActiveRuntimeSelectionMock).toHaveBeenCalledWith(
+      "task-1",
+      expect.objectContaining({
+        status: "implementing",
+        profileMode: "task",
+        runtimeId: "claude",
+        providerId: "anthropic",
+        profileId: "profile-1",
+        model: "profile-model",
+      }),
+    );
+    delete mockEnvOverrides.AIF_STAGE_RUNTIME_PIN_ENABLED;
+  });
+
+  it("uses pinned runtime selection for retries in the same status and profile mode", async () => {
+    mockEnvOverrides.AIF_STAGE_RUNTIME_PIN_ENABLED = true;
+    findTaskByIdMock.mockReturnValue({
+      id: "task-1",
+      projectId: "project-1",
+      status: "implementing",
+      runtimeOptionsJson: JSON.stringify({ effort: "new-effort" }),
+      modelOverride: "new-task-model",
+    });
+    getTaskActiveRuntimeSelectionMock.mockReturnValue({
+      status: "implementing",
+      profileMode: "task",
+      source: "project_default",
+      profileId: "profile-old",
+      runtimeId: "claude",
+      providerId: "anthropic",
+      transport: "sdk",
+      model: "pinned-model",
+      baseUrl: null,
+      apiKeyEnvVar: "ANTHROPIC_API_KEY",
+      headers: {},
+      options: { effort: "medium" },
+      pinnedAt: "2026-05-13T00:00:00.000Z",
+    });
+    resolveEffectiveRuntimeProfileMock.mockReturnValue({
+      source: "project_default",
+      profile: {
+        id: "profile-new",
+        runtimeId: "claude",
+        providerId: "anthropic",
+        defaultModel: "new-profile-model",
+      },
+      taskRuntimeProfileId: null,
+      projectRuntimeProfileId: "profile-new",
+      systemRuntimeProfileId: null,
+    });
+    queryMock.mockImplementation(makeDelayedSuccess(0, "ok"));
+
+    await executeSubagentQuery({
+      taskId: "task-1",
+      projectRoot: "/tmp/project",
+      agentName: "review-gate",
+      prompt: "check",
+      workflowKind: "review-gate",
+    });
+
+    const callOptions = queryMock.mock.calls[0][0].options as Record<string, unknown>;
+    expect(callOptions.model).toBe("pinned-model");
+    expect(callOptions.effort).toBe("medium");
+    expect(resolveEffectiveRuntimeProfileMock).not.toHaveBeenCalled();
+    expect(saveTaskActiveRuntimeSelectionMock).not.toHaveBeenCalled();
+    delete mockEnvOverrides.AIF_STAGE_RUNTIME_PIN_ENABLED;
   });
 
   it("does not inject lightModel when no task override and no profile model", async () => {