Skip to content

Commit 6460bce

Browse files
authored
feat: add deterministic escalation policy and explain evidence (#16)
* feat: add deterministic escalation policy and explain evidence
* fix: address PR review comments on escalation policy constants
* docs: link release-log entry to PR #16

---------

Co-authored-by: Hanna Rosengren <4538260+hannasoderstromdev@users.noreply.github.com>
1 parent c0d032b commit 6460bce

8 files changed

Lines changed: 231 additions & 4 deletions

File tree

docs/release-log.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
## Unreleased
22

3+
### 2026-05-10 — Add deterministic escalation policy and escalation evidence in explain output
4+
5+
- What changed: Added explicit escalation policy handling for low-confidence turns, user corrections, repeated failures, and high-risk implementation; propagated escalation fields into route evidence and human explain output.
6+
- Why it matters: Makes routing behavior less implicit and easier to audit by showing why a stronger target was selected.
7+
- Who is affected: Switchboard users and maintainers reviewing route decisions.
8+
- Action needed: None.
9+
- PR: https://github.com/hannasdev/model-switchboard/pull/16
10+
311
### 2026-05-10 — Harden CI security defaults and supply-chain hygiene
412

513
- What changed: Added a security policy and CODEOWNERS, introduced Dependabot updates, tightened GitHub Actions permissions, and pinned core GitHub Actions to immutable SHAs.

src/router/data/fixtures.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@
103103
"session": { "mode": "plan", "cost_posture": "balanced" },
104104
"expected": {
105105
"mode": "plan",
106-
"label": "balanced",
106+
"label": "best coder",
107107
"requiredCapabilities": ["chat", "reasoning", "structured_output"],
108108
"explanationIncludes": ["planning/tradeoff analysis"],
109109
"classificationReason": "user_correction_signal"

src/router/router.js

Lines changed: 89 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ const LABEL_TO_CLASS_RANK = {
3434
"best coder": 4
3535
};
3636

37+
// Classification confidence strictly below this value is treated as low
// confidence by resolveEscalationPolicy.
const LOW_CONFIDENCE_THRESHOLD = 0.7;
38+
3739
const PRIVACY_TIER_RANK = {
3840
external: 1,
3941
standard: 2,
@@ -276,6 +278,84 @@ function buildConstraintInputs(session = {}) {
276278
};
277279
}
278280

281+
/**
 * Picks whichever of two routing classes ranks higher.
 *
 * A class is ranked by translating it to its label through CLASS_TO_LABEL and
 * looking that label up in LABEL_TO_CLASS_RANK; a class without a rank counts
 * as 0. The current class wins ties.
 *
 * @param {string} currentClass - Class selected so far.
 * @param {string} candidateClass - Class that may replace it.
 * @returns {string} candidateClass when it ranks strictly higher, otherwise currentClass.
 */
function strongerClass(currentClass, candidateClass) {
  const rankOf = (routingClass) => LABEL_TO_CLASS_RANK[CLASS_TO_LABEL[routingClass]] || 0;
  const [currentRank, candidateRank] = [currentClass, candidateClass].map(rankOf);

  if (candidateRank > currentRank) {
    return candidateClass;
  }
  return currentClass;
}
286+
287+
// Fallback labels to try, in order, after each built-in desired label.
// A Map is used so arbitrary label strings never hit Object.prototype keys.
const LABEL_FALLBACK_ORDER = new Map([
  ["best coder", ["deep reasoning", "balanced", "quick"]],
  ["deep reasoning", ["best coder", "balanced", "quick"]],
  ["quick", ["balanced", "deep reasoning", "best coder"]],
  ["balanced", ["deep reasoning", "best coder", "quick"]]
]);

// Order used behind a custom label, and on its own when no label is given.
const DEFAULT_LABEL_FALLBACK_ORDER = ["balanced", "deep reasoning", "best coder", "quick"];

/**
 * Builds the label priority list used to pick a target for a desired label.
 *
 * The desired label always comes first. Built-in labels are followed by their
 * own fallback order; any other (custom) label is followed by the balanced
 * default order.
 *
 * @param {string} desiredLabel - Label the router would like to select.
 * @returns {string[]} A new array of labels, most preferred first. When
 *   desiredLabel is null, undefined or empty, the balanced default order is
 *   returned without a leading placeholder entry.
 */
function preferredLabelOrderFor(desiredLabel) {
  if (desiredLabel === undefined || desiredLabel === null || desiredLabel === "") {
    return [...DEFAULT_LABEL_FALLBACK_ORDER];
  }

  const knownFallbacks = LABEL_FALLBACK_ORDER.get(desiredLabel);
  if (knownFallbacks) {
    return [desiredLabel, ...knownFallbacks];
  }

  // Custom label: keep it first, then fall back through the default order.
  return [desiredLabel, ...DEFAULT_LABEL_FALLBACK_ORDER];
}
301+
302+
/**
 * Decides whether a turn should be routed to a stronger class than its mode
 * would normally get, and records why.
 *
 * Starts from MODE_TO_CLASS[mode] (or "medium_reasoning") and only ever moves
 * to a stronger class via strongerClass. Triggers are checked in a fixed order,
 * so `reasons` is deterministic: user_correction, repeated_failures,
 * high_risk_implementation, low_confidence, classification_escalation.
 *
 * @param {object} [options]
 * @param {object} [options.classification] - Reads `confidence`, `reason` and `escalate`.
 *   A missing or falsy `confidence` is treated as 0, i.e. low confidence.
 * @param {object} [options.session] - Reads `failureSignals.recentToolFailures`,
 *   `failureSignals.recentTestFailures` and `riskLevel`.
 * @param {string} [options.mode] - Resolved session mode.
 * @returns {{applied: boolean, reasons: string[], desiredClass: string, signals: {
 *   lowConfidence: boolean, userCorrection: boolean, repeatedFailures: boolean,
 *   highRiskImplementation: boolean }}} Escalation outcome; `signals` reports
 *   the raw detections even when they did not contribute a reason.
 */
function resolveEscalationPolicy({ classification = {}, session = {}, mode } = {}) {
  // Destructuring defaults only cover `undefined`; normalise an explicit
  // `null` as well so every field read below is equally null-safe.
  const turnClassification = classification ?? {};
  const turnSession = session ?? {};
  const failureSignals = turnSession.failureSignals;

  let desiredClass = MODE_TO_CLASS[mode] || "medium_reasoning";
  const reasons = [];

  const lowConfidence = Number(turnClassification.confidence || 0) < LOW_CONFIDENCE_THRESHOLD;
  const userCorrection = turnClassification.reason === "user_correction_signal";
  const recentFailureCount =
    Number(failureSignals?.recentToolFailures || 0) +
    Number(failureSignals?.recentTestFailures || 0);
  const repeatedFailures = recentFailureCount >= 2;
  const highRiskImplementation = mode === "implement" && turnSession.riskLevel === "high";

  if (userCorrection) {
    desiredClass = strongerClass(desiredClass, "strong_reasoning");
    reasons.push("user_correction");
  }

  // Repeated failures never escalate summarize turns.
  if (repeatedFailures && mode !== "summarize") {
    const repeatedFailureClass = ["implement", "debug"].includes(mode)
      ? "strong_coding"
      : "strong_reasoning";
    desiredClass = strongerClass(desiredClass, repeatedFailureClass);
    reasons.push("repeated_failures");
  }

  if (highRiskImplementation) {
    desiredClass = strongerClass(desiredClass, "strong_coding");
    reasons.push("high_risk_implementation");
  }

  // Low confidence only escalates implement, debug and review turns.
  if (lowConfidence && ["implement", "debug", "review"].includes(mode)) {
    const lowConfidenceClass = mode === "review" ? "strong_reasoning" : "strong_coding";
    desiredClass = strongerClass(desiredClass, lowConfidenceClass);
    reasons.push("low_confidence");
  }

  // The classifier's own escalation is reported only when it actually raises
  // the class beyond what the triggers above already selected.
  if (turnClassification.escalate) {
    const escalatedClass = strongerClass(desiredClass, turnClassification.escalate);
    if (escalatedClass !== desiredClass) {
      desiredClass = escalatedClass;
      reasons.push("classification_escalation");
    }
  }

  return {
    applied: reasons.length > 0,
    reasons,
    desiredClass,
    signals: {
      lowConfidence,
      userCorrection,
      repeatedFailures,
      highRiskImplementation
    }
  };
}
358+
279359
function describeCurrentTargetStatus({ session, targets = [], eligible = [], blocked = [] }) {
280360
const currentTargetId = session.currentTargetId || null;
281361
if (!currentTargetId) {
@@ -440,7 +520,8 @@ export function routePrompt({
440520
const modeResolution = resolveSessionMode(session, classification);
441521
const mode = modeResolution.resolvedMode;
442522
const requiredCapabilities = buildRequiredCapabilities(mode, classification.taskType);
443-
const desiredClass = classification.escalate || MODE_TO_CLASS[mode] || "medium_reasoning";
523+
const escalationPolicy = resolveEscalationPolicy({ classification, session, mode });
524+
const desiredClass = escalationPolicy.desiredClass;
444525
const projectOverrideLabel = resolveProjectOverrideLabel(session, mode);
445526
const desiredLabel = projectOverrideLabel || CLASS_TO_LABEL[desiredClass] || "balanced";
446527

@@ -461,7 +542,7 @@ export function routePrompt({
461542
}
462543

463544
const overrideSelection = applyRoutingOverride({ eligible, desiredLabel, session, targets, blocked });
464-
const preferredOrder = [desiredLabel, "balanced", "deep reasoning", "best coder", "quick"];
545+
const preferredOrder = preferredLabelOrderFor(desiredLabel);
465546
const preferredTarget = overrideSelection.target || selectByLabelPriority(eligible, preferredOrder);
466547
const continuitySelection = applyContinuitySwitchPolicy({
467548
selectedTarget: preferredTarget,
@@ -491,6 +572,7 @@ export function routePrompt({
491572
classification,
492573
modeResolution,
493574
policyInputs: constraintInputs,
575+
escalationPolicy,
494576
routingOverride: {
495577
requested: overrideSelection.override,
496578
applied: overrideSelection.overrideApplied,
@@ -514,6 +596,10 @@ export function routePrompt({
514596
if (requiredCapabilities.includes("file_edit")) whyParts.push("repo edits");
515597
if (requiredCapabilities.includes("test_execution")) whyParts.push("test execution");
516598

599+
if (escalationPolicy.applied) {
600+
whyParts.push(`escalation(${escalationPolicy.reasons.join(",")})`);
601+
}
602+
517603
return {
518604
status: "ok",
519605
action,
@@ -529,6 +615,7 @@ export function routePrompt({
529615
classification,
530616
modeResolution,
531617
policyInputs: constraintInputs,
618+
escalationPolicy,
532619
routingOverride: {
533620
requested: overrideSelection.override,
534621
applied: overrideSelection.overrideApplied,

src/switchboard/cli.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@ function printHumanExplain(explanation, stdout) {
6868
stdout.write(`Claude session: ${explanation.selectedClaude?.sessionId || "unknown"}\n`);
6969
stdout.write(`Claude target: ${explanation.selectedClaude?.model || "unknown"}/${explanation.selectedClaude?.effort || "unknown"}\n`);
7070
stdout.write(`Route: ${explanation.routeDecision?.label || "unknown"} (${explanation.routeDecision?.mode || "unknown"})\n`);
71+
const escalation = explanation.routeDecision?.escalationPolicy;
72+
if (escalation?.applied && Array.isArray(escalation.reasons) && escalation.reasons.length > 0) {
73+
stdout.write(`Escalation: ${escalation.reasons.join(",")}\n`);
74+
}
7175
stdout.write(`Route context: ${explanation.routeContext.status}\n`);
7276
stdout.write(`Hook events: ${explanation.hookEvents.length}\n`);
7377
for (const event of explanation.hookEvents) {

src/switchboard/workflow.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ function routeDecisionSummary(plan) {
8585
routingOverride: route.routingOverride || null,
8686
modeResolution: route.modeResolution || null,
8787
policyInputs: route.policyInputs || null,
88+
escalationPolicy: route.escalationPolicy || null,
8889
explanation: route.explanation || null
8990
};
9091
}

test/router.test.js

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,3 +277,105 @@ test("stay override reports hard constraint blockers for ineligible current targ
277277
assert.equal(result.routingOverride.applied, false);
278278
assert.equal(result.routingOverride.reason, "current_target_blocked_by_hard_constraints");
279279
});
280+
281+
// Asserts that a review-mode turn reports a low_confidence escalation and
// ends up on the "best coder" target.
test("low confidence escalates review routing", () => {
  const session = { mode: "review" };
  const { status, mode, escalationPolicy, selectedTarget } = routePrompt({
    input: "Could you sanity check this?",
    session,
    targets: openaiTargets,
    executionSupported: false
  });

  assert.equal(status, "ok");
  assert.equal(mode, "review");
  assert.equal(escalationPolicy.applied, true);
  assert.equal(escalationPolicy.reasons.includes("low_confidence"), true);
  assert.equal(selectedTarget?.label, "best coder");
});
297+
298+
// Asserts that a plan-mode turn with three recent failures (1 tool + 2 test)
// reports a repeated_failures escalation.
test("repeated failures trigger escalation", () => {
  const failureSignals = { recentToolFailures: 1, recentTestFailures: 2 };
  const { status, escalationPolicy, selectedTarget } = routePrompt({
    input: "Plan the rollout in phases with tradeoffs.",
    session: { mode: "plan", failureSignals },
    targets: openaiTargets,
    executionSupported: false
  });

  assert.equal(status, "ok");
  assert.equal(escalationPolicy.applied, true);
  assert.equal(escalationPolicy.reasons.includes("repeated_failures"), true);
  assert.equal(selectedTarget?.label, "best coder");
});
317+
318+
// Asserts that a user-correction prompt is reported as user_correction and
// not additionally as classification_escalation.
test("user correction trigger is explicit in escalation policy", () => {
  const { status, escalationPolicy, selectedTarget } = routePrompt({
    input: "That is a wrong assumption. Compare alternatives again.",
    session: { mode: "plan" },
    targets: openaiTargets,
    executionSupported: false
  });

  assert.equal(status, "ok");
  assert.equal(escalationPolicy.applied, true);
  assert.equal(escalationPolicy.reasons.includes("user_correction"), true);
  assert.equal(escalationPolicy.reasons.includes("classification_escalation"), false);
  assert.equal(selectedTarget?.label, "best coder");
});
334+
335+
// Asserts that a high-risk session whose turn resolves to implement mode
// reports a high_risk_implementation escalation.
test("high-risk implementation is explicitly escalated", () => {
  const session = { mode: "plan", riskLevel: "high" };
  const { status, mode, escalationPolicy, selectedTarget } = routePrompt({
    input: "Implement the plan.",
    session,
    targets: openaiTargets,
    executionSupported: false
  });

  assert.equal(status, "ok");
  assert.equal(mode, "implement");
  assert.equal(escalationPolicy.applied, true);
  assert.equal(escalationPolicy.reasons.includes("high_risk_implementation"), true);
  assert.equal(selectedTarget?.label, "best coder");
});
352+
353+
// Asserts that a project override forcing a custom label selects the target
// carrying that label ahead of the built-in fallback labels.
test("project override custom forceLabel is prioritized before fallback labels", () => {
  const balancedTarget = openaiTargets.find((candidate) => candidate.label === "balanced");
  assert.ok(balancedTarget, "expected balanced target fixture");

  // Clone the balanced fixture under a custom id/label and list it first.
  const customTarget = { ...balancedTarget, id: "custom-team-default", label: "team-default" };
  const targets = [customTarget, ...openaiTargets];

  const { status, selectedTarget } = routePrompt({
    input: "Plan the rollout in phases with tradeoffs.",
    session: {
      mode: "plan",
      projectOverride: { forceLabel: "team-default" }
    },
    targets,
    executionSupported: false
  });

  assert.equal(status, "ok");
  assert.equal(selectedTarget?.id, "custom-team-default");
  assert.equal(selectedTarget?.label, "team-default");
});

test/switchboard-cli.test.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ test("switchboard explain summarizes latest route context and hook events", () =
8282
paths.logPath,
8383
"--route-context-path",
8484
paths.routeContextPath,
85-
"Implement the plan."
85+
"That is a wrong assumption. Compare alternatives again."
8686
],
8787
turnIo
8888
);
@@ -124,6 +124,7 @@ test("switchboard explain summarizes latest route context and hook events", () =
124124

125125
assert.equal(exitCode, 0);
126126
assert.match(explainIo.stdoutText, /Route: best coder/);
127+
assert.match(explainIo.stdoutText, /Escalation: .*user_correction/);
127128
assert.match(explainIo.stdoutText, /Route context: matched/);
128129
assert.match(explainIo.stdoutText, /Hook events: 1/);
129130
assert.match(explainIo.stdoutText, /PreToolUse correlation=matched allow/);

test/switchboard-workflow.test.js

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ test("Switchboard turn plans Claude launch and records separable evidence", () =
4343
assert.equal(result.routeDecision.continuityCost, "low");
4444
assert.equal(result.routeDecision.modeResolution.resolvedMode, "implement");
4545
assert.equal(result.routeDecision.policyInputs.hardConstraints.privacy, "off");
46+
assert.deepEqual(result.routeDecision.escalationPolicy?.reasons, []);
4647
assert.equal(result.selectedClaude.model, "sonnet");
4748
assert.equal(result.selectedClaude.effort, "high");
4849
assert.equal(result.selectedClaude.sessionId, "claude-session-1");
@@ -55,6 +56,7 @@ test("Switchboard turn plans Claude launch and records separable evidence", () =
5556
assert.equal(entry.userPrompt, "Implement the plan.");
5657
assert.equal(entry.wrapperContext.kind, "switchboard_context");
5758
assert.equal(entry.routeDecision.label, "best coder");
59+
assert.deepEqual(entry.routeDecision.escalationPolicy?.reasons, []);
5860
assert.equal(entry.selectedClaude.effort, "high");
5961
assert.equal(entry.session.claudeSessionId, "claude-session-1");
6062

@@ -64,6 +66,28 @@ test("Switchboard turn plans Claude launch and records separable evidence", () =
6466
assert.equal(routeContext["claude-session-1"].latest.model, "sonnet");
6567
});
6668

69+
// Asserts that an escalated turn exposes the escalation policy both on the
// returned route decision and on the first entry read back from the log.
test("Switchboard logs escalation policy details for escalated turns", () => {
  const paths = tempPaths();
  const result = planSwitchboardTurn({
    input: "That is a wrong assumption. Compare alternatives again.",
    threadId: "thread-escalation",
    sessionId: "claude-session-escalation",
    cwd: "/repo",
    storePath: paths.storePath,
    logPath: paths.logPath,
    routeContextPath: paths.routeContextPath
  });

  assert.equal(result.status, "planned");
  assert.equal(result.routeDecision.label, "best coder");
  const plannedPolicy = result.routeDecision.escalationPolicy;
  assert.equal(plannedPolicy?.applied, true);
  assert.equal(plannedPolicy?.reasons.includes("user_correction"), true);

  const [loggedEntry] = readLog(paths.logPath);
  const loggedPolicy = loggedEntry.routeDecision.escalationPolicy;
  assert.equal(loggedPolicy?.applied, true);
  assert.equal(loggedPolicy?.reasons.includes("user_correction"), true);
});
90+
6791
test("Switchboard continuity probe preserves Claude session while route changes", () => {
6892
const { storePath, logPath, routeContextPath } = tempPaths();
6993
const result = planSwitchboardContinuityProbe({

0 commit comments

Comments
 (0)