fix(core): restore evaluator messageToUser precedence, opt-in canonical tool text

NubsCarson · claude · NubsCarson · commit 79aaaf2fffc2 · 2026-05-23T10:54:50.000Z
The Server Tests upstream regression (planner-loop-user-facing-text →
"does not regress evaluator's explicit messageToUser path") fails on
develop because preferredFinalMessageFromToolOrModel preferred a single
successful tool's userFacingText OVER the evaluator's explicit
messageToUser. Shaw's regression test asserts the opposite: when the
evaluator emits an explicit messageToUser, it wins.

Reconciling both intents without picking one over the other: add an
opt-in flag verifiedUserFacing on ActionResult / PlannerToolResult.
Tools that emit structured outputs where evaluator paraphrase risks
hallucinating values (paths, ids, counts, numeric metrics) set
verifiedUserFacing: true to mark their userFacingText canonical. The
planner-loop then echoes the tool verbatim instead of letting the
evaluator paraphrase it. Without the flag, the evaluator's explicit
messageToUser wins (Shaw's invariant).

Precedence in preferredFinalMessageFromToolOrModel is now:
  1. Single successful tool with verifiedUserFacing === true
  2. Evaluator/model messageToUser
  3. Most recent tool userFacingText (fallback)
  4. Caller-provided fallback

Changes:
- packages/core/src/types/components.ts: add verifiedUserFacing to
  ActionResult with JSDoc explaining when to opt in.
- packages/core/src/runtime/planner-types.ts: add verifiedUserFacing to
  PlannerToolResult with matching contract.
- packages/core/src/runtime/execute-planned-tool-call.ts and
  packages/core/src/runtime/planner-loop.ts
  (actionResultToPlannerToolResult): propagate the field through both
  ActionResult → PlannerToolResult conversion paths.
- packages/core/src/runtime/planner-loop.ts:
  - Rename singleSuccessfulUserFacingToolResultText →
    singleVerifiedUserFacingToolResultText and require
    verifiedUserFacing === true.
  - Reorder preferredFinalMessageFromToolOrModel to put verified-tool
    first, then evaluator, then fallback chain.
- packages/core/src/__tests__/planner-happy-path.test.ts: the
  "prefers a single tool's verified user-facing text over evaluator
  paraphrase" test now sets verifiedUserFacing: true (its semantic
  intent — "this is canonical structured data the evaluator could
  hallucinate") so the canonical-output guarantee still holds.

Verified:
- 1362 tests pass, 11 skipped (full packages/core suite, 165 files)
- bun run lint:check: 12 warnings before == 12 after (no new flags)
- bun run typecheck: clean

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/packages/core/src/__tests__/planner-happy-path.test.ts b/packages/core/src/__tests__/planner-happy-path.test.ts
@@ -474,6 +474,10 @@ describe("v5 happy path — message handler → planner → executor → evaluat
 				text: "raw shell output with exact paths and metrics",
 				userFacingText:
 					"Root disk: 65% used, 138G available. Biggest cleanup candidate: /home/example/.bun (19G).",
+				// Marks userFacingText as canonical so the planner-loop echoes it
+				// verbatim instead of preferring the evaluator's paraphrase (which
+				// can hallucinate paths/numbers in this kind of structured output).
+				verifiedUserFacing: true,
 				data: { actionName: "CHECK_RUNTIME" },
 			}),
 		});
diff --git a/packages/core/src/runtime/execute-planned-tool-call.ts b/packages/core/src/runtime/execute-planned-tool-call.ts
@@ -359,6 +359,7 @@ function actionResultToStreamingResult(
 		success: result.success,
 		text: result.text,
 		userFacingText: result.userFacingText,
+		verifiedUserFacing: result.verifiedUserFacing,
 		error: result.error ? stringifyError(result.error) : undefined,
 		data: result.data,
 		values: result.values,
diff --git a/packages/core/src/runtime/planner-loop.ts b/packages/core/src/runtime/planner-loop.ts
@@ -2178,7 +2178,16 @@ function latestToolResultText(
 	return undefined;
 }
 
-function singleSuccessfulUserFacingToolResultText(
+/**
+ * Returns a single successful tool's `userFacingText` ONLY when the tool
+ * explicitly opted in to canonical-output via `verifiedUserFacing: true`.
+ *
+ * Tools that emit structured data the evaluator could easily paraphrase
+ * incorrectly (paths, ids, counts, numeric metrics) set the flag so the
+ * framework echoes their output verbatim instead of trusting the
+ * evaluator's rewording.
+ */
+function singleVerifiedUserFacingToolResultText(
 	trajectory: PlannerTrajectory,
 ): string | undefined {
 	const toolResultSteps = trajectory.steps.filter(
@@ -2187,6 +2196,7 @@ function singleSuccessfulUserFacingToolResultText(
 	if (toolResultSteps.length !== 1) return undefined;
 	const result = toolResultSteps[0]?.result;
 	if (result?.success !== true) return undefined;
+	if (result.verifiedUserFacing !== true) return undefined;
 	const text = result.userFacingText?.trim();
 	return text || undefined;
 }
@@ -2196,8 +2206,27 @@ function preferredFinalMessageFromToolOrModel(
 	modelMessage?: unknown,
 	fallback?: unknown,
 ): string | undefined {
+	// Precedence:
+	//   1. A single successful tool whose result was explicitly marked
+	//      `verifiedUserFacing: true` — used for structured outputs
+	//      (paths, ids, counts) where evaluator paraphrase risks
+	//      hallucinating a value.
+	//   2. The model/evaluator's explicit `messageToUser` — authoritative
+	//      by default; the evaluator has seen the full trajectory and
+	//      chose what the user should read.
+	//   3. The most recent tool's `userFacingText` — fallback when neither
+	//      the model nor any verified tool provided a clean reply.
+	//   4. An explicit caller-provided fallback (e.g. failed-tool message).
+	//
+	// Regression coverage:
+	//   - `planner-loop-user-facing-text.test.ts` → "does not regress
+	//     evaluator's explicit messageToUser path" — evaluator wins when
+	//     no tool sets `verifiedUserFacing`.
+	//   - `planner-happy-path.test.ts` → "prefers a single tool's verified
+	//     user-facing text over evaluator paraphrase" — tool wins when it
+	//     opts in via `verifiedUserFacing: true`.
 	return (
-		singleSuccessfulUserFacingToolResultText(trajectory) ??
+		singleVerifiedUserFacingToolResultText(trajectory) ??
 		getNonEmptyString(modelMessage) ??
 		latestToolResultText(trajectory) ??
 		getNonEmptyString(fallback)
@@ -2460,6 +2489,7 @@ export function actionResultToPlannerToolResult(
 		success: result.success,
 		text: result.text,
 		userFacingText: result.userFacingText,
+		verifiedUserFacing: result.verifiedUserFacing,
 		data: Object.keys(data).length > 0 ? data : undefined,
 		error: result.error,
 		continueChain: result.continueChain,
diff --git a/packages/core/src/runtime/planner-types.ts b/packages/core/src/runtime/planner-types.ts
@@ -99,8 +99,29 @@ export interface PlannerToolResult {
 	 * undefined; in that case the framework falls through to the
 	 * evaluator's synthesized reply rather than dumping shell-wrapper
 	 * text into the user channel.
+	 *
+	 * By default an explicit evaluator `messageToUser` outranks this —
+	 * the evaluator has seen the full trajectory and chose what the
+	 * user should read. To mark `userFacingText` as canonical
+	 * (do-not-paraphrase) and have it outrank the evaluator's reply
+	 * when there is exactly one successful tool, set
+	 * `verifiedUserFacing: true`.
 	 */
 	userFacingText?: string;
+	/**
+	 * Marks `userFacingText` as the canonical answer for this turn: it
+	 * is echoed verbatim instead of the evaluator's paraphrase. When
+	 * `true` AND exactly one successful tool provided `userFacingText`,
+	 * the planner-loop prefers the tool's text over the evaluator's
+	 * reply for the terminal-FINISH `finalMessage`.
+	 *
+	 * Use when the tool's output is structured data the evaluator can
+	 * easily hallucinate (paths, ids, counts, numeric metrics) and any
+	 * paraphrase risk is worse than echoing the tool verbatim. Leave
+	 * unset for natural-language answers where the evaluator may
+	 * legitimately rephrase or add framing.
+	 */
+	verifiedUserFacing?: boolean;
 	data?: Record<string, unknown>;
 	error?: unknown;
 	continueChain?: boolean;
diff --git a/packages/core/src/types/components.ts b/packages/core/src/types/components.ts
@@ -659,9 +659,23 @@ export interface ActionResult {
 	 * instead of the diagnostic `text`. Leave unset for log-emitting
 	 * actions (BASH, file readers); set for Q&A actions, REPLY actions,
 	 * and content generators.
+	 *
+	 * By default an explicit evaluator `messageToUser` outranks this.
+	 * Set `verifiedUserFacing: true` to mark this text as canonical
+	 * (do-not-paraphrase) — e.g. when it contains paths, ids, counts,
+	 * or numeric metrics the evaluator might otherwise hallucinate.
 	 */
 	userFacingText?: string;
 
+	/**
+	 * When `true`, `userFacingText` is set, and there is exactly one
+	 * successful tool, the planner-loop prefers this text over the
+	 * evaluator's `messageToUser` for the terminal-FINISH reply. Use for
+	 * structured outputs (paths, ids, counts, numeric metrics) where
+	 * paraphrase risk is worse than echoing the action verbatim.
+	 */
+	verifiedUserFacing?: boolean;
+
 	/** Values to merge into the state */
 	values?: Record<string, ProviderValue>;