Skip to content

Commit 4ba5130

Browse files
committed
fix(core): preserve single-tool user-facing output
1 parent bc21f4e commit 4ba5130

2 files changed

Lines changed: 96 additions & 8 deletions

File tree

packages/core/src/__tests__/planner-happy-path.test.ts

Lines changed: 59 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -465,6 +465,65 @@ describe("v5 happy path — message handler → planner → executor → evaluat
465465
expect(evalStage?.evaluation?.decision).toBe("FINISH");
466466
});
467467

468+
// Exercises the planner flow with exactly one successful tool call whose result
// carries `userFacingText`, and asserts that the final reply text is that
// tool-provided string rather than the `messageToUser` from the later model response.
it("prefers a single tool's verified user-facing text over evaluator paraphrase", async () => {
469+
const inspectRuntime = makeMockAction({
470+
name: "CHECK_RUNTIME",
471+
parameters: [],
472+
handler: async () => ({
473+
success: true,
474+
text: "raw shell output with exact paths and metrics",
475+
userFacingText:
476+
"Root disk: 65% used, 138G available. Biggest cleanup candidate: /home/milady/.bun (19G).",
477+
data: { actionName: "CHECK_RUNTIME" },
478+
}),
479+
});
480+
481+
const runtime = makeRuntime({
482+
actions: [inspectRuntime],
483+
// Scripted model responses, in order: stage-1 routing, planner tool call,
// then a FINISH decision (presumably consumed as the evaluator output — confirm).
responses: [
484+
{
485+
expectModelType: ModelType.RESPONSE_HANDLER,
486+
body: stage1Response({
487+
contexts: ["general"],
488+
candidateActionNames: ["CHECK_RUNTIME"],
489+
thought: "Runtime inspection needs a tool.",
490+
}),
491+
},
492+
{
493+
expectModelType: ModelType.ACTION_PLANNER,
494+
body: {
495+
text: "Checking runtime state.",
496+
toolCalls: [{ id: "call-1", name: "CHECK_RUNTIME", args: {} }],
497+
},
498+
},
499+
{
500+
expectModelType: ModelType.RESPONSE_HANDLER,
501+
body: JSON.stringify({
502+
success: true,
503+
decision: "FINISH",
504+
thought: "Tool result is enough.",
// NOTE(review): this string differs from the tool's userFacingText only in the
// path (`milody` vs `milady`). Presumably deliberate, so the assertion below can
// tell the two sources apart — confirm it is not an accidental typo.
messageToUser:
505+
"Root disk: 65% used, 138G available. Biggest cleanup candidate: /home/milody/.bun (19G).",
506+
}),
507+
},
508+
],
509+
});
510+
511+
512+
const result = await runV5MessageRuntimeStage1({
513+
runtime,
514+
message: makeMessage("check disk space"),
515+
state: makeState(),
516+
responseId: RESPONSE_ID,
517+
});
518+
519+
expect(result.kind).toBe("planned_reply");
520+
// The guard narrows `result` before `result.result` is read; the expected text
// is the tool's userFacingText (the `milady` spelling).
if (result.kind === "planned_reply") {
521+
expect(result.result.responseContent?.text).toBe(
522+
"Root disk: 65% used, 138G available. Biggest cleanup candidate: /home/milady/.bun (19G).",
523+
);
524+
}
525+
});
526+
468527
it("records terminal task failure separately from evaluator failures", async () => {
469528
const brokenAction = makeMockAction({
470529
name: "BROKEN_ACTION",

packages/core/src/runtime/planner-loop.ts

Lines changed: 37 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -264,9 +264,10 @@ export async function runPlannerLoop(
264264
trajectory,
265265
evaluator,
266266
finalMessage: userSafeFinalMessage(
267-
evaluator.messageToUser ??
268-
plannerOutput.messageToUser ??
269-
latestToolResultText(trajectory),
267+
preferredFinalMessageFromToolOrModel(
268+
trajectory,
269+
evaluator.messageToUser ?? plannerOutput.messageToUser,
270+
),
270271
trajectory,
271272
),
272273
};
@@ -518,7 +519,7 @@ export async function runPlannerLoop(
518519
trajectory,
519520
evaluator: gated,
520521
finalMessage: userSafeFinalMessage(
521-
gated.messageToUser ?? latestToolResultText(trajectory),
522+
preferredFinalMessageFromToolOrModel(trajectory, gated.messageToUser),
522523
trajectory,
523524
),
524525
};
@@ -550,11 +551,13 @@ export async function runPlannerLoop(
550551
trajectory,
551552
evaluator,
552553
finalMessage: userSafeFinalMessage(
553-
evaluator.messageToUser ??
554-
latestToolResultText(trajectory) ??
555-
(evaluator.success === false
554+
preferredFinalMessageFromToolOrModel(
555+
trajectory,
556+
evaluator.messageToUser,
557+
evaluator.success === false
556558
? failedToolFallbackMessage(trajectory)
557-
: undefined),
559+
: undefined,
560+
),
558561
trajectory,
559562
),
560563
};
@@ -2175,6 +2178,32 @@ function latestToolResultText(
21752178
return undefined;
21762179
}
21772180

2181+
/**
 * Returns the trimmed `userFacingText` of the trajectory's tool result, but only
 * when the trajectory contains exactly one step that has both a `toolCall` and a
 * `result`, and that result reports `success === true`.
 *
 * @param trajectory - Planner trajectory whose `steps` are inspected.
 * @returns The trimmed user-facing text, or `undefined` when there are zero or
 *   several tool-result steps, the single result did not succeed, or its
 *   `userFacingText` is missing or blank.
 */
function singleSuccessfulUserFacingToolResultText(
2182+
trajectory: PlannerTrajectory,
2183+
): string | undefined {
2184+
const toolResultSteps = trajectory.steps.filter(
2185+
(step) => step.toolCall && step.result,
2186+
);
2187+
// Only an unambiguous single tool result qualifies; anything else defers to the caller.
if (toolResultSteps.length !== 1) return undefined;
2188+
const result = toolResultSteps[0]?.result;
2189+
// Strict comparison: a missing or non-boolean `success` is treated as not successful.
if (result?.success !== true) return undefined;
2190+
const text = result.userFacingText?.trim();
2191+
// An empty string after trimming collapses to `undefined`.
return text || undefined;
2192+
}
2193+
2194+
/**
 * Picks the final message candidate by taking the first defined value, in order:
 *   1. the single successful tool result's `userFacingText`
 *      (via `singleSuccessfulUserFacingToolResultText`),
 *   2. `modelMessage`, passed through `getNonEmptyString`,
 *   3. `latestToolResultText(trajectory)`,
 *   4. `fallback`, passed through `getNonEmptyString`.
 *
 * NOTE(review): `getNonEmptyString` and `latestToolResultText` are defined outside
 * this view. Presumably the former yields `undefined` for non-string or empty
 * input — confirm, since `??` only skips `null`/`undefined`.
 *
 * @param trajectory - Planner trajectory consulted for tool result text.
 * @param modelMessage - Message produced by the model (evaluator or planner), if any.
 * @param fallback - Last-resort message used when no other source yields a value.
 * @returns The chosen message, or `undefined` when every source is empty.
 */
function preferredFinalMessageFromToolOrModel(
2195+
trajectory: PlannerTrajectory,
2196+
modelMessage?: unknown,
2197+
fallback?: unknown,
2198+
): string | undefined {
2199+
return (
2200+
singleSuccessfulUserFacingToolResultText(trajectory) ??
2201+
getNonEmptyString(modelMessage) ??
2202+
latestToolResultText(trajectory) ??
2203+
getNonEmptyString(fallback)
2204+
);
2205+
}
2206+
21782207
function latestFailedToolStep(
21792208
trajectory: PlannerTrajectory,
21802209
): PlannerStep | undefined {

0 commit comments

Comments
 (0)