Skip to content

Commit 0d693f2

Browse files
committed
fix: handle workflow recording failures and track maxSteps
1 parent 989cd4f commit 0d693f2

File tree

4 files changed: +72 -20 lines changed

apps/web/app/workflows/chat-post-finish-usage.test.ts

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -154,6 +154,43 @@ describe("recordWorkflowUsage", () => {
154154
});
155155
});
156156

157+
test("continues recording usage when workflow run persistence fails", async () => {
158+
spies.recordWorkflowRun.mockImplementationOnce(() =>
159+
Promise.reject(new Error("workflow runs table missing")),
160+
);
161+
162+
const usage = makeUsage({
163+
inputTokens: 100,
164+
outputTokens: 50,
165+
totalTokens: 150,
166+
});
167+
168+
await recordWorkflowUsage(
169+
"user-1",
170+
"gpt-4",
171+
usage,
172+
makeAssistantMessage(),
173+
undefined,
174+
{
175+
workflowRunId: "wrun-1",
176+
chatId: "chat-1",
177+
sessionId: "session-1",
178+
status: "completed",
179+
startedAt: "2026-01-01T00:00:00.000Z",
180+
finishedAt: "2026-01-01T00:00:05.000Z",
181+
totalDurationMs: 5000,
182+
stepTimings: [],
183+
},
184+
);
185+
186+
expect(spies.recordWorkflowRun).toHaveBeenCalledTimes(1);
187+
expect(spies.recordUsage).toHaveBeenCalledTimes(1);
188+
expect((spies.recordUsage.mock.calls as unknown[][])[0][1]).toMatchObject({
189+
agentType: "main",
190+
model: "gpt-4",
191+
});
192+
});
193+
157194
test("skips main recording when totalUsage is undefined", async () => {
158195
await recordWorkflowUsage(
159196
"user-1",

apps/web/app/workflows/chat-post-finish.ts

Lines changed: 16 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -252,18 +252,22 @@ export async function recordWorkflowUsage(
252252
await import("@open-harness/agent");
253253

254254
if (workflowRun) {
255-
await recordWorkflowRun({
256-
id: workflowRun.workflowRunId,
257-
chatId: workflowRun.chatId,
258-
sessionId: workflowRun.sessionId,
259-
userId,
260-
modelId,
261-
status: workflowRun.status,
262-
startedAt: workflowRun.startedAt,
263-
finishedAt: workflowRun.finishedAt,
264-
totalDurationMs: workflowRun.totalDurationMs,
265-
stepTimings: workflowRun.stepTimings,
266-
});
255+
try {
256+
await recordWorkflowRun({
257+
id: workflowRun.workflowRunId,
258+
chatId: workflowRun.chatId,
259+
sessionId: workflowRun.sessionId,
260+
userId,
261+
modelId,
262+
status: workflowRun.status,
263+
startedAt: workflowRun.startedAt,
264+
finishedAt: workflowRun.finishedAt,
265+
totalDurationMs: workflowRun.totalDurationMs,
266+
stepTimings: workflowRun.stepTimings,
267+
});
268+
} catch (error) {
269+
console.error("[workflow] Failed to record workflow run:", error);
270+
}
267271
}
268272

269273
// Record main agent usage

apps/web/app/workflows/chat.test.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -308,7 +308,7 @@ describe("runAgentWorkflow", () => {
308308
expect(rwCalls[0][1]).toBe("gpt-4");
309309
});
310310

311-
test("passes workflow timing data to recordWorkflowUsage", async () => {
311+
test("marks workflow run as failed when maxSteps is exhausted", async () => {
312312
agentFinishReason = "tool-calls";
313313
agentRawFinishReason = "provider_tool_use";
314314

@@ -331,7 +331,7 @@ describe("runAgentWorkflow", () => {
331331
};
332332

333333
expect(workflowRun.workflowRunId).toBe("wrun_test-123");
334-
expect(workflowRun.status).toBe("completed");
334+
expect(workflowRun.status).toBe("failed");
335335
expect(workflowRun.totalDurationMs).toBeGreaterThanOrEqual(0);
336336
expect(workflowRun.stepTimings).toHaveLength(2);
337337
expect(workflowRun.stepTimings).toEqual([

apps/web/app/workflows/chat.ts

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -477,6 +477,7 @@ export async function runAgentWorkflow(options: Options) {
477477
latestMessage.role === "assistant" ? latestMessage : undefined;
478478
const stepTimings: WorkflowRunStepTiming[] = [];
479479
let wasAborted = false;
480+
let exhaustedMaxSteps = false;
480481
let totalUsage: LanguageModelUsage | undefined;
481482
let finalFinishReason: FinishReason | undefined;
482483
let streamClosed = false;
@@ -526,12 +527,18 @@ export async function runAgentWorkflow(options: Options) {
526527
: result.stepUsage;
527528
}
528529

529-
if (
530-
result.finishReason !== "tool-calls" ||
531-
shouldPauseForToolInteraction(
530+
const shouldContinue =
531+
result.finishReason === "tool-calls" &&
532+
!shouldPauseForToolInteraction(
532533
result.responseMessage?.parts ?? pendingAssistantResponse.parts,
533-
)
534-
) {
534+
);
535+
536+
if (!shouldContinue) {
537+
break;
538+
}
539+
540+
if (options.maxSteps !== undefined && step + 1 >= options.maxSteps) {
541+
exhaustedMaxSteps = true;
535542
break;
536543
}
537544
}
@@ -699,7 +706,11 @@ export async function runAgentWorkflow(options: Options) {
699706
await refreshDiffCache(options.sessionId, sandboxState);
700707
}
701708

702-
workflowStatus = wasAborted ? "aborted" : "completed";
709+
workflowStatus = wasAborted
710+
? "aborted"
711+
: exhaustedMaxSteps
712+
? "failed"
713+
: "completed";
703714
} catch (error) {
704715
workflowStatus = wasAborted ? "aborted" : "failed";
705716
caughtError = error;

0 commit comments

Comments (0)