Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions apps/web/app/workflows/chat-post-finish-usage.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ function makeAssistantMessage(

const spies = {
recordUsage: mock(() => Promise.resolve()),
recordWorkflowRun: mock(() => Promise.resolve()),
collectTaskToolUsageEvents: mock(
(_message?: unknown) =>
[] as Array<{
Expand Down Expand Up @@ -64,6 +65,10 @@ mock.module("@/lib/db/usage", () => ({
recordUsage: spies.recordUsage,
}));

// Swap the workflow-runs persistence module for the shared spy so tests can
// inspect (and override) calls to recordWorkflowRun.
mock.module("@/lib/db/workflow-runs", () => {
  return { recordWorkflowRun: spies.recordWorkflowRun };
});

mock.module("@open-harness/agent", () => ({
collectTaskToolUsageEvents: spies.collectTaskToolUsageEvents,
sumLanguageModelUsage: spies.sumLanguageModelUsage,
Expand Down Expand Up @@ -96,6 +101,96 @@ describe("recordWorkflowUsage", () => {
});
});

// When a workflow-run payload is supplied, recordWorkflowUsage should forward
// it to recordWorkflowRun exactly once, renaming workflowRunId -> id and
// attaching the userId / modelId passed as the leading arguments.
test("records workflow run timing when provided", async () => {
  // Typed from the function's 6th parameter so literal fields keep the same
  // contextual typing they would have as an inline argument.
  const timingPayload: NonNullable<
    Parameters<typeof recordWorkflowUsage>[5]
  > = {
    workflowRunId: "wrun-1",
    chatId: "chat-1",
    sessionId: "session-1",
    status: "completed",
    startedAt: "2026-01-01T00:00:00.000Z",
    finishedAt: "2026-01-01T00:00:05.000Z",
    totalDurationMs: 5000,
    stepTimings: [
      {
        stepNumber: 1,
        startedAt: "2026-01-01T00:00:00.000Z",
        finishedAt: "2026-01-01T00:00:02.000Z",
        durationMs: 2000,
        finishReason: "tool-calls",
        rawFinishReason: "provider_tool_use",
      },
      {
        stepNumber: 2,
        startedAt: "2026-01-01T00:00:02.000Z",
        finishedAt: "2026-01-01T00:00:05.000Z",
        durationMs: 3000,
        finishReason: "stop",
        rawFinishReason: "provider_stop",
      },
    ],
  };

  // No total usage and no previous message: only the run record is of interest.
  await recordWorkflowUsage(
    "user-1",
    "gpt-4",
    undefined,
    makeAssistantMessage(),
    undefined,
    timingPayload,
  );

  expect(spies.recordWorkflowRun).toHaveBeenCalledTimes(1);

  // Partial matcher for one persisted step timing.
  const stepWith = (stepNumber: number, durationMs: number) =>
    expect.objectContaining({ stepNumber, durationMs });

  const [firstCall] = spies.recordWorkflowRun.mock.calls as unknown[][];
  expect(firstCall[0]).toMatchObject({
    id: "wrun-1",
    chatId: "chat-1",
    sessionId: "session-1",
    userId: "user-1",
    modelId: "gpt-4",
    status: "completed",
    totalDurationMs: 5000,
    stepTimings: [stepWith(1, 2000), stepWith(2, 3000)],
  });
});

// A rejected recordWorkflowRun must not prevent the main-agent usage row from
// being written: the run is attempted once, and recordUsage is still called.
test("continues recording usage when workflow run persistence fails", async () => {
  // Make the next run-persistence attempt reject.
  spies.recordWorkflowRun.mockImplementationOnce(async () => {
    throw new Error("workflow runs table missing");
  });

  const totalUsage = makeUsage({
    inputTokens: 100,
    outputTokens: 50,
    totalTokens: 150,
  });

  // Typed from the function's 6th parameter so literal fields keep the same
  // contextual typing they would have as an inline argument.
  const runPayload: NonNullable<Parameters<typeof recordWorkflowUsage>[5]> = {
    workflowRunId: "wrun-1",
    chatId: "chat-1",
    sessionId: "session-1",
    status: "completed",
    startedAt: "2026-01-01T00:00:00.000Z",
    finishedAt: "2026-01-01T00:00:05.000Z",
    totalDurationMs: 5000,
    stepTimings: [],
  };

  await recordWorkflowUsage(
    "user-1",
    "gpt-4",
    totalUsage,
    makeAssistantMessage(),
    undefined,
    runPayload,
  );

  expect(spies.recordWorkflowRun).toHaveBeenCalledTimes(1);
  expect(spies.recordUsage).toHaveBeenCalledTimes(1);

  // Second argument of the first recordUsage call is the usage record.
  const [firstUsageCall] = spies.recordUsage.mock.calls as unknown[][];
  expect(firstUsageCall[1]).toMatchObject({
    agentType: "main",
    model: "gpt-4",
  });
});

test("skips main recording when totalUsage is undefined", async () => {
await recordWorkflowUsage(
"user-1",
Expand Down
34 changes: 34 additions & 0 deletions apps/web/app/workflows/chat-post-finish.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ import {
buildLifecycleActivityUpdate,
} from "@/lib/sandbox/lifecycle";
import { dedupeMessageReasoning } from "@/lib/chat/dedupe-message-reasoning";
import {
recordWorkflowRun,
type WorkflowRunStatus,
type WorkflowRunStepTiming,
} from "@/lib/db/workflow-runs";
import { recordUsage } from "@/lib/db/usage";

const cachedInputTokensFor = (usage: LanguageModelUsage) =>
Expand Down Expand Up @@ -229,13 +234,42 @@ export async function recordWorkflowUsage(
totalUsage: LanguageModelUsage | undefined,
responseMessage: WebAgentUIMessage,
previousResponseMessage?: WebAgentUIMessage,
workflowRun?: {
workflowRunId: string;
chatId: string;
sessionId: string;
status: WorkflowRunStatus;
startedAt: string;
finishedAt: string;
totalDurationMs: number;
stepTimings: WorkflowRunStepTiming[];
},
): Promise<void> {
"use step";

try {
const { collectTaskToolUsageEvents, sumLanguageModelUsage } =
await import("@open-harness/agent");

if (workflowRun) {
try {
await recordWorkflowRun({
id: workflowRun.workflowRunId,
chatId: workflowRun.chatId,
sessionId: workflowRun.sessionId,
userId,
modelId,
status: workflowRun.status,
startedAt: workflowRun.startedAt,
finishedAt: workflowRun.finishedAt,
totalDurationMs: workflowRun.totalDurationMs,
stepTimings: workflowRun.stepTimings,
});
} catch (error) {
console.error("[workflow] Failed to record workflow run:", error);
}
}

// Record main agent usage
if (totalUsage) {
await recordUsage(userId, {
Expand Down
40 changes: 40 additions & 0 deletions apps/web/app/workflows/chat.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,46 @@ describe("runAgentWorkflow", () => {
expect(rwCalls[0][1]).toBe("gpt-4");
});

// With the agent reporting "tool-calls" as its finish reason and maxSteps set
// to 2, the workflow-run payload handed to recordWorkflowUsage (6th argument)
// should be marked "failed" and carry one timing entry per step.
test("marks workflow run as failed when maxSteps is exhausted", async () => {
  agentFinishReason = "tool-calls";
  agentRawFinishReason = "provider_tool_use";

  await runAgentWorkflow(makeOptions({ maxSteps: 2 }));

  // Shape of the fields this test reads from the recorded payload.
  type RecordedStep = {
    stepNumber: number;
    durationMs: number;
    finishReason?: string;
  };
  type RecordedRun = {
    workflowRunId: string;
    status: string;
    totalDurationMs: number;
    stepTimings: Array<RecordedStep>;
  };

  const [firstCall] = spies.recordWorkflowUsage.mock.calls as unknown[][];
  const recordedRun = firstCall[5] as RecordedRun;

  expect(recordedRun.workflowRunId).toBe("wrun_test-123");
  expect(recordedRun.status).toBe("failed");
  expect(recordedRun.totalDurationMs).toBeGreaterThanOrEqual(0);
  expect(recordedRun.stepTimings).toHaveLength(2);

  // Every step is expected to have ended on a tool call with a numeric duration.
  const toolCallStep = (stepNumber: number) =>
    expect.objectContaining({
      stepNumber,
      durationMs: expect.any(Number),
      finishReason: "tool-calls",
    });
  expect(recordedRun.stepTimings).toEqual([toolCallStep(1), toolCallStep(2)]);
});

test("logs full step diagnostics when the agent finishes with reason other", async () => {
agentFinishReason = "other";
agentRawFinishReason = "provider_other";
Expand Down
Loading
Loading