-
Notifications
You must be signed in to change notification settings - Fork 56.9k
Expand file tree
/
Copy pathverifier.ts
More file actions
85 lines (69 loc) · 2.36 KB
/
verifier.ts
File metadata and controls
85 lines (69 loc) · 2.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import { z } from 'zod';
import { createEvalAgent } from '../../src/utils/eval-agents';
import type { WorkflowResponse } from '../clients/n8n-client';
import { MOCK_EXECUTION_VERIFY_PROMPT } from '../system-prompts/mock-execution-verify';
import type { ChecklistItem, ChecklistResult } from '../types';
// ---------------------------------------------------------------------------
// Structured output schema
// ---------------------------------------------------------------------------
const checklistResultSchema = z.object({
results: z.array(
z.object({
id: z.number(),
pass: z.boolean(),
reasoning: z.string(),
failureCategory: z.string().optional(),
rootCause: z.string().optional(),
}),
),
});
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
export async function verifyChecklist(
checklist: ChecklistItem[],
verificationArtifact: string,
_workflowJsons: WorkflowResponse[],
): Promise<ChecklistResult[]> {
const llmItems = checklist.filter((i) => i.strategy === 'llm');
const results: ChecklistResult[] = [];
if (llmItems.length > 0) {
const userMessage = `## Checklist
${JSON.stringify(llmItems, null, 2)}
## Verification Artifact
${verificationArtifact}
Verify each checklist item against the artifact above.`;
const agent = createEvalAgent('eval-checklist-verifier', {
instructions: MOCK_EXECUTION_VERIFY_PROMPT,
cache: true,
}).structuredOutput(checklistResultSchema);
const result = await agent.generate(userMessage);
const validIds = new Set(llmItems.map((i) => i.id));
const parsed = result.structuredOutput as z.infer<typeof checklistResultSchema> | undefined;
if (parsed?.results) {
for (const entry of parsed.results) {
if (
typeof entry.id === 'number' &&
typeof entry.pass === 'boolean' &&
validIds.has(entry.id)
) {
results.push({
id: entry.id,
pass: entry.pass,
reasoning: entry.reasoning ?? '',
strategy: 'llm',
failureCategory: entry.failureCategory,
rootCause: entry.rootCause,
});
}
}
} else {
console.warn(
'[verifier] structuredOutput returned null — LLM did not produce parseable results',
);
}
}
// Sort results by id for deterministic output
results.sort((a, b) => a.id - b.id);
return results;
}