Skip to content

Commit 1f9577e

Browse files
test: add NUTs
1 parent 531e427 commit 1f9577e

File tree

4 files changed

+284
-0
lines changed

4 files changed

+284
-0
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"tests": [
3+
{
4+
"id": "test-topic-routing",
5+
"steps": [
6+
{
7+
"type": "agent.create_session",
8+
"id": "session"
9+
},
10+
{
11+
"type": "agent.send_message",
12+
"id": "msg1",
13+
"session_id": "{session.session_id}",
14+
"utterance": "What's the weather like today?"
15+
},
16+
{
17+
"type": "agent.get_state",
18+
"id": "state1",
19+
"session_id": "{session.session_id}"
20+
},
21+
{
22+
"type": "evaluator.planner_topic_assertion",
23+
"id": "check-topic",
24+
"actual": "{state1.response.planner_response.lastExecution.topic}",
25+
"expected": "Weather_and_Temperature_Information",
26+
"operator": "equals"
27+
}
28+
]
29+
}
30+
]
31+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
name: Weather_Test
2+
description: Test weather agent topic routing
3+
subjectType: AGENT
4+
subjectName: Local_Info_Agent
5+
testCases:
6+
- utterance: 'What is the weather?'
7+
expectedTopic: Weather_and_Temperature_Information
8+
expectedActions: []
9+
expectedOutcome: 'The agent should provide weather information'
10+
- utterance: 'Tell me about the temperature'
11+
expectedTopic: Weather_and_Temperature_Information
12+
expectedActions: []
13+
expectedOutcome: 'The agent should provide temperature information'
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"tests": "this should be an array not a string"
3+
}
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
/*
2+
* Copyright 2026, Salesforce, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import { join } from 'node:path';
18+
import { expect } from 'chai';
19+
import { execCmd } from '@salesforce/cli-plugins-testkit';
20+
import type { RunEvalResult } from '../../src/commands/agent/test/run-eval.js';
21+
import { getTestSession, getUsername } from './shared-setup.js';
22+
23+
/* eslint-disable no-console */
24+
25+
describe('agent test run-eval', function () {
26+
// Increase timeout for setup since shared setup includes long waits and deployments
27+
this.timeout(30 * 60 * 1000); // 30 minutes
28+
29+
const mockProjectDir = join(process.cwd(), 'test', 'mock-projects', 'agent-generate-template', 'specs');
30+
const jsonPayloadPath = join(mockProjectDir, 'eval-payload.json');
31+
const yamlSpecPath = join(mockProjectDir, 'eval-test-spec.yaml');
32+
33+
// One-time suite setup: obtain the shared test session before any test executes.
before(async function () {
  // The shared setup is slow, so the hook is given its own 30-minute limit.
  const setupTimeoutMs = 30 * 60 * 1000;
  this.timeout(setupTimeoutMs);
  await getTestSession();
});
37+
38+
// Runs the command against the JSON payload fixture and checks the shape of the JSON result.
describe('run-eval with JSON file', () => {
  // Both tests issue the same invocation; execCmd throws if the exit code is not 0.
  const runJsonPayload = () =>
    execCmd<RunEvalResult>(
      `agent test run-eval --spec ${jsonPayloadPath} --agent-api-name Local_Info_Agent --target-org ${getUsername()} --json`,
      { ensureExitCode: 0 }
    ).jsonOutput;

  it('should run evaluation with JSON payload file', async () => {
    const result = runJsonPayload()?.result;

    // Top-level result: a non-empty list of tests.
    expect(result).to.be.ok;
    expect(result?.tests).to.be.an('array');
    expect(result?.tests.length).to.be.greaterThan(0);

    // Summary: every counter must be numeric.
    const summary = result?.summary;
    expect(summary).to.be.ok;
    expect(summary?.passed).to.be.a('number');
    expect(summary?.failed).to.be.a('number');
    expect(summary?.scored).to.be.a('number');
    expect(summary?.errors).to.be.a('number');
  });

  it('should run evaluation with normalized payload', async () => {
    const firstTest = runJsonPayload()?.result.tests[0];

    expect(firstTest).to.be.ok;
    // The id comes straight from the payload fixture.
    expect(firstTest?.id).to.equal('test-topic-routing');
    expect(firstTest?.status).to.be.oneOf(['passed', 'failed']);
    expect(firstTest?.evaluations).to.be.an('array');
  });
});
67+
68+
// Runs the command against the YAML test spec fixture; no --agent-api-name flag is passed.
describe('run-eval with YAML file', () => {
  // Shared invocation for this group; execCmd throws if the exit code is not 0.
  const runYamlSpec = () =>
    execCmd<RunEvalResult>(`agent test run-eval --spec ${yamlSpecPath} --target-org ${getUsername()} --json`, {
      ensureExitCode: 0,
    }).jsonOutput;

  it('should run evaluation with YAML test spec file', async () => {
    const result = runYamlSpec()?.result;

    expect(result).to.be.ok;
    expect(result?.tests).to.be.an('array');
    expect(result?.tests.length).to.be.greaterThan(0);
    expect(result?.summary).to.be.ok;
  });

  it('should auto-infer agent name from YAML subjectName', async () => {
    const result = runYamlSpec()?.result;

    // Should succeed without explicit --agent-api-name flag
    expect(result).to.be.ok;
    expect(result?.tests).to.be.an('array');
  });
});
92+
93+
// Verifies that `--spec -` reads the spec from standard input, for both JSON and YAML.
describe('run-eval with stdin', () => {
  /**
   * Runs `agent test run-eval --spec -` with the given file fed to stdin.
   *
   * execCmd prepends the CLI executable to the command string, so the string must
   * start with the command itself. A leading `cat <file> | sf ...` would execute
   * `<executable> cat <file> | sf ...`, i.e. the first stage would not be `cat` and
   * the second stage would be whatever `sf` is on PATH. Shell input redirection
   * keeps a single process (the executable under test) and also avoids depending
   * on `cat` being available.
   *
   * @param specPath path of the spec file to redirect into stdin
   * @param extraFlags additional flags placed after `--spec -`
   * @returns the parsed JSON output; execCmd throws if the exit code is not 0
   */
  const runFromStdin = (specPath: string, extraFlags = '') => {
    const flags = extraFlags ? `${extraFlags} ` : '';
    const command = `agent test run-eval --spec - ${flags}--target-org ${getUsername()} --json < "${specPath}"`;
    return execCmd<RunEvalResult>(command, { ensureExitCode: 0 }).jsonOutput;
  };

  it('should run evaluation with JSON payload from stdin', async () => {
    const output = runFromStdin(jsonPayloadPath, '--agent-api-name Local_Info_Agent');

    expect(output?.result).to.be.ok;
    expect(output?.result.tests).to.be.an('array');
    expect(output?.result.tests.length).to.be.greaterThan(0);
  });

  it('should run evaluation with YAML spec from stdin', async () => {
    // No --agent-api-name here, matching the file-based YAML tests.
    const output = runFromStdin(yamlSpecPath);

    expect(output?.result).to.be.ok;
    expect(output?.result.tests).to.be.an('array');
    expect(output?.result.tests.length).to.be.greaterThan(0);
  });
});
118+
119+
// Exercises optional flags: --no-normalize, --batch-size and --result-format.
describe('run-eval with flags', () => {
  // Builds the common invocation with one extra flag inserted before --target-org.
  const commandWith = (extraFlag: string): string =>
    `agent test run-eval --spec ${jsonPayloadPath} --agent-api-name Local_Info_Agent ${extraFlag} --target-org ${getUsername()}`;

  // Runs with --json and returns the parsed output; execCmd throws if the exit code is not 0.
  const runJson = (extraFlag: string) =>
    execCmd<RunEvalResult>(`${commandWith(extraFlag)} --json`, { ensureExitCode: 0 }).jsonOutput;

  // Runs without --json and returns raw stdout for the requested result format.
  const runFormatted = (format: string) =>
    execCmd(commandWith(`--result-format ${format}`), { ensureExitCode: 0 }).shellOutput.stdout;

  it('should respect --no-normalize flag', async () => {
    const result = runJson('--no-normalize')?.result;

    expect(result).to.be.ok;
    expect(result?.tests).to.be.an('array');
  });

  it('should use custom batch size', async () => {
    const result = runJson('--batch-size 1')?.result;

    expect(result).to.be.ok;
    expect(result?.tests).to.be.an('array');
  });

  it('should support different result formats', async () => {
    // Human format (default): only checked for non-empty string output.
    const humanOutput = runFormatted('human');
    expect(humanOutput).to.be.ok;
    expect(humanOutput).to.be.a('string');

    // TAP format: output must carry the TAP version header text.
    const tapOutput = runFormatted('tap');
    expect(tapOutput).to.include('TAP version');

    // JUnit format: output must be XML containing a testsuite element.
    const junitOutput = runFormatted('junit');
    expect(junitOutput).to.include('<?xml');
    expect(junitOutput).to.include('testsuite');
  });
});
168+
169+
// Verifies that the command exits non-zero for bad input, an unknown agent, and a missing --spec.
describe('run-eval error handling', () => {
  /**
   * Runs a command that is expected to fail and returns its shell output.
   *
   * `ensureExitCode: 'nonZero'` makes execCmd throw if the command unexpectedly
   * exits 0, so the test fails for the right reason. The previous
   * try/`expect.fail`/catch shape could not do that: execCmd does not throw on a
   * non-zero exit unless `ensureExitCode` is set, and the AssertionError raised by
   * `expect.fail` inside the `try` was swallowed by the `catch`.
   *
   * @param command command string without the executable prefix
   * @returns the shell output (stdout, stderr, exit code) of the failed command
   */
  const runExpectingFailure = (command: string) =>
    execCmd<RunEvalResult>(command, { ensureExitCode: 'nonZero' }).shellOutput;

  it('should fail with invalid JSON payload', async () => {
    const invalidJson = join(mockProjectDir, 'invalid-payload.json');
    const command = `agent test run-eval --spec ${invalidJson} --agent-api-name Local_Info_Agent --target-org ${getUsername()} --json`;

    const shellOutput = runExpectingFailure(command);

    expect(shellOutput.code).to.not.equal(0);
  });

  it('should fail when agent not found', async () => {
    const command = `agent test run-eval --spec ${jsonPayloadPath} --agent-api-name NonExistentAgent --target-org ${getUsername()} --json`;

    const shellOutput = runExpectingFailure(command);

    expect(shellOutput.code).to.not.equal(0);
  });

  it('should require --spec flag', async () => {
    const command = `agent test run-eval --agent-api-name Local_Info_Agent --target-org ${getUsername()} --json`;

    const shellOutput = runExpectingFailure(command);

    // The error text may land on stdout (JSON mode) or stderr, so inspect both.
    // NOTE(review): 'required' is the wording the original assertion expected — confirm against the CLI's actual message.
    expect(`${shellOutput.stdout}${shellOutput.stderr}`).to.include('required');
  });
});
204+
205+
// Checks the shape of the JSON result: per-test entries and the aggregate summary.
describe('run-eval output structure', () => {
  // Shared invocation for this group; execCmd throws if the exit code is not 0.
  const fetchResult = () =>
    execCmd<RunEvalResult>(
      `agent test run-eval --spec ${jsonPayloadPath} --agent-api-name Local_Info_Agent --target-org ${getUsername()} --json`,
      { ensureExitCode: 0 }
    ).jsonOutput?.result;

  it('should include test summaries with correct structure', async () => {
    const result = fetchResult();

    expect(result?.tests).to.be.an('array');

    // Every test entry is expected to expose these keys.
    const firstTest = result?.tests[0];
    for (const key of ['id', 'status', 'evaluations']) {
      expect(firstTest).to.have.property(key);
    }
    expect(firstTest?.evaluations).to.be.an('array');
  });

  it('should include summary with all metrics', async () => {
    const summary = fetchResult()?.summary;
    const metrics = ['passed', 'failed', 'scored', 'errors'] as const;

    // First confirm each metric exists, then that each one is numeric.
    for (const metric of metrics) {
      expect(summary).to.have.property(metric);
    }
    for (const metric of metrics) {
      expect(summary?.[metric]).to.be.a('number');
    }
  });
});
237+
});

0 commit comments

Comments
 (0)