Skip to content

Commit 0eb4a6d

Browse files
Merge pull request #350 from salesforcecli/feat/agent-test-run-eval
Feat/agent test run eval @W-21482725@
2 parents e736f2b + fc1c1db commit 0eb4a6d

File tree

15 files changed

+3118
-0
lines changed

15 files changed

+3118
-0
lines changed

command-snapshot.json

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,25 @@
213213
],
214214
"plugin": "@salesforce/plugin-agent"
215215
},
216+
{
217+
"alias": [],
218+
"command": "agent:test:run-eval",
219+
"flagAliases": [],
220+
"flagChars": ["n", "o", "s", "w"],
221+
"flags": [
222+
"api-name",
223+
"api-version",
224+
"batch-size",
225+
"flags-dir",
226+
"json",
227+
"no-normalize",
228+
"result-format",
229+
"spec",
230+
"target-org",
231+
"wait"
232+
],
233+
"plugin": "@salesforce/plugin-agent"
234+
},
216235
{
217236
"alias": [],
218237
"command": "agent:validate:authoring-bundle",

messages/agent.test.run-eval.md

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# summary
2+
3+
Run evaluation tests against an Agentforce agent.
4+
5+
# description
6+
7+
Execute rich evaluation tests against an Agentforce agent using the Einstein Evaluation API. Supports both YAML test specs (the same format that `sf agent generate test-spec` produces) and JSON payloads.
8+
9+
When you provide a YAML test spec, the command automatically translates test cases into Evaluation API calls and infers the agent name from the spec's `subjectName` field. This means you can use the same test spec with both `sf agent test run` and `sf agent test run-eval`.
10+
11+
When you provide a JSON payload, it's sent directly to the API with optional normalization. The normalizer auto-corrects common field name mistakes, converts shorthand references to JSONPath, and injects defaults. Use `--no-normalize` to disable this auto-normalization.
12+
13+
Supports 8+ evaluator types, including topic routing assertions, action invocation checks, string/numeric assertions, semantic similarity scoring, and LLM-based quality ratings.
14+
15+
# flags.spec.summary
16+
17+
Path to test spec file (YAML or JSON). Use `-` for stdin.
18+
19+
# flags.api-name.summary
20+
21+
Agent DeveloperName (also called API name) used to resolve agent_id and agent_version_id. If omitted, it's inferred from the YAML spec's subjectName.
22+
23+
# flags.wait.summary
24+
25+
Number of minutes to wait for results.
26+
27+
# flags.result-format.summary
28+
29+
Format of the agent test results.
30+
31+
# flags.batch-size.summary
32+
33+
Number of tests per API request (max 5).
34+
35+
# flags.no-normalize.summary
36+
37+
Disable auto-normalization of field names and shorthand references.
38+
39+
# examples
40+
41+
- Run tests using a YAML test spec on the org with alias "my-org":
42+
43+
<%= config.bin %> <%= command.id %> --spec tests/my-agent-testSpec.yaml --target-org my-org
44+
45+
- Run tests using a YAML spec with an explicit agent name override on the org with alias "my-org":
46+
47+
<%= config.bin %> <%= command.id %> --spec tests/my-agent-testSpec.yaml --api-name My_Agent --target-org my-org
48+
49+
- Run tests using a JSON payload:
50+
51+
<%= config.bin %> <%= command.id %> --spec tests/eval-payload.json --target-org my-org
52+
53+
- Run tests and output results in JUnit format; useful for continuous integration and deployment (CI/CD):
54+
55+
<%= config.bin %> <%= command.id %> --spec tests/my-agent-testSpec.yaml --target-org my-org --result-format junit
56+
57+
- Pipe JSON payload from stdin:
58+
59+
$ echo '{"tests":[...]}' | <%= config.bin %> <%= command.id %> --spec - --target-org my-org
60+
61+
# info.batchProgress
62+
63+
Running batch %s of %s (%s tests)...
64+
65+
# info.testComplete
66+
67+
Test %s: %s.
68+
69+
# info.summary
70+
71+
Results: %s passed, %s failed, %s scored, %s errors.
72+
73+
# info.yamlDetected
74+
75+
Detected YAML test spec for agent '%s' with %s test case(s). Translating to Evaluation API format.
76+
77+
# error.invalidPayload
78+
79+
Invalid test payload: %s.
80+
81+
# error.apiError
82+
83+
Einstein Evaluation API error (HTTP %s): %s
84+
85+
# error.agentNotFound
86+
87+
No agent found with DeveloperName (also called API name) '%s'. Verify that the agent exists in the target org.
88+
89+
# error.agentVersionNotFound
90+
91+
No published version found for agent '%s'. Make sure the agent has been published at least once.

schemas/agent-test-run__eval.json

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema#",
3+
"$ref": "#/definitions/RunEvalResult",
4+
"definitions": {
5+
"RunEvalResult": {
6+
"type": "object",
7+
"properties": {
8+
"tests": {
9+
"type": "array",
10+
"items": {
11+
"type": "object",
12+
"properties": {
13+
"id": {
14+
"type": "string"
15+
},
16+
"status": {
17+
"type": "string"
18+
},
19+
"evaluations": {
20+
"type": "array",
21+
"items": {}
22+
}
23+
},
24+
"required": ["id", "status", "evaluations"],
25+
"additionalProperties": false
26+
}
27+
},
28+
"summary": {
29+
"type": "object",
30+
"properties": {
31+
"passed": {
32+
"type": "number"
33+
},
34+
"failed": {
35+
"type": "number"
36+
},
37+
"scored": {
38+
"type": "number"
39+
},
40+
"errors": {
41+
"type": "number"
42+
}
43+
},
44+
"required": ["passed", "failed", "scored", "errors"],
45+
"additionalProperties": false
46+
}
47+
},
48+
"required": ["tests", "summary"],
49+
"additionalProperties": false
50+
}
51+
}
52+
}

0 commit comments

Comments
 (0)