feat: wire --test-runner agentforce-studio into agent test create @W-22513740@ (#430)

BunnyBlit · web-flow · commit 33ddd424cbb8 · 2026-05-28T12:17:38.000-07:00
diff --git a/command-snapshot.json b/command-snapshot.json
@@ -178,7 +178,17 @@
     "command": "agent:test:create",
     "flagAliases": [],
     "flagChars": ["o"],
-    "flags": ["api-name", "api-version", "flags-dir", "force-overwrite", "json", "preview", "spec", "target-org"],
+    "flags": [
+      "api-name",
+      "api-version",
+      "flags-dir",
+      "force-overwrite",
+      "json",
+      "preview",
+      "spec",
+      "target-org",
+      "test-runner"
+    ],
     "plugin": "@salesforce/plugin-agent"
   },
   {
diff --git a/messages/agent.test.create.md b/messages/agent.test.create.md
@@ -6,7 +6,7 @@ Create an agent test in your org using a local test spec YAML file.
 
 To run this command, you must have an agent test spec file, which is a YAML file that lists the test cases for testing a specific agent. Use the "agent generate test-spec" CLI command to generate a test spec file. Then specify the file to this command with the --spec flag, or run this command with no flags to be prompted.
 
-When this command completes, your org contains the new agent test, which you can view and edit using the Testing Center UI. This command also retrieves the metadata component (AiEvaluationDefinition) associated with the new test to your local Salesforce DX project and displays its filename.
+When this command completes, your org contains the new agent test, which you can view and edit using the Testing Center UI (legacy tests) or Agentforce Studio (NGT tests). This command also retrieves the metadata component associated with the new test to your local Salesforce DX project and displays its filename. By default, the command creates a legacy AiEvaluationDefinition component; specify --test-runner agentforce-studio to create an AiTestingDefinition (NGT) component instead.
 
 After you've created the test in the org, use the "agent test run" command to run it.
 
@@ -16,7 +16,7 @@ Path to the test spec YAML file.
 
 # flags.preview.summary
 
-Preview the test metadata file (AiEvaluationDefinition) without deploying to your org.
+Preview the test metadata file without deploying to your org.
 
 # flags.force-overwrite.summary
 
@@ -40,13 +40,17 @@ API name of the new test; the API name must not exist in the org.
 
   <%= config.bin %> <%= command.id %> --spec specs/Resort_Manager-testSpec.yaml --api-name Resort_Manager_Test --preview
 
+- Create an Agentforce Studio (NGT) test from an NGT-format test spec YAML file; the command writes an AiTestingDefinition metadata file:
+
+  <%= config.bin %> <%= command.id %> --spec specs/ReturnsCheckout.ngt.yaml --api-name Returns_Checkout --test-runner agentforce-studio --target-org my-org
+
 # prompt.confirm
 
 A test with the API name %s already exists in the org. Do you want to overwrite it?
 
 # info.success
 
-Local AiEvaluationDefinition metadata XML file created at %s and agent test deployed to %s.
+Local test metadata XML file created at %s and agent test deployed to %s.
 
 # info.preview-success
 
diff --git a/src/commands/agent/test/create.ts b/src/commands/agent/test/create.ts
@@ -21,7 +21,7 @@ import { AgentTest, AgentTestCreateLifecycleStages } from '@salesforce/agents';
 import { DeployResult } from '@salesforce/source-deploy-retrieve';
 import { MultiStageOutput } from '@oclif/multi-stage-output';
 import { CLIError } from '@oclif/core/errors';
-import { makeFlags, promptForFlag, promptForYamlFile } from '../../../flags.js';
+import { makeFlags, promptForFlag, promptForYamlFile, testRunnerFlag } from '../../../flags.js';
 import yesNoOrCancel from '../../../yes-no-cancel.js';
 
 Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
@@ -110,6 +110,7 @@ export default class AgentTestCreate extends SfCommand<AgentTestCreateResult> {
     'force-overwrite': Flags.boolean({
       summary: messages.getMessage('flags.force-overwrite.summary'),
     }),
+    'test-runner': testRunnerFlag,
   };
   private mso?: MultiStageOutput<{ path: string }>;
 
@@ -175,33 +176,37 @@ export default class AgentTestCreate extends SfCommand<AgentTestCreateResult> {
       return Promise.resolve();
     });
 
+    const testRunner = flags['test-runner'];
+    const outputDirName = testRunner === 'agentforce-studio' ? 'aiTestingDefinitions' : 'aiEvaluationDefinitions';
+
     let path;
     let contents;
     try {
       const result = await AgentTest.create(connection, apiName, spec, {
-        outputDir: join('force-app', 'main', 'default', 'aiEvaluationDefinitions'),
+        outputDir: join('force-app', 'main', 'default', outputDirName),
         preview: flags.preview,
+        testRunner,
       });
       path = result.path;
       contents = result.contents;
     } catch (error) {
       const wrapped = SfError.wrap(error);
 
-      // Check for file not found errors
-      if (
-        wrapped.message.toLowerCase().includes('not found') ||
-        wrapped.message.toLowerCase().includes('enoent') ||
-        wrapped.code === 'ENOENT'
-      ) {
+      if (wrapped.code === 'ENOENT' || wrapped.name === 'ENOENT') {
         throw new SfError(`Test spec file not found: ${spec}`, 'SpecFileNotFound', [], 2, wrapped);
       }
 
-      // Check for deployment failures (API/network)
+      // NGT validateNgtSpec errors are user-fixable spec issues: rethrow as-is (exit 1), not as DeploymentFailed.
+      if (wrapped.name?.startsWith('ngt')) {
+        throw wrapped;
+      }
+
+      // Deploy failures from the lib are bare SfErrors with the componentFailures text as message
+      // and no structured code, so message substring is the only available signal.
       if (wrapped.message.toLowerCase().includes('deploy') || wrapped.message.toLowerCase().includes('api')) {
         throw new SfError(`Deployment failed: ${wrapped.message}`, 'DeploymentFailed', [wrapped.message], 4, wrapped);
       }
 
-      // Other errors (validation, format issues) use exit 1
       throw wrapped;
     }
 
diff --git a/test/mock-projects/agent-generate-template/specs/ngtTestSpec.yaml b/test/mock-projects/agent-generate-template/specs/ngtTestSpec.yaml
@@ -0,0 +1,75 @@
+name: ReturnsCheckoutSuite
+description: Validates the Returns / Checkout flow on agent v1.
+subjectType: AGENT
+subjectName: ReturnsAgent
+subjectVersion: v1
+testCases:
+  # 1: assertion scorers - topic + action + LLM-judged outcome.
+  - inputs:
+      - utterance: 'Where is my order #12345?'
+    scorers:
+      - name: topic_sequence_match
+        expected: order_status
+      - name: action_sequence_match
+        expected: Get_Order_Status
+      - name: bot_response_rating
+        expected: 'Agent looks up the order and returns its status'
+  # 2: assertion + quality + numeric mix.
+  - inputs:
+      - utterance: 'Cancel my order'
+    scorers:
+      - name: topic_sequence_match
+        expected: returns
+      - name: bot_response_rating
+        expected: 'Agent confirms the cancellation'
+      - name: coherence
+      - name: factuality
+      - name: output_latency_milliseconds
+  # 3: multi-action expected - Python-list-string format.
+  - inputs:
+      - utterance: 'Verify identity and look up my order'
+    scorers:
+      - name: action_sequence_match
+        expected: "['Verify_Customer','Get_Order_Status']"
+  # 4: contextVariables + multi-turn conversationHistory + task_resolution.
+  - inputs:
+      - utterance: 'Yes, my email is jane@example.com'
+        contextVariables:
+          - name: RoutableId
+            value: '0Mw000000000001'
+        conversationHistory:
+          - role: user
+            message: 'I need help with my order'
+          - role: agent
+            topic: identity_verification
+            message: "I can help. What's your email on file?"
+    scorers:
+      - name: topic_sequence_match
+        expected: identity_verification
+      - name: response_match
+        expected: 'Agent verifies identity and continues'
+      - name: task_resolution
+  # 5: quality scorers without `expected`.
+  - inputs:
+      - utterance: 'Show order details'
+    scorers:
+      - name: factuality
+      - name: completeness
+  # 6: handoff scorer - expected is the target agent's DeveloperName.
+  - inputs:
+      - utterance: 'I need a sales rep'
+    scorers:
+      - name: topic_sequence_match
+        expected: handoff
+      - name: agent_handoff_match
+        expected: SDRAgent
+  # 7: multi-input - same scorers evaluate against three phrasings.
+  - inputs:
+      - utterance: "What's the status of order #12345?"
+      - utterance: 'Where is my order 12345'
+      - utterance: 'Tell me about order #12345'
+    scorers:
+      - name: topic_sequence_match
+        expected: order_status
+      - name: action_sequence_match
+        expected: Get_Order_Status
diff --git a/test/nuts/agent.test.create.nut.ts b/test/nuts/agent.test.create.nut.ts
@@ -77,4 +77,41 @@ describe('agent test create', function () {
       }
     );
   });
+
+  it('should create NGT test from NGT-shaped spec file with --test-runner agentforce-studio', () => {
+    const testApiName = genUniqueString('Test_Agent_NGT_%s');
+    const specPath = join(session.project.dir, 'specs', 'ngtTestSpec.yaml');
+
+    const commandResult = execCmd<AgentTestCreateResult>(
+      `agent test create --api-name "${testApiName}" --spec "${specPath}" --test-runner agentforce-studio --target-org ${getUsername()} --preview --json`,
+      { ensureExitCode: 0 }
+    );
+
+    const result = commandResult.jsonOutput?.result;
+    if (!result || typeof result !== 'object' || !result.path || !result.contents) {
+      throw new Error(
+        `Command failed or returned invalid result. Result type: ${typeof result}, value: ${JSON.stringify(result)}`
+      );
+    }
+
+    expect(result.path).to.be.a('string').and.not.be.empty;
+    expect(result.contents).to.be.a('string').and.not.be.empty;
+    // preview mode writes <apiName>-preview-<ISO>.xml; non-preview would be .aiTestingDefinition-meta.xml.
+    expect(result.path).to.match(/-preview-.*\.xml$/);
+    expect(result.contents).to.include('<AiTestingDefinition');
+  });
+
+  it('should fail with NGT validation error when legacy YAML is passed with --test-runner agentforce-studio', () => {
+    const testApiName = genUniqueString('Test_Agent_Legacy_%s');
+    const legacySpecPath = join(session.project.dir, 'specs', 'testSpec.yaml');
+
+    const commandResult = execCmd<AgentTestCreateResult>(
+      `agent test create --api-name "${testApiName}" --spec "${legacySpecPath}" --test-runner agentforce-studio --target-org ${getUsername()} --preview --json`,
+      { ensureExitCode: 'nonZero' }
+    );
+
+    // Legacy YAML uses top-level utterance/expectedTopic per testCase, so NGT validation fails on
+    // the missing `inputs:` array. Asserts the NGT validator runs (rather than the legacy path).
+    expect(commandResult.jsonOutput?.message ?? '').to.match(/NGT test case|inputs/i);
+  });
 });