@@ -528,6 +528,85 @@ describe("bun e2e baseline for the typescript cli", () => {
528528 expect ( backend . countByKind ( "send_message" ) ) . toBe ( 1 ) ;
529529 } ) ;
530530
// Passing a scenario id through `--scenario` must select exactly that
// scenario: it is the only one reported on stdout, the only id stored on the
// newest run row, and the only one that produces target traffic.
test("--scenario flag alias works like --scenario-id", async () => {
  await workspace.writeOpenAiScript(buildOpenAiRules());

  const result = await runAgentprobe(
    [
      "run",
      "--endpoint",
      workspace.endpointPath,
      "--scenarios",
      workspace.scenariosPath,
      "--personas",
      workspace.personasPath,
      "--rubric",
      workspace.rubricPath,
      "--scenario",
      "billing-followup",
    ],
    {
      backendUrl: backend.url,
      suiteDir: workspace.suiteDir,
      workspace,
    },
  );

  expect(result.exitCode).toBe(0);
  expect(result.stdout).not.toContain("refund-smoke");
  expect(result.stdout).toContain("PASS billing-followup score=0.80");

  // Rows are ordered newest-first, so index 0 is the run started above.
  const runRows = queryRows(
    workspace.dbPath,
    ["selected_scenario_ids_json"],
    "runs",
    "started_at DESC",
  );
  expect(runRows[0]?.selected_scenario_ids_json).toEqual([
    "billing-followup",
  ]);

  // Guard against the alias silently running extra scenarios: a single
  // selected scenario must result in a single send_message request.
  expect(backend.countByKind("send_message")).toBe(1);
});
569+
// `--scenario` also accepts a scenario's display name; the run must then be
// limited to the scenario carrying that name.
test("--scenario filters by scenario name", async () => {
  await workspace.writeOpenAiScript(buildOpenAiRules());

  const cliArgs = [
    "run",
    "--endpoint",
    workspace.endpointPath,
    "--scenarios",
    workspace.scenariosPath,
    "--personas",
    workspace.personasPath,
    "--rubric",
    workspace.rubricPath,
    "--scenario",
    "Billing escalation follow-up",
  ];
  const runOptions = {
    backendUrl: backend.url,
    suiteDir: workspace.suiteDir,
    workspace,
  };
  const outcome = await runAgentprobe(cliArgs, runOptions);

  expect(outcome.exitCode).toBe(0);
  expect(outcome.stdout).not.toContain("refund-smoke");
  expect(outcome.stdout).toContain("PASS billing-followup score=0.80");

  // Newest run first: the leading row belongs to the invocation above.
  const [latestRun] = queryRows(
    workspace.dbPath,
    ["selected_scenario_ids_json"],
    "runs",
    "started_at DESC",
  );
  expect(latestRun?.selected_scenario_ids_json).toEqual(["billing-followup"]);
  expect(backend.countByKind("send_message")).toBe(1);
});
609+
531610 test ( "tag filtering runs only matching scenarios" , async ( ) => {
532611 await workspace . writeOpenAiScript ( buildOpenAiRules ( ) ) ;
533612
@@ -566,6 +645,83 @@ describe("bun e2e baseline for the typescript cli", () => {
566645 expect ( backend . countByKind ( "send_message" ) ) . toBe ( 1 ) ;
567646 } ) ;
568647
// A comma-separated `--scenario-id` value selects several scenarios in one
// run; both must pass, be recorded on the run row, and send one message each.
test("comma-separated --scenario-id runs multiple specific scenarios", async () => {
  await workspace.writeOpenAiScript(buildOpenAiRules());

  const cliArgs = [
    "run",
    "--endpoint",
    workspace.endpointPath,
    "--scenarios",
    workspace.scenariosPath,
    "--personas",
    workspace.personasPath,
    "--rubric",
    workspace.rubricPath,
    "--scenario-id",
    "refund-smoke,billing-followup",
  ];
  const runOptions = {
    backendUrl: backend.url,
    suiteDir: workspace.suiteDir,
    workspace,
  };
  const outcome = await runAgentprobe(cliArgs, runOptions);

  expect(outcome.exitCode).toBe(0);
  expect(outcome.stdout).toContain("PASS refund-smoke score=1.00");
  expect(outcome.stdout).toContain("PASS billing-followup score=0.80");

  // Newest run first: the leading row belongs to the invocation above.
  const [latestRun] = queryRows(
    workspace.dbPath,
    ["selected_scenario_ids_json"],
    "runs",
    "started_at DESC",
  );
  const expectedIds = ["refund-smoke", "billing-followup"];
  expect(latestRun?.selected_scenario_ids_json).toEqual(expectedIds);
  expect(backend.countByKind("send_message")).toBe(2);
});
688+
// `list` prints one "<id>: <name> [<tags>]" line per scenario in the file.
test("list command shows available scenarios", async () => {
  const listArgs = ["list", "--scenarios", workspace.scenariosPath];
  const outcome = await runAgentprobe(listArgs, {
    backendUrl: backend.url,
    suiteDir: workspace.suiteDir,
    workspace,
  });

  expect(outcome.exitCode).toBe(0);

  const listing = outcome.stdout;
  expect(listing).toContain("refund-smoke: Refund smoke question [smoke]");
  expect(listing).toContain(
    "billing-followup: Billing escalation follow-up [regression]",
  );
});
707+
// `list --tags smoke` keeps the smoke-tagged scenario and drops the other one.
test("list command with --tags filters scenarios", async () => {
  const listArgs = [
    "list",
    "--scenarios",
    workspace.scenariosPath,
    "--tags",
    "smoke",
  ];
  const outcome = await runAgentprobe(listArgs, {
    backendUrl: backend.url,
    suiteDir: workspace.suiteDir,
    workspace,
  });

  expect(outcome.exitCode).toBe(0);

  const listing = outcome.stdout;
  expect(listing).toContain("refund-smoke: Refund smoke question [smoke]");
  expect(listing).not.toContain("billing-followup");
});
724+
569725 test ( "no-match filtering returns a configuration error without target traffic" , async ( ) => {
570726 await workspace . writeOpenAiScript ( { rules : [ ] } ) ;
571727
@@ -598,6 +754,37 @@ describe("bun e2e baseline for the typescript cli", () => {
598754 expect ( await readOpenAiLog ( workspace . openAiLogPath ) ) . toHaveLength ( 0 ) ;
599755 } ) ;
600756
// An unknown `--scenario-id` must fail with exit code 2, echo the rejected id
// together with the ids that do exist, and abort before any backend or OpenAI
// traffic is generated.
test("no-match scenario-id returns a configuration error with available ids", async () => {
  // Empty rule set: nothing in this test is expected to reach OpenAI.
  await workspace.writeOpenAiScript({ rules: [] });

  const result = await runAgentprobe(
    [
      "run",
      "--endpoint",
      workspace.endpointPath,
      "--scenarios",
      workspace.scenariosPath,
      "--personas",
      workspace.personasPath,
      "--rubric",
      workspace.rubricPath,
      "--scenario-id",
      "does-not-exist",
    ],
    {
      backendUrl: backend.url,
      suiteDir: workspace.suiteDir,
      workspace,
    },
  );

  expect(result.exitCode).toBe(2);
  expect(result.stderr).toContain("does-not-exist");
  expect(result.stderr).toContain("refund-smoke");
  expect(result.stderr).toContain("billing-followup");
  expect(backend.requestLog).toHaveLength(0);
  // Mirror the neighbouring no-match test: the error must also surface
  // before any OpenAI request is made.
  expect(await readOpenAiLog(workspace.openAiLogPath)).toHaveLength(0);
});
787+
601788 test ( "dry-run avoids backend and openai calls while still recording the run" , async ( ) => {
602789 await workspace . writeOpenAiScript ( { rules : [ ] } ) ;
603790
0 commit comments