@@ -94,6 +94,7 @@ vi.mock("../trace-viewer", () => ({
9494 trace ?: { messages ?: Array < { content ?: unknown } > } | null ;
9595 forcedViewMode ?: string ;
9696 isLoading ?: boolean ;
97+ expectedToolCalls ?: Array < { toolName : string } > ;
9798 } ) => {
9899 mockTraceViewer ( props ) ;
99100 const firstMessage = props . trace ?. messages ?. [ 0 ] ?. content ;
@@ -106,6 +107,7 @@ vi.mock("../trace-viewer", () => ({
106107 data-first-message = {
107108 typeof firstMessage === "string" ? firstMessage : "non-string"
108109 }
110+ data-expected-tool-count = { String ( props . expectedToolCalls ?. length ?? 0 ) }
109111 />
110112 ) ;
111113 } ,
@@ -252,6 +254,7 @@ describe("TestTemplateEditor run view from route", () => {
252254 trace ?: { messages ?: Array < { content ?: unknown } > } | null ;
253255 forcedViewMode ?: string ;
254256 isLoading ?: boolean ;
257+ expectedToolCalls ?: Array < { toolName : string ; arguments : Record < string , unknown > } > ;
255258 } ;
256259 }
257260
@@ -673,6 +676,85 @@ describe("TestTemplateEditor run view from route", () => {
673676 expect ( screen . queryByText ( "Running GPT-4…" ) ) . not . toBeInTheDocument ( ) ;
674677 } ) ;
675678
// Test: once Run has been clicked and the stream has produced no events yet,
// a case that declares expected tool calls must show the (mocked) trace viewer
// with data-view-mode "tools", data-is-loading "true" and an expected-tool
// count of "1", and must not show the "Running GPT-4" text.
// NOTE(review): the leading "NNN+" markers and the spaced-out regex literals
// below look like diff-rendering artifacts (presumably /run$/i and
// /Running GPT-4/) — confirm against the real file.
679+ it ( "renders a tools preview (not generic spinner) before the first stream event when the case has expected tool calls" , async ( ) => {
680+ const user = userEvent . setup ( ) ;
// Fixture: caseDoc overridden to be a non-negative test with exactly one
// expected tool call ("create_view", empty arguments).
681+ const caseWithTools = {
682+ ...caseDoc ,
683+ isNegativeTest : false ,
684+ expectedToolCalls : [ { toolName : "create_view" , arguments : { } } ] ,
685+ } ;
686+
// Route every mocked query by its name; any query not matched below yields
// undefined. listTestIterations only answers when its args are not "skip".
687+ useQueryMock . mockImplementation ( ( name : string , args : unknown ) => {
688+ if ( name === "testSuites:listTestCases" ) return [ caseWithTools ] ;
689+ if ( name === "testSuites:getTestSuite" ) {
690+ return { _id : "suite-1" , environment : { servers : [ "srv" ] } } ;
691+ }
692+ if ( name === "testSuites:listTestIterations" && args !== "skip" ) {
693+ return [ baseIteration ] ;
694+ }
695+ if (
696+ name === "testSuites:getTestIteration" &&
697+ typeof args === "object" &&
698+ args !== null &&
699+ ( args as { iterationId ?: string } ) . iterationId === baseIteration . _id
700+ ) {
701+ return baseIteration ;
702+ }
703+ return undefined ;
704+ } ) ;
705+
706+ // Stream never resolves — keeps the run in "running, no iteration" state.
707+ streamEvalTestCaseMock . mockImplementation (
708+ async ( ) => new Promise < void > ( ( ) => { } ) ,
709+ ) ;
710+
// Mount the editor for suite-1 / case-1 with a single model labelled "GPT-4"
// and the "claude" host style.
711+ renderWithProviders (
712+ < TestTemplateEditor
713+ suiteId = "suite-1"
714+ selectedTestCaseId = "case-1"
715+ connectedServerNames = { new Set ( [ "srv" ] ) }
716+ workspaceId = { null }
717+ availableModels = { [
718+ {
719+ provider : "openai" ,
720+ id : "gpt-4" ,
721+ model : "gpt-4" ,
722+ name : "GPT-4" ,
723+ label : "GPT-4" ,
724+ } as any ,
725+ ] }
726+ /> ,
727+ { hostStyle : "claude" } ,
728+ ) ;
729+
// Wait until the Run button is present before clicking it.
730+ await waitFor ( ( ) => {
731+ expect ( screen . getByRole ( "button" , { name : / r u n $ / i } ) ) . toBeInTheDocument ( ) ;
732+ } ) ;
733+
734+ await user . click ( screen . getByRole ( "button" , { name : / r u n $ / i } ) ) ;
735+
// The click must have started exactly one stream, and the mocked trace viewer
// must be mounted while that stream is still pending.
736+ await waitFor ( ( ) => {
737+ expect ( streamEvalTestCaseMock ) . toHaveBeenCalledTimes ( 1 ) ;
738+ expect ( screen . getByTestId ( "mock-trace-viewer" ) ) . toBeInTheDocument ( ) ;
739+ } ) ;
740+
741+ // Must show tools view (not chat) and pass expected tool calls through.
742+ expect ( screen . getByTestId ( "mock-trace-viewer" ) ) . toHaveAttribute (
743+ "data-view-mode" ,
744+ "tools" ,
745+ ) ;
746+ expect ( screen . getByTestId ( "mock-trace-viewer" ) ) . toHaveAttribute (
747+ "data-is-loading" ,
748+ "true" ,
749+ ) ;
750+ expect ( screen . getByTestId ( "mock-trace-viewer" ) ) . toHaveAttribute (
751+ "data-expected-tool-count" ,
752+ "1" ,
753+ ) ;
754+ // Generic spinner must not appear.
755+ expect ( screen . queryByText ( / R u n n i n g G P T - 4 / ) ) . not . toBeInTheDocument ( ) ;
756+ } ) ;
757+
676758 it ( "replaces the initial preview with streamed chat messages as soon as live trace data exists" , async ( ) => {
677759 const user = userEvent . setup ( ) ;
678760 let emitEvent :
@@ -811,6 +893,94 @@ describe("TestTemplateEditor run view from route", () => {
811893 } ) ;
812894 } ) ;
813895
// Test: for a multi-turn case whose only expected tool call lives on the second
// prompt turn (the case-level list is empty), the trace viewer rendered after
// clicking Run must receive forcedViewMode "tools" and that turn-2 tool call.
// NOTE(review): the title says "Results tab" while the assertions check
// forcedViewMode "tools" — presumably the Results tab corresponds to the tools
// view mode; confirm against the component.
896+ it ( "defaults to Results tab when expected tool calls are on a non-first prompt turn (multi-turn case)" , async ( ) => {
897+ const user = userEvent . setup ( ) ;
898+ // Multi-turn case: turn 1 has no expected tool calls, turn 2 has one.
899+ const multiTurnCase = {
900+ ...caseDoc ,
901+ isNegativeTest : false ,
902+ expectedToolCalls : [ ] ,
903+ promptTurns : [
904+ {
905+ id : "turn-1" ,
906+ prompt : "First prompt" ,
907+ expectedToolCalls : [ ] ,
908+ } ,
909+ {
910+ id : "turn-2" ,
911+ prompt : "Second prompt" ,
912+ expectedToolCalls : [ { toolName : "some_tool" , arguments : { } } ] ,
913+ } ,
914+ ] ,
915+ } ;
916+
// Route every mocked query by its name; any query not matched below yields
// undefined. listTestIterations only answers when its args are not "skip".
917+ useQueryMock . mockImplementation ( ( name : string , args : unknown ) => {
918+ if ( name === "testSuites:listTestCases" ) {
919+ return [ multiTurnCase ] ;
920+ }
921+ if ( name === "testSuites:getTestSuite" ) {
922+ return {
923+ _id : "suite-1" ,
924+ environment : { servers : [ "srv" ] } ,
925+ } ;
926+ }
927+ if ( name === "testSuites:listTestIterations" && args !== "skip" ) {
928+ return [ baseIteration ] ;
929+ }
930+ if (
931+ name === "testSuites:getTestIteration" &&
932+ typeof args === "object" &&
933+ args !== null &&
934+ ( args as { iterationId ?: string } ) . iterationId === baseIteration . _id
935+ ) {
936+ return baseIteration ;
937+ }
938+ return undefined ;
939+ } ) ;
// The stream promise never settles, so no stream result is ever delivered
// during this test.
940+ streamEvalTestCaseMock . mockImplementation (
941+ async ( ) => new Promise < void > ( ( ) => { } ) ,
942+ ) ;
943+
// Mount the editor for suite-1 / case-1 with a single model labelled "GPT-4"
// and the "claude" host style.
944+ renderWithProviders (
945+ < TestTemplateEditor
946+ suiteId = "suite-1"
947+ selectedTestCaseId = "case-1"
948+ connectedServerNames = { new Set ( [ "srv" ] ) }
949+ workspaceId = { null }
950+ availableModels = { [
951+ {
952+ provider : "openai" ,
953+ id : "gpt-4" ,
954+ model : "gpt-4" ,
955+ name : "GPT-4" ,
956+ label : "GPT-4" ,
957+ } as any ,
958+ ] }
959+ /> ,
960+ { hostStyle : "claude" } ,
961+ ) ;
962+
// Wait until the Run button is present before clicking it.
// NOTE(review): the spaced-out regex literal is presumably /run$/i mangled by
// the diff rendering — confirm against the real file.
963+ await waitFor ( ( ) => {
964+ expect ( screen . getByRole ( "button" , { name : / r u n $ / i } ) ) . toBeInTheDocument ( ) ;
965+ } ) ;
966+
967+ await user . click ( screen . getByRole ( "button" , { name : / r u n $ / i } ) ) ;
968+
// The click must have started exactly one stream.
969+ await waitFor ( ( ) => {
970+ expect ( streamEvalTestCaseMock ) . toHaveBeenCalledTimes ( 1 ) ;
971+ } ) ;
972+
973+ // The pre-stream preview TraceViewer must be rendered in tools mode with
974+ // the expected tool call flattened from turn 2.
// getLatestTraceViewerProps is defined outside this block; presumably it returns
// the props most recently passed to the mocked TraceViewer — verify.
975+ await waitFor ( ( ) => {
976+ const props = getLatestTraceViewerProps ( ) ;
977+ expect ( props . forcedViewMode ) . toBe ( "tools" ) ;
978+ expect ( props . expectedToolCalls ) . toEqual ( [
979+ { toolName : "some_tool" , arguments : { } } ,
980+ ] ) ;
981+ } ) ;
982+ } ) ;
983+
814984 it ( "shows the host-style pill only while the chat tab is active" , async ( ) => {
815985 const user = userEvent . setup ( ) ;
816986 const caseWithExpectedToolCalls = {
@@ -882,6 +1052,10 @@ describe("TestTemplateEditor run view from route", () => {
8821052
8831053 const card = getCompareCard ( "GPT-4" ) ;
8841054
1055+ // Default tab is Results when the case has expected tools — host-style pill is Chat-only.
1056+ expect ( card . querySelector ( "[data-selected-host-style]" ) ) . toBeNull ( ) ;
1057+
1058+ await user . click ( within ( card ) . getByRole ( "button" , { name : / ^ C h a t $ / i } ) ) ;
8851059 expect ( card . querySelector ( '[data-selected-host-style="claude"]' ) ) . not . toBe (
8861060 null ,
8871061 ) ;
0 commit comments