@@ -422,6 +422,172 @@ func TestRealCodex_DynamicFilterToggle(t *testing.T) {
422422 }
423423}
424424
425+ // ---------------------------------------------------------------------------
426+ // Full end-to-end: real agent starts, processes messages, creates sessions.
427+ // Requires API keys — these tests take 30-60s each.
428+ // ---------------------------------------------------------------------------
429+
430+ // TestE2E_Codex_FullSessionLifecycle exercises the complete workflow with a
431+ // real Codex agent:
432+ // 1. Send message → wait for agent reply → /list shows 1 session
433+ // 2. /new "my-test-session" → new session created
434+ // 3. Send message in new session → wait for agent reply
435+ // 4. /list → both sessions visible, session name "my-test-session" appears
436+ //
437+ // This proves the full pipeline: real CLI process → event parsing → session
438+ // tracking → filter logic → /list output.
439+ func TestE2E_Codex_FullSessionLifecycle (t * testing.T ) {
440+ e , mp , _ , cleanup := setupIntegrationEngine (t , "codex" )
441+ defer cleanup ()
442+
443+ uk := sessionKey ("e2e-codex-user" )
444+ send := func (content string ) {
445+ e .ReceiveMessage (mp , & core.Message {
446+ SessionKey : uk , Platform : "mock" , UserID : "e2e-codex-user" ,
447+ UserName : "tester" , Content : content , ReplyCtx : "ctx" ,
448+ })
449+ }
450+
451+ // ── Step 1: first message → agent replies ──
452+ t .Log ("step 1: sending first message to codex" )
453+ send ("respond with exactly: STEP1_OK" )
454+ _ , ok := waitForMessageContaining (mp , "STEP1_OK" , 60 * time .Second )
455+ if ! ok {
456+ t .Fatalf ("step 1: agent did not reply; got: %v" , mp .getSent ())
457+ }
458+ t .Log ("step 1: agent replied" )
459+
460+ // ── Step 2: /list → should show at least 1 session ──
461+ mp .clear ()
462+ send ("/list" )
463+ msgs1 , ok := waitForMessages (mp , 1 , 10 * time .Second )
464+ if ! ok {
465+ t .Fatalf ("step 2: no /list reply" )
466+ }
467+ list1 := joinMsgContent (msgs1 )
468+ count1 := strings .Count (list1 , "msgs" )
469+ if count1 < 1 {
470+ t .Fatalf ("step 2: /list should show >= 1 session, got %d\n %s" , count1 , list1 )
471+ }
472+ t .Logf ("step 2: /list shows %d session(s)" , count1 )
473+
474+ // ── Step 3: /new with custom name ──
475+ mp .clear ()
476+ send ("/new my-test-session" )
477+ _ , ok = waitForMessageContaining (mp , "new" , 10 * time .Second )
478+ if ! ok {
479+ t .Logf ("step 3: /new response: %v" , mp .getSent ())
480+ }
481+ t .Log ("step 3: /new executed" )
482+
483+ // ── Step 4: send message in new session → agent replies ──
484+ mp .clear ()
485+ send ("respond with exactly: STEP4_OK" )
486+ _ , ok = waitForMessageContaining (mp , "STEP4_OK" , 60 * time .Second )
487+ if ! ok {
488+ t .Fatalf ("step 4: agent did not reply in new session; got: %v" , mp .getSent ())
489+ }
490+ t .Log ("step 4: agent replied in new session" )
491+
492+ // ── Step 5: /list → both sessions visible ──
493+ mp .clear ()
494+ send ("/list" )
495+ msgs2 , ok := waitForMessages (mp , 1 , 10 * time .Second )
496+ if ! ok {
497+ t .Fatalf ("step 5: no /list reply" )
498+ }
499+ list2 := joinMsgContent (msgs2 )
500+ count2 := strings .Count (list2 , "msgs" )
501+ if count2 < 2 {
502+ t .Fatalf ("step 5: /list should show >= 2 sessions after /new + message, got %d\n %s" , count2 , list2 )
503+ }
504+ t .Logf ("step 5: /list shows %d sessions" , count2 )
505+
506+ // ── Step 6: verify session name ──
507+ if ! strings .Contains (list2 , "my-test-session" ) {
508+ t .Errorf ("step 6: /list should show session name 'my-test-session'\n %s" , list2 )
509+ } else {
510+ t .Log ("step 6: session name 'my-test-session' confirmed in /list" )
511+ }
512+ }
513+
514+ // TestE2E_ClaudeCode_FullSessionLifecycle is the same as the Codex variant
515+ // but exercises Claude Code's session handling (synchronous session ID).
516+ func TestE2E_ClaudeCode_FullSessionLifecycle (t * testing.T ) {
517+ e , mp , _ , cleanup := setupIntegrationEngine (t , "claudecode" )
518+ defer cleanup ()
519+
520+ uk := sessionKey ("e2e-cc-user" )
521+ send := func (content string ) {
522+ e .ReceiveMessage (mp , & core.Message {
523+ SessionKey : uk , Platform : "mock" , UserID : "e2e-cc-user" ,
524+ UserName : "tester" , Content : content , ReplyCtx : "ctx" ,
525+ })
526+ }
527+
528+ // ── Step 1: first message → agent replies ──
529+ t .Log ("step 1: sending first message to claude code" )
530+ send ("respond with exactly: STEP1_OK" )
531+ _ , ok := waitForMessageContaining (mp , "STEP1_OK" , 60 * time .Second )
532+ if ! ok {
533+ t .Fatalf ("step 1: agent did not reply; got: %v" , mp .getSent ())
534+ }
535+ t .Log ("step 1: agent replied" )
536+
537+ // ── Step 2: /list ──
538+ mp .clear ()
539+ send ("/list" )
540+ msgs1 , ok := waitForMessages (mp , 1 , 10 * time .Second )
541+ if ! ok {
542+ t .Fatalf ("step 2: no /list reply" )
543+ }
544+ list1 := joinMsgContent (msgs1 )
545+ count1 := strings .Count (list1 , "msgs" )
546+ if count1 < 1 {
547+ t .Fatalf ("step 2: /list should show >= 1 session, got %d\n %s" , count1 , list1 )
548+ }
549+ t .Logf ("step 2: /list shows %d session(s)" , count1 )
550+
551+ // ── Step 3: /new ──
552+ mp .clear ()
553+ send ("/new cc-session-name" )
554+ _ , ok = waitForMessageContaining (mp , "new" , 10 * time .Second )
555+ if ! ok {
556+ t .Logf ("step 3: /new response: %v" , mp .getSent ())
557+ }
558+ t .Log ("step 3: /new executed" )
559+
560+ // ── Step 4: message in new session ──
561+ mp .clear ()
562+ send ("respond with exactly: STEP4_OK" )
563+ _ , ok = waitForMessageContaining (mp , "STEP4_OK" , 60 * time .Second )
564+ if ! ok {
565+ t .Fatalf ("step 4: agent did not reply in new session; got: %v" , mp .getSent ())
566+ }
567+ t .Log ("step 4: agent replied in new session" )
568+
569+ // ── Step 5: /list → both sessions ──
570+ mp .clear ()
571+ send ("/list" )
572+ msgs2 , ok := waitForMessages (mp , 1 , 10 * time .Second )
573+ if ! ok {
574+ t .Fatalf ("step 5: no /list reply" )
575+ }
576+ list2 := joinMsgContent (msgs2 )
577+ count2 := strings .Count (list2 , "msgs" )
578+ if count2 < 2 {
579+ t .Fatalf ("step 5: /list should show >= 2 sessions, got %d\n %s" , count2 , list2 )
580+ }
581+ t .Logf ("step 5: /list shows %d sessions" , count2 )
582+
583+ // ── Step 6: verify session name ──
584+ if ! strings .Contains (list2 , "cc-session-name" ) {
585+ t .Errorf ("step 6: /list should show session name 'cc-session-name'\n %s" , list2 )
586+ } else {
587+ t .Log ("step 6: session name 'cc-session-name' confirmed in /list" )
588+ }
589+ }
590+
425591// ---------------------------------------------------------------------------
426592// helpers
427593// ---------------------------------------------------------------------------
0 commit comments