Skip to content

Commit e9c0d7f

Browse files
committed
test(integration): add E2E tests with real agent conversation
Add TestE2E_Codex_FullSessionLifecycle and TestE2E_ClaudeCode_FullSessionLifecycle that exercise the complete workflow: send message → agent replies → /list → /new with name → send message → agent replies → /list verifies both sessions visible with correct session name. Requires API keys (OPENAI_API_KEY / ANTHROPIC_API_KEY); skips gracefully when unavailable. Made-with: Cursor
1 parent f3a729d commit e9c0d7f

1 file changed

Lines changed: 166 additions & 0 deletions

File tree

tests/integration/filter_sessions_test.go

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,172 @@ func TestRealCodex_DynamicFilterToggle(t *testing.T) {
422422
}
423423
}
424424

425+
// ---------------------------------------------------------------------------
426+
// Full end-to-end: real agent starts, processes messages, creates sessions.
427+
// Requires API keys — these tests take 30-60s each.
428+
// ---------------------------------------------------------------------------
429+
430+
// TestE2E_Codex_FullSessionLifecycle exercises the complete workflow with a
431+
// real Codex agent:
432+
// 1. Send message → wait for agent reply → /list shows 1 session
433+
// 2. /new "my-test-session" → new session created
434+
// 3. Send message in new session → wait for agent reply
435+
// 4. /list → both sessions visible, session name "my-test-session" appears
436+
//
437+
// This proves the full pipeline: real CLI process → event parsing → session
438+
// tracking → filter logic → /list output.
439+
func TestE2E_Codex_FullSessionLifecycle(t *testing.T) {
440+
e, mp, _, cleanup := setupIntegrationEngine(t, "codex")
441+
defer cleanup()
442+
443+
uk := sessionKey("e2e-codex-user")
444+
send := func(content string) {
445+
e.ReceiveMessage(mp, &core.Message{
446+
SessionKey: uk, Platform: "mock", UserID: "e2e-codex-user",
447+
UserName: "tester", Content: content, ReplyCtx: "ctx",
448+
})
449+
}
450+
451+
// ── Step 1: first message → agent replies ──
452+
t.Log("step 1: sending first message to codex")
453+
send("respond with exactly: STEP1_OK")
454+
_, ok := waitForMessageContaining(mp, "STEP1_OK", 60*time.Second)
455+
if !ok {
456+
t.Fatalf("step 1: agent did not reply; got: %v", mp.getSent())
457+
}
458+
t.Log("step 1: agent replied")
459+
460+
// ── Step 2: /list → should show at least 1 session ──
461+
mp.clear()
462+
send("/list")
463+
msgs1, ok := waitForMessages(mp, 1, 10*time.Second)
464+
if !ok {
465+
t.Fatalf("step 2: no /list reply")
466+
}
467+
list1 := joinMsgContent(msgs1)
468+
count1 := strings.Count(list1, "msgs")
469+
if count1 < 1 {
470+
t.Fatalf("step 2: /list should show >= 1 session, got %d\n%s", count1, list1)
471+
}
472+
t.Logf("step 2: /list shows %d session(s)", count1)
473+
474+
// ── Step 3: /new with custom name ──
475+
mp.clear()
476+
send("/new my-test-session")
477+
_, ok = waitForMessageContaining(mp, "new", 10*time.Second)
478+
if !ok {
479+
t.Logf("step 3: /new response: %v", mp.getSent())
480+
}
481+
t.Log("step 3: /new executed")
482+
483+
// ── Step 4: send message in new session → agent replies ──
484+
mp.clear()
485+
send("respond with exactly: STEP4_OK")
486+
_, ok = waitForMessageContaining(mp, "STEP4_OK", 60*time.Second)
487+
if !ok {
488+
t.Fatalf("step 4: agent did not reply in new session; got: %v", mp.getSent())
489+
}
490+
t.Log("step 4: agent replied in new session")
491+
492+
// ── Step 5: /list → both sessions visible ──
493+
mp.clear()
494+
send("/list")
495+
msgs2, ok := waitForMessages(mp, 1, 10*time.Second)
496+
if !ok {
497+
t.Fatalf("step 5: no /list reply")
498+
}
499+
list2 := joinMsgContent(msgs2)
500+
count2 := strings.Count(list2, "msgs")
501+
if count2 < 2 {
502+
t.Fatalf("step 5: /list should show >= 2 sessions after /new + message, got %d\n%s", count2, list2)
503+
}
504+
t.Logf("step 5: /list shows %d sessions", count2)
505+
506+
// ── Step 6: verify session name ──
507+
if !strings.Contains(list2, "my-test-session") {
508+
t.Errorf("step 6: /list should show session name 'my-test-session'\n%s", list2)
509+
} else {
510+
t.Log("step 6: session name 'my-test-session' confirmed in /list")
511+
}
512+
}
513+
514+
// TestE2E_ClaudeCode_FullSessionLifecycle is the same as the Codex variant
515+
// but exercises Claude Code's session handling (synchronous session ID).
516+
func TestE2E_ClaudeCode_FullSessionLifecycle(t *testing.T) {
517+
e, mp, _, cleanup := setupIntegrationEngine(t, "claudecode")
518+
defer cleanup()
519+
520+
uk := sessionKey("e2e-cc-user")
521+
send := func(content string) {
522+
e.ReceiveMessage(mp, &core.Message{
523+
SessionKey: uk, Platform: "mock", UserID: "e2e-cc-user",
524+
UserName: "tester", Content: content, ReplyCtx: "ctx",
525+
})
526+
}
527+
528+
// ── Step 1: first message → agent replies ──
529+
t.Log("step 1: sending first message to claude code")
530+
send("respond with exactly: STEP1_OK")
531+
_, ok := waitForMessageContaining(mp, "STEP1_OK", 60*time.Second)
532+
if !ok {
533+
t.Fatalf("step 1: agent did not reply; got: %v", mp.getSent())
534+
}
535+
t.Log("step 1: agent replied")
536+
537+
// ── Step 2: /list ──
538+
mp.clear()
539+
send("/list")
540+
msgs1, ok := waitForMessages(mp, 1, 10*time.Second)
541+
if !ok {
542+
t.Fatalf("step 2: no /list reply")
543+
}
544+
list1 := joinMsgContent(msgs1)
545+
count1 := strings.Count(list1, "msgs")
546+
if count1 < 1 {
547+
t.Fatalf("step 2: /list should show >= 1 session, got %d\n%s", count1, list1)
548+
}
549+
t.Logf("step 2: /list shows %d session(s)", count1)
550+
551+
// ── Step 3: /new ──
552+
mp.clear()
553+
send("/new cc-session-name")
554+
_, ok = waitForMessageContaining(mp, "new", 10*time.Second)
555+
if !ok {
556+
t.Logf("step 3: /new response: %v", mp.getSent())
557+
}
558+
t.Log("step 3: /new executed")
559+
560+
// ── Step 4: message in new session ──
561+
mp.clear()
562+
send("respond with exactly: STEP4_OK")
563+
_, ok = waitForMessageContaining(mp, "STEP4_OK", 60*time.Second)
564+
if !ok {
565+
t.Fatalf("step 4: agent did not reply in new session; got: %v", mp.getSent())
566+
}
567+
t.Log("step 4: agent replied in new session")
568+
569+
// ── Step 5: /list → both sessions ──
570+
mp.clear()
571+
send("/list")
572+
msgs2, ok := waitForMessages(mp, 1, 10*time.Second)
573+
if !ok {
574+
t.Fatalf("step 5: no /list reply")
575+
}
576+
list2 := joinMsgContent(msgs2)
577+
count2 := strings.Count(list2, "msgs")
578+
if count2 < 2 {
579+
t.Fatalf("step 5: /list should show >= 2 sessions, got %d\n%s", count2, list2)
580+
}
581+
t.Logf("step 5: /list shows %d sessions", count2)
582+
583+
// ── Step 6: verify session name ──
584+
if !strings.Contains(list2, "cc-session-name") {
585+
t.Errorf("step 6: /list should show session name 'cc-session-name'\n%s", list2)
586+
} else {
587+
t.Log("step 6: session name 'cc-session-name' confirmed in /list")
588+
}
589+
}
590+
425591
// ---------------------------------------------------------------------------
426592
// helpers
427593
// ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)