evalops
diff --git a/‎README.md‎
Lines changed: 89 additions & 1 deletion b/‎README.md‎
Lines changed: 89 additions & 1 deletion
diff --git a/‎examples/conversational-analysis.ts‎
Lines changed: 197 additions & 0 deletions b/‎examples/conversational-analysis.ts‎
Lines changed: 197 additions & 0 deletions
diff --git a/‎package-lock.json‎
Lines changed: 21 additions & 0 deletions b/‎package-lock.json‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎package.json‎
Lines changed: 2 additions & 0 deletions b/‎package.json‎
Lines changed: 2 additions & 0 deletions
@@ -17,6 +17,7 @@ The "escalation" model treats LLMs like heterogeneous microservices - route to t
 ## Features
 
 - **Gemini 2.5 Pro Preview**: Uses Google's latest Gemini 2.5 Pro Preview (05-06) model with 1M token context window
+- **Conversational Analysis**: NEW! AI-to-AI dialogues between Claude and Gemini for iterative problem-solving
 - **Execution Flow Tracing**: Understands data flow and state transformations, not just function calls
 - **Cross-System Impact Analysis**: Models how changes propagate across service boundaries
 - **Performance Modeling**: Identifies N+1 patterns, memory leaks, and algorithmic bottlenecks
@@ -99,7 +100,63 @@ Add to your Claude Desktop configuration (`~/Library/Application Support/Claude/
 
 **Note**: The tool parameters use snake_case naming convention and are validated using Zod schemas. The actual implementation provides more detailed type safety than shown in these simplified examples. Full TypeScript type definitions are available in `src/models/types.ts`.
 
-### escalate_analysis
+### Conversational Analysis Tools
+
+The server now includes AI-to-AI conversational tools that enable Claude and Gemini to engage in multi-turn dialogues for complex analysis:
+
+#### start_conversation
+Initiates a conversational analysis session between Claude and Gemini.
+
+```typescript
+{
+  claude_context: {
+    attempted_approaches: string[];      // What Claude tried
+    partial_findings: any[];            // What Claude found
+    stuck_description: string;          // Where Claude got stuck
+    code_scope: {
+      files: string[];                  // Files to analyze
+      entry_points?: CodeLocation[];    // Starting points
+      service_names?: string[];         // Services involved
+    }
+  };
+  analysis_type: 'execution_trace' | 'cross_system' | 'performance' | 'hypothesis_test';
+  initial_question?: string;            // Optional opening question
+}
+```
+
+#### continue_conversation
+Continues an active conversation with Claude's response or follow-up question.
+
+```typescript
+{
+  session_id: string;                   // Active session ID
+  message: string;                      // Claude's message to Gemini
+  include_code_snippets?: boolean;      // Enrich with code context
+}
+```
+
+#### finalize_conversation
+Completes the conversation and generates structured analysis results.
+
+```typescript
+{
+  session_id: string;                   // Active session ID
+  summary_format: 'detailed' | 'concise' | 'actionable';
+}
+```
+
+#### get_conversation_status
+Checks the status and progress of an ongoing conversation.
+
+```typescript
+{
+  session_id: string;                   // Session ID to check
+}
+```
+
+### Traditional Analysis Tools
+
+#### escalate_analysis
 Main tool for handing off complex analysis from Claude Code to Gemini.
 
 ```typescript
@@ -181,6 +238,37 @@ Test specific theories about code behavior.
 
 ## Example Use Cases
 
+### Conversational Analysis Example
+
+When Claude needs deep iterative analysis with Gemini:
+
+```javascript
+// 1. Start conversation
+const session = await start_conversation({
+  claude_context: {
+    attempted_approaches: ["Checked for N+1 queries", "Profiled database calls"],
+    partial_findings: [{ type: "performance", description: "Multiple DB queries in loop" }],
+    stuck_description: "Can't determine if queries are optimizable",
+    code_scope: { files: ["src/services/UserService.ts"] }
+  },
+  analysis_type: "performance",
+  initial_question: "Are these queries necessary or can they be batched?"
+});
+
+// 2. Continue with follow-ups
+const response = await continue_conversation({
+  session_id: session.sessionId,
+  message: "The queries fetch user preferences. Could we use a join instead?",
+  include_code_snippets: true
+});
+
+// 3. Finalize when ready
+const results = await finalize_conversation({
+  session_id: session.sessionId,
+  summary_format: "actionable"
+});
+```
+
 ### Case 1: Distributed Trace Analysis
 
 When a failure signature spans multiple services with GB of logs:
 
@@ -0,0 +1,197 @@
+/**
+ * Example of using the conversational MCP for AI-to-AI dialogue
+ * between Claude and Gemini for deep code analysis
+ */
+
+/**
+ * Example 1: Performance Analysis Conversation.
+ *
+ * Starts a "performance" session, sends two follow-up messages while logging
+ * each reply, then finalizes with the "actionable" summary format and logs the
+ * resulting recommendations.
+ *
+ * NOTE(review): `mcp` is not declared in this file; it is assumed to be a
+ * conversational MCP client supplied by the surrounding environment.
+ */
+async function performanceAnalysisExample() {
+  // Claude opens the dialogue with what it tried, what it found, and where it got stuck.
+  const opening = await mcp.startConversation({
+    claude_context: {
+      attempted_approaches: [
+        "Searched for N+1 query patterns",
+        "Checked for obvious loops",
+        "Analyzed database calls"
+      ],
+      partial_findings: [
+        { type: "performance", description: "Found repeated DB calls in UserService" }
+      ],
+      stuck_description: "Can't determine if the performance issue is from algorithm complexity or I/O bottlenecks",
+      code_scope: {
+        files: ["src/services/UserService.ts", "src/repositories/UserRepository.ts"],
+        entry_points: ["getUserWithDetails"]
+      }
+    },
+    analysis_type: "performance",
+    initial_question: "I see repeated database calls but can't trace the full execution flow. Are these calls necessary or could they be optimized?"
+  });
+  const activeSession = opening.sessionId;
+
+  console.log("Gemini's initial analysis:", opening.initialResponse);
+  console.log("Suggested follow-ups:", opening.suggestedFollowUps);
+
+  // First follow-up: call pattern and data volume, with code snippets attached.
+  const firstReply = await mcp.continueConversation({
+    session_id: activeSession,
+    message: "The getUserWithDetails function is called in a loop from the API handler. Each user triggers 3-4 additional queries for related data. The data volume is typically 100-1000 users per request.",
+    include_code_snippets: true
+  });
+
+  // Expected reply: "That's a classic N+1 problem. Are these related queries for user roles, permissions, or preferences? Also, is there any caching layer between the service and database?"
+  console.log("Gemini:", firstReply.response);
+
+  // Second follow-up: runtime details about the individual queries.
+  const secondReply = await mcp.continueConversation({
+    session_id: activeSession,
+    message: "The queries are for: user roles (1 query), permissions (1-2 queries), and preferences (1 query). No caching layer currently exists. The database is PostgreSQL with average query time of 10-15ms."
+  });
+
+  // Log the deeper analysis together with the progress indicators on the reply.
+  console.log("Gemini:", secondReply.response);
+  console.log("Progress:", secondReply.analysisProgress);
+  console.log("Can finalize:", secondReply.canFinalize);
+
+  // Close the session and print the recommendations from the final summary.
+  const wrapUp = await mcp.finalizeConversation({
+    session_id: activeSession,
+    summary_format: "actionable"
+  });
+
+  console.log("Final recommendations:", wrapUp.recommendations);
+}
+
+/**
+ * Example 2: Complex Execution Trace with Back-and-Forth.
+ *
+ * Starts an "execution_trace" session, replays a scripted list of Claude
+ * messages one turn at a time (logging each reply), then finalizes with the
+ * "detailed" summary format and logs the root causes from the findings.
+ *
+ * NOTE(review): `mcp` is not declared in this file; it is assumed to be a
+ * conversational MCP client supplied by the surrounding environment.
+ */
+async function executionTraceExample() {
+  // Claude initiates analysis of async execution flow.
+  // Only the session id is needed below, so nothing else is destructured.
+  const { sessionId } = await mcp.startConversation({
+    claude_context: {
+      attempted_approaches: [
+        "Traced synchronous function calls",
+        "Identified async/await patterns",
+        "Looked for event emitters"
+      ],
+      partial_findings: [
+        { type: "architecture", description: "Complex async flow with multiple event handlers" }
+      ],
+      stuck_description: "Lost track of execution when events are emitted - can't determine order of operations",
+      code_scope: {
+        files: ["src/workers/DataProcessor.ts", "src/events/EventBus.ts"],
+        entry_points: ["processDataBatch"]
+      }
+    },
+    analysis_type: "execution_trace"
+  });
+
+  // Conversational flow. Only the `claude` entries are sent; the `gemini`
+  // entries illustrate the kind of reply expected and are never transmitted.
+  const conversation = [
+    {
+      claude: "I found event emitters for 'data.processed' and 'batch.complete' but can't trace their handlers",
+      gemini: "I see the event handlers are registered dynamically. Are there any race conditions between these handlers?"
+    },
+    {
+      claude: "Yes! Sometimes 'batch.complete' fires before all 'data.processed' events are handled. Here's the code where handlers are registered...",
+      gemini: "This is a race condition. The batch completion check doesn't wait for pending promises. Let me trace the actual execution order..."
+    }
+  ];
+
+  // Turns are awaited one after another so each message is sent only after
+  // the previous reply has arrived.
+  for (const turn of conversation) {
+    const response = await mcp.continueConversation({
+      session_id: sessionId,
+      message: turn.claude,
+      include_code_snippets: true
+    });
+    console.log("Gemini's response:", response.response);
+  }
+
+  // Get final execution trace
+  const finalAnalysis = await mcp.finalizeConversation({
+    session_id: sessionId,
+    summary_format: "detailed"
+  });
+
+  console.log("Root causes found:", finalAnalysis.findings.rootCauses);
+}
+
+/**
+ * Example 3: Hypothesis Testing Through Dialogue.
+ *
+ * Starts a "hypothesis_test" session, sends one refinement message, then keeps
+ * the dialogue going until the reported status allows finalizing. The status
+ * is re-fetched after every turn and the number of extra turns is capped, so
+ * the example always terminates.
+ *
+ * NOTE(review): `mcp` is not declared in this file; it is assumed to be a
+ * conversational MCP client supplied by the surrounding environment.
+ *
+ * @throws Error when the conversation is still not finalizable after the
+ *   follow-up turn limit has been reached.
+ */
+async function hypothesisTestingExample() {
+  // Upper bound on extra follow-up turns; prevents an unbounded loop.
+  const maxFollowUpTurns = 5;
+
+  const { sessionId } = await mcp.startConversation({
+    claude_context: {
+      attempted_approaches: ["Static analysis", "Pattern matching"],
+      partial_findings: [
+        { type: "bug", description: "Intermittent null pointer exceptions in production" }
+      ],
+      stuck_description: "Can't reproduce the issue locally - suspect it's related to concurrent access",
+      code_scope: {
+        files: ["src/cache/CacheManager.ts", "src/services/SessionService.ts"]
+      }
+    },
+    analysis_type: "hypothesis_test",
+    initial_question: "My hypothesis: the cache invalidation happens during read operations causing null returns. Can you help validate this?"
+  });
+
+  // Multi-turn hypothesis refinement
+  await mcp.continueConversation({
+    session_id: sessionId,
+    message: "The cache uses a simple Map without synchronization. Multiple services access it concurrently."
+  });
+
+  let status = await mcp.getConversationStatus({ session_id: sessionId });
+  console.log("Conversation status:", status);
+
+  // Continue until ready to finalize. The status must be refreshed inside the
+  // loop: checking a value fetched once would never change and spin forever.
+  for (let turn = 0; !status.canFinalize && turn < maxFollowUpTurns; turn++) {
+    await mcp.continueConversation({
+      session_id: sessionId,
+      // Placeholder message: a real caller would answer the questions raised
+      // in Gemini's latest reply.
+      message: "Please continue validating the hypothesis with the context provided so far."
+    });
+    status = await mcp.getConversationStatus({ session_id: sessionId });
+  }
+
+  if (!status.canFinalize) {
+    throw new Error(
+      `Conversation ${sessionId} is not ready to finalize after ${maxFollowUpTurns} follow-up turns`
+    );
+  }
+
+  // summary_format is passed explicitly: the README schema for
+  // finalize_conversation lists it as a non-optional field.
+  const result = await mcp.finalizeConversation({
+    session_id: sessionId,
+    summary_format: "detailed"
+  });
+  console.log("Validated hypotheses:", result.enrichedContext.validatedHypotheses);
+}
+
+/**
+ * Example 4: Cross-System Impact Analysis with Progressive Discovery.
+ *
+ * Starts a "cross_system" session, logs the initial assessment, reports two
+ * additional discoveries one after another, then finalizes with the
+ * "detailed" summary format and logs the impact findings.
+ *
+ * NOTE(review): `mcp` is not declared in this file; it is assumed to be a
+ * conversational MCP client supplied by the surrounding environment.
+ */
+async function crossSystemExample() {
+  const kickoff = await mcp.startConversation({
+    claude_context: {
+      attempted_approaches: ["Checked API contracts", "Reviewed service dependencies"],
+      partial_findings: [
+        { type: "architecture", description: "API change in UserService affects multiple consumers" }
+      ],
+      stuck_description: "Can't trace all downstream impacts - some services use dynamic field access",
+      code_scope: {
+        files: ["src/api/UserAPI.ts"],
+        service_names: ["UserService", "AuthService", "NotificationService"]
+      }
+    },
+    analysis_type: "cross_system",
+    initial_question: "Planning to change the user object structure. Which services will break?"
+  });
+  const impactSession = kickoff.sessionId;
+
+  // Progressive discovery through conversation
+  console.log("Initial impact assessment:", kickoff.initialResponse);
+
+  // Dependencies Claude discovers while the conversation is in progress,
+  // reported in order with each send awaited before the next.
+  const discoveries = [
+    "Just found that ReportingService also consumes user data through event streams. It expects the old field names.",
+    "The AnalyticsService has a batch job that processes user updates. It uses reflection to access fields dynamically."
+  ];
+  for (const discovery of discoveries) {
+    await mcp.continueConversation({
+      session_id: impactSession,
+      message: discovery
+    });
+  }
+
+  // Get comprehensive impact analysis
+  const impactReport = await mcp.finalizeConversation({
+    session_id: impactSession,
+    summary_format: "detailed"
+  });
+
+  console.log("All affected services:", impactReport.findings.crossSystemImpacts);
+  console.log("Breaking changes:", impactReport.recommendations.immediateActions);
+}
+
+// Named exports: one entry per example scenario defined in this file.
+export {
+  performanceAnalysisExample,
+  executionTraceExample,
+  hypothesisTestingExample,
+  crossSystemExample
+};
@@ -35,7 +35,9 @@
     "@google/generative-ai": "^0.24.1",
     "@modelcontextprotocol/sdk": "^0.5.0",
     "@types/node": "^20.0.0",
+    "@types/uuid": "^10.0.0",
     "dotenv": "^16.3.1",
+    "uuid": "^11.1.0",
     "zod": "^3.22.0"
   },
   "devDependencies": {