MetaMask
diff --git a/‎.github/workflows/generate-rc-test-plan.yml‎
Lines changed: 367 additions & 0 deletions b/‎.github/workflows/generate-rc-test-plan.yml‎
Lines changed: 367 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 5 additions & 0 deletions b/‎.gitignore‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎tests/tools/e2e-ai-analyzer/.eslintrc.js‎
Lines changed: 6 additions & 0 deletions b/‎tests/tools/e2e-ai-analyzer/.eslintrc.js‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎tests/tools/e2e-ai-analyzer/ai-tools/handlers/finalize-test-plan.ts‎
Lines changed: 11 additions & 0 deletions b/‎tests/tools/e2e-ai-analyzer/ai-tools/handlers/finalize-test-plan.ts‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎tests/tools/e2e-ai-analyzer/ai-tools/tool-executor.ts‎
Lines changed: 4 additions & 0 deletions b/‎tests/tools/e2e-ai-analyzer/ai-tools/tool-executor.ts‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎tests/tools/e2e-ai-analyzer/ai-tools/tool-registry.ts‎
Lines changed: 205 additions & 2 deletions b/‎tests/tools/e2e-ai-analyzer/ai-tools/tool-registry.ts‎
Lines changed: 205 additions & 2 deletions
diff --git a/‎tests/tools/e2e-ai-analyzer/analysis/analyzer.ts‎
Lines changed: 23 additions & 3 deletions b/‎tests/tools/e2e-ai-analyzer/analysis/analyzer.ts‎
Lines changed: 23 additions & 3 deletions
diff --git a/‎tests/tools/e2e-ai-analyzer/config.ts‎
Lines changed: 2 additions & 1 deletion b/‎tests/tools/e2e-ai-analyzer/config.ts‎
Lines changed: 2 additions & 1 deletion
@@ -185,3 +185,8 @@ temp/
 tests/coverage-systems/
 
 runway-artifacts/
+
+# E2E AI Analyzer output files
+release-test-plan.json
+release-delta.json
+release-signoffs.json
@@ -0,0 +1,6 @@
+module.exports = {
+  rules: {
+    /* Turn off the deprecated '@typescript-eslint/no-parameter-properties'
+     * rule, which doesn't exist in the current plugin version.
+     * NOTE(review): typescript-eslint replaced it with
+     * '@typescript-eslint/parameter-properties' — confirm whether that rule
+     * should be configured here instead. */
+    '@typescript-eslint/no-parameter-properties': 'off',
+  },
+};
@@ -0,0 +1,11 @@
+/**
+ * Finalize Test Plan Tool Handler
+ *
+ * Handles the finalization of the AI's test plan generation.
+ *
+ * `handleFinalizeTestPlan(input)` performs no validation or transformation:
+ * it returns the tool input it was given, serialized with `JSON.stringify`.
+ * NOTE(review): presumably the caller parses this string back into the test
+ * plan — confirm against the analyzer.
+ */
+
+import { ToolInput } from '../../types';
+
+export function handleFinalizeTestPlan(input: ToolInput): string {
+  return JSON.stringify(input);
+}
@@ -12,6 +12,7 @@ import { handleListDirectory } from './handlers/list-directory';
 import { handleGrepCodebase } from './handlers/grep-codebase';
 import { handleLoadSkill } from './handlers/load-skill';
 import { handleFinalizeTagSelection } from './handlers/finalize-tag-selection';
+import { handleFinalizeTestPlan } from './handlers/finalize-test-plan';
 
 /**
  * Tool execution context
@@ -57,6 +58,9 @@ export async function executeTool(
       case 'finalize_tag_selection':
         return handleFinalizeTagSelection(input);
 
+      case 'finalize_test_plan_generation':
+        return handleFinalizeTestPlan(input);
+
       default:
         return `Unknown tool: ${toolName}`;
     }
 
@@ -9,10 +9,10 @@ import { LLMTool } from '../providers';
 import { TOOL_LIMITS } from '../config';
 
 /**
- * Gets all tool definitions for the AI agent
+ * Gets tool definitions for the AI agent
  */
 export function getToolDefinitions(): LLMTool[] {
-  return [
+  const allTools: LLMTool[] = [
     {
       name: 'read_file',
       description:
@@ -194,5 +194,208 @@ export function getToolDefinitions(): LLMTool[] {
         ],
       },
     },
+    {
+      name: 'finalize_test_plan_generation',
+      description: 'Submit the final exploratory test plan for the release',
+      input_schema: {
+        type: 'object',
+        properties: {
+          summary: {
+            type: 'object',
+            description: 'High-level metrics for the test plan',
+            properties: {
+              total_changed_files: { type: 'number' },
+              total_commits: { type: 'number' },
+              critical_areas: { type: 'number' },
+              high_risk_areas: { type: 'number' },
+              medium_risk_areas: { type: 'number' },
+              low_risk_areas: { type: 'number' },
+              estimated_testing_hours: { type: 'string' },
+              release_version: { type: 'string' },
+            },
+            required: [
+              'total_changed_files',
+              'critical_areas',
+              'high_risk_areas',
+              'estimated_testing_hours',
+            ],
+          },
+          feature_areas: {
+            type: 'array',
+            description:
+              'Prioritized list of feature areas with test scenarios',
+            items: {
+              type: 'object',
+              properties: {
+                feature_area: { type: 'string' },
+                risk_level: {
+                  type: 'string',
+                  enum: ['critical', 'high', 'medium', 'low'],
+                },
+                risk_justification: { type: 'string' },
+                impacted_components: {
+                  type: 'array',
+                  items: { type: 'string' },
+                },
+                exploratory_scenarios: {
+                  type: 'array',
+                  items: {
+                    type: 'object',
+                    properties: {
+                      id: { type: 'string' },
+                      title: { type: 'string' },
+                      description: { type: 'string' },
+                      preconditions: {
+                        type: 'array',
+                        items: { type: 'string' },
+                      },
+                      exploration_guidance: {
+                        type: 'array',
+                        items: { type: 'string' },
+                      },
+                      risk_indicators: {
+                        type: 'array',
+                        items: { type: 'string' },
+                      },
+                      related_changes: {
+                        type: 'array',
+                        items: { type: 'string' },
+                      },
+                    },
+                    required: ['id', 'title', 'description'],
+                  },
+                },
+                platform_notes: {
+                  type: 'object',
+                  properties: {
+                    ios: { type: 'array', items: { type: 'string' } },
+                    android: { type: 'array', items: { type: 'string' } },
+                    shared: { type: 'array', items: { type: 'string' } },
+                  },
+                },
+                priority: { type: 'number' },
+                exploratory_priority: {
+                  type: 'number',
+                  description:
+                    'Score 1-10 indicating how much this area needs exploratory testing',
+                },
+                exploration_charters: {
+                  type: 'array',
+                  description: 'Specific exploration missions for this area',
+                  items: {
+                    type: 'object',
+                    properties: {
+                      id: { type: 'string' },
+                      mission: {
+                        type: 'string',
+                        description: 'The exploration goal',
+                      },
+                      context: {
+                        type: 'string',
+                        description: 'Why this matters for this release',
+                      },
+                      what_ifs: {
+                        type: 'array',
+                        items: { type: 'string' },
+                        description: 'Specific questions to investigate',
+                      },
+                      time_box: {
+                        type: 'string',
+                        description: 'Suggested exploration time',
+                      },
+                    },
+                    required: ['id', 'mission', 'what_ifs'],
+                  },
+                },
+              },
+              required: ['feature_area', 'risk_level', 'priority'],
+            },
+          },
+          cross_cutting_concerns: {
+            type: 'array',
+            items: { type: 'string' },
+            description: 'Issues that span multiple feature areas',
+          },
+          regression_focus_areas: {
+            type: 'array',
+            items: { type: 'string' },
+            description: 'Areas requiring extra regression attention',
+          },
+          platform_specific_guidance: {
+            type: 'object',
+            properties: {
+              ios: { type: 'array', items: { type: 'string' } },
+              android: { type: 'array', items: { type: 'string' } },
+              shared: { type: 'array', items: { type: 'string' } },
+            },
+          },
+          exploration_themes: {
+            type: 'array',
+            description:
+              'Cross-cutting exploration approaches that apply across features',
+            items: {
+              type: 'object',
+              properties: {
+                name: {
+                  type: 'string',
+                  description: 'Theme name (e.g., "Interruption Testing")',
+                },
+                description: {
+                  type: 'string',
+                  description: 'What this theme covers',
+                },
+                techniques: {
+                  type: 'array',
+                  items: { type: 'string' },
+                  description: 'Specific testing techniques for this theme',
+                },
+                applicable_areas: {
+                  type: 'array',
+                  items: { type: 'string' },
+                  description:
+                    'Feature areas where this theme is especially relevant',
+                },
+              },
+              required: ['name', 'description', 'techniques'],
+            },
+          },
+          exploratory_focus_areas: {
+            type: 'array',
+            description:
+              'Top 3-5 areas most deserving of creative exploratory testing',
+            items: {
+              type: 'object',
+              properties: {
+                feature_area: { type: 'string' },
+                exploratory_priority: {
+                  type: 'number',
+                  description: 'Score 1-10',
+                },
+                reason: {
+                  type: 'string',
+                  description: 'Why this area needs exploration',
+                },
+                suggested_time_box: {
+                  type: 'string',
+                  description: 'Recommended exploration time',
+                },
+              },
+              required: ['feature_area', 'exploratory_priority', 'reason'],
+            },
+          },
+          reasoning: {
+            type: 'string',
+            description: 'Explanation of analysis approach and key findings',
+          },
+          confidence: {
+            type: 'number',
+            description: 'Confidence score 0-100',
+          },
+        },
+        required: ['summary', 'feature_areas', 'reasoning', 'confidence'],
+      },
+    },
   ];
+
+  return allTools;
 }
@@ -33,6 +33,16 @@ import {
   outputAnalysis as outputSelectTagsAnalysis,
   checkHardRules as checkSelectTagsHardRules,
 } from '../modes/select-tags/handlers';
+import {
+  buildSystemPrompt as buildTestPlanSystemPrompt,
+  buildTaskPrompt as buildTestPlanTaskPrompt,
+} from '../modes/generate-test-plan/prompt';
+import {
+  processAnalysis as processTestPlanAnalysis,
+  createConservativeResult as createTestPlanConservativeResult,
+  createEmptyResult as createTestPlanEmptyResult,
+  outputAnalysis as outputTestPlanAnalysis,
+} from '../modes/generate-test-plan/handlers';
 
 /**
  * Mode Registry — see ModeConfig in types/index.ts for the full interface.
@@ -56,6 +66,16 @@ export const MODES: {
     outputAnalysis: outputSelectTagsAnalysis,
     checkHardRules: checkSelectTagsHardRules,
   },
+  'generate-test-plan': {
+    description: 'Generate exploratory test plan for release testing',
+    finalizeToolName: 'finalize_test_plan_generation',
+    systemPromptBuilder: buildTestPlanSystemPrompt,
+    taskPromptBuilder: buildTestPlanTaskPrompt,
+    processAnalysis: processTestPlanAnalysis,
+    createConservativeResult: createTestPlanConservativeResult,
+    createEmptyResult: createTestPlanEmptyResult,
+    outputAnalysis: outputTestPlanAnalysis,
+  },
 };
 
 // Type aliases for mode keys and analysis results
@@ -112,6 +132,7 @@ export async function analyzeWithAgent<M extends ModeKey>(
   const taskPrompt = modeConfig.taskPromptBuilder(
     allChangedFiles,
     criticalFiles,
+    context,
   );
 
   const tools = getToolDefinitions();
@@ -229,7 +250,7 @@ export async function analyzeWithAgent<M extends ModeKey>(
               return analysis as ModeAnalysisResult<M>;
             }
 
-            console.log('⚠️ Failed to parse finalize_tag_selection');
+            console.log(`⚠️ Failed to parse ${modeConfig.finalizeToolName}`);
             printTokenReport();
             return modeConfig.createConservativeResult() as ModeAnalysisResult<M>;
           }
@@ -245,8 +266,7 @@ export async function analyzeWithAgent<M extends ModeKey>(
       // Update conversation history
       conversationHistory.push({
         role: 'user',
-        content:
-          typeof currentMessage === 'string' ? currentMessage : currentMessage,
+        content: currentMessage,
       });
       conversationHistory.push({
         role: 'assistant',
 
@@ -28,8 +28,9 @@ export const LLM_CONFIG = {
   /**
    * Provider priority order for automatic fallback
    * The first available provider in this list will be used
+   * Order: Anthropic (Claude) → OpenAI → Google (Gemini), matching the Extension team
    */
-  providerPriority: ['openai', 'anthropic', 'google'] as ProviderType[],
+  providerPriority: ['anthropic', 'openai', 'google'] as ProviderType[],
 
   /**
    * Per-provider configuration