diff --git a/.claude/skills/e2e/implement.md b/.claude/skills/e2e/implement.md
index 342c87213..db9ba8e93 100644
--- a/.claude/skills/e2e/implement.md
+++ b/.claude/skills/e2e/implement.md
@@ -2,6 +2,10 @@
 
 Apply fixes for E2E test failures, verify with scoped E2E tests.
 
+> **Before implementing any fixes, enter plan mode by invoking /plan.**
+> Analyze the findings (Steps 1-2 below), produce a complete fix plan with
+> specific file paths and code changes, and get user approval before executing.
+
 > **IMPORTANT: Running real E2E tests is a HARD REQUIREMENT of this procedure.**
 > Every fix MUST be verified with real E2E tests before the summary step.
 > Canary tests use the Vogon fake agent and cannot catch agent-specific issues.
diff --git a/.github/workflows/e2e-fix.yml b/.github/workflows/e2e-fix.yml
new file mode 100644
index 000000000..c110c948b
--- /dev/null
+++ b/.github/workflows/e2e-fix.yml
@@ -0,0 +1,140 @@
+name: E2E Fix
+
+on:
+  workflow_dispatch:
+    inputs:
+      triage_run_id:
+        description: Run ID of the triage workflow (for downloading plan artifacts)
+        required: true
+        type: string
+      run_url:
+        description: Original failed E2E run URL
+        required: true
+        type: string
+      failed_agents:
+        description: Comma-separated list of agents to fix
+        required: true
+        type: string
+      slack_channel:
+        description: Slack channel ID for thread replies
+        required: false
+        type: string
+      slack_thread_ts:
+        description: Slack thread timestamp for replies
+        required: false
+        type: string
+
+permissions:
+  actions: read
+  contents: write
+  pull-requests: write
+  id-token: write
+
+concurrency:
+  group: e2e-fix-${{ inputs.run_url || github.run_id }}  # one fix at a time per original run URL
+  cancel-in-progress: true  # a newer dispatch in the same group cancels the one still running
+
+jobs:
+  fix:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    env:
+      SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
+      SLACK_CHANNEL: ${{ inputs.slack_channel }}
+      SLACK_THREAD_TS: ${{ inputs.slack_thread_ts }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0  # 0 = fetch full history
+
+      - name: Post fix started
+        if: ${{ env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' }}
+        shell: bash
+        env:
+          FAILED_AGENTS: ${{ inputs.failed_agents }}
+        run: |
+          set -euo pipefail
+
+          scripts/post-slack-message.sh "Starting E2E fix for \`${FAILED_AGENTS}\`."
+
+      - name: Setup mise
+        uses: jdx/mise-action@v4
+
+      - name: Download plan artifacts
+        env:
+          GH_TOKEN: ${{ github.token }}
+          TRIAGE_RUN_ID: ${{ inputs.triage_run_id }}
+          FAILED_AGENTS: ${{ inputs.failed_agents }}
+        shell: bash
+        run: |
+          set -euo pipefail
+
+          mkdir -p triage-plans
+
+          IFS=',' read -ra agents <<< "$FAILED_AGENTS"
+          for agent in "${agents[@]}"; do
+            agent="$(echo "$agent" | xargs)" # trim whitespace
+            echo "Downloading plan for $agent..."
+            gh run download "$TRIAGE_RUN_ID" \
+              --name "e2e-plan-${agent}" \
+              --dir "triage-plans/${agent}" || {
+              echo "warning: no plan artifact found for $agent" >&2
+              continue
+            }
+          done
+
+          echo "Downloaded plans:"
+          find triage-plans -name '*.md' -type f | grep .  # grep exits non-zero when no plan was downloaded, so the fix step never runs without a plan
+
+      - name: Apply fixes
+        id: fix
+        uses: anthropics/claude-code-action@v1
+        with:
+          prompt: |
+            Read the fix plans in the triage-plans/ directory. Each subdirectory contains a plan.md for one agent.
+
+            Execute all fixes exactly as specified in the plans. After applying fixes, run:
+            1. mise run fmt
+            2. mise run lint
+            3. mise run test:e2e:canary
+
+            If verification passes, create a git branch fix/e2e-${{ github.run_id }}, commit all changes,
+            push, and create a draft PR with a summary of what was fixed.
+
+            If verification fails, fix the issues and retry. Do not give up without attempting to fix lint/format errors.
+          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+          claude_args: "--allowedTools 'Edit,Write,Read,Glob,Grep,Bash(git:*),Bash(mise:*),Bash(gh:*)'"
+
+      - name: Post success to Slack
+        if: success() && env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != ''
+        shell: bash
+        env:
+          GH_TOKEN: ${{ github.token }}
+          FIX_BRANCH: fix/e2e-${{ github.run_id }}
+          RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        run: |
+          set -euo pipefail
+
+          # Look up the PR whose head is the fix branch; gh errors are ignored and an empty result is handled below
+          pr_url="$(gh pr list --head "$FIX_BRANCH" --json url -q '.[0].url' 2>/dev/null || true)"
+
+          if [ -n "$pr_url" ]; then
+            message="E2E fix complete — draft PR ready: <${pr_url}|Review PR>"
+          else
+            message="E2E fix complete — changes applied but no PR was created. Check the <${RUN_URL}|workflow run> for details."
+          fi
+
+          scripts/post-slack-message.sh "$message"
+
+      - name: Post failure to Slack
+        if: failure() && env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != ''
+        shell: bash
+        env:
+          RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        run: |
+          set -euo pipefail
+
+          message="E2E fix failed. Check the <${RUN_URL}|workflow run> for details."
+
+          scripts/post-slack-message.sh "$message"
diff --git a/.github/workflows/e2e-triage.yml b/.github/workflows/e2e-triage.yml
new file mode 100644
index 000000000..e26a2bb23
--- /dev/null
+++ b/.github/workflows/e2e-triage.yml
@@ -0,0 +1,327 @@
+name: E2E Triage
+
+on:
+  workflow_dispatch:
+    inputs:
+      run_url:
+        description: GitHub Actions run URL to triage
+        required: true
+        type: string
+      sha:
+        description: Commit SHA (auto-detected from run if omitted)
+        required: false
+        type: string
+      failed_agents:
+        description: Comma-separated list of failed agents (auto-detected from run if omitted)
+        required: false
+        type: string
+      slack_channel:
+        description: Slack channel ID for the originating thread
+        required: false
+        type: string
+      slack_thread_ts:
+        description: Slack thread timestamp for replies
+        required: false
+        type: string
+      rerun:
+        description: 'Re-run failing tests locally to detect flaky (costs API tokens)'
+        required: false
+        type: boolean
+        default: false
+
+permissions:
+  actions: read
+  contents: read
+  id-token: write
+
+concurrency:
+  group: e2e-triage-${{ inputs.run_url || github.run_id }}  # one triage at a time per run URL
+  cancel-in-progress: true  # a newer dispatch in the same group cancels the one still running
+
+jobs:
+  matrix-setup:
+    runs-on: ubuntu-latest
+    outputs:
+      agents: ${{ steps.set.outputs.agents }}
+      run_url: ${{ steps.set.outputs.run_url }}
+      sha: ${{ steps.set.outputs.sha }}
+      slack_channel: ${{ steps.set.outputs.slack_channel }}
+      slack_thread_ts: ${{ steps.set.outputs.slack_thread_ts }}
+    steps:
+      - name: Validate payload and build matrix
+        id: set
+        shell: bash
+        env:
+          GH_TOKEN: ${{ github.token }}
+          REPO_NAME: ${{ github.repository }}
+          RUN_URL_INPUT: ${{ inputs.run_url }}
+          SHA_INPUT: ${{ inputs.sha }}
+          FAILED_AGENTS_INPUT: ${{ inputs.failed_agents }}
+          SLACK_CHANNEL_INPUT: ${{ inputs.slack_channel }}
+          SLACK_THREAD_TS_INPUT: ${{ inputs.slack_thread_ts }}
+        run: |
+          set -euo pipefail
+
+          csv_to_json() {  # "a, b,,c" -> ["a","b","c"]: split on commas, trim, drop empty items
+            printf '%s' "$1" | jq -R -s -c 'split(",") | map(gsub("^\\s+|\\s+$"; "")) | map(select(length > 0))'
+          }
+
+          run_url="$RUN_URL_INPUT"
+          sha="${SHA_INPUT:-}"
+          slack_channel="${SLACK_CHANNEL_INPUT:-}"
+          slack_thread_ts="${SLACK_THREAD_TS_INPUT:-}"
+
+          # Derive missing values from run URL via GitHub API (a grep miss is tolerated so the check below can report it)
+          if [ -z "$sha" ] || [ -z "$FAILED_AGENTS_INPUT" ]; then
+            run_id=$(echo "$run_url" | grep -oE '/runs/[0-9]+' | grep -oE '[0-9]+' || true)
+            if [ -z "$run_id" ]; then
+              echo "Could not extract run ID from run_url: $run_url" >&2
+              exit 1
+            fi
+            run_data=$(gh run view "$run_id" --repo "$REPO_NAME" --json headSha,jobs)
+            if [ -z "$sha" ]; then
+              sha=$(echo "$run_data" | jq -r '.headSha')
+            fi
+            if [ -z "$FAILED_AGENTS_INPUT" ]; then
+              agents_json=$(echo "$run_data" | jq -c '[.jobs[]
+                | select(.conclusion == "failure")
+                | (.name | (try capture("\\((?<agent>[^)]+)\\)").agent catch null))
+                | select(. != null)
+              ]')
+            fi
+          fi
+
+          if [ -n "$FAILED_AGENTS_INPUT" ]; then
+            agents_json="$(csv_to_json "$FAILED_AGENTS_INPUT")"
+          fi
+
+          if [ -z "$run_url" ]; then
+            echo "run_url is required" >&2
+            exit 1
+          fi
+          if [ -z "$sha" ]; then
+            echo "sha is required" >&2
+            exit 1
+          fi
+          if [ -z "$agents_json" ] || [ "$agents_json" = "[]" ] || [ "$agents_json" = "null" ]; then
+            echo "agents is required (provide failed_agents input or ensure failed job names contain '(agent-name)')" >&2
+            exit 1
+          fi
+
+          echo "run_url=$run_url" >> "$GITHUB_OUTPUT"
+          echo "sha=$sha" >> "$GITHUB_OUTPUT"
+          echo "slack_channel=$slack_channel" >> "$GITHUB_OUTPUT"
+          echo "slack_thread_ts=$slack_thread_ts" >> "$GITHUB_OUTPUT"
+          echo "agents=$agents_json" >> "$GITHUB_OUTPUT"
+
+  triage:
+    needs: [matrix-setup]
+    runs-on: ubuntu-latest
+    timeout-minutes: ${{ inputs.rerun == true && 90 || 45 }}
+    env:
+      RUN_URL: ${{ needs.matrix-setup.outputs.run_url }}
+      E2E_AGENT: ${{ matrix.agent }}
+      SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
+      SLACK_CHANNEL: ${{ needs.matrix-setup.outputs.slack_channel }}
+      SLACK_THREAD_TS: ${{ needs.matrix-setup.outputs.slack_thread_ts }}
+    strategy:
+      fail-fast: false
+      matrix:
+        agent: ${{ 
fromJson(needs.matrix-setup.outputs.agents) }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 1
+
+      - name: Post triage started
+        if: ${{ env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' }}
+        shell: bash
+        run: |
+          set -euo pipefail
+
+          scripts/post-slack-message.sh "Starting E2E triage for \`$E2E_AGENT\` on <$RUN_URL|this run>."
+
+      - name: Setup mise
+        uses: jdx/mise-action@v4
+
+      - name: Download E2E artifacts
+        id: artifacts
+        if: inputs.rerun != true
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          artifact_path="$(scripts/download-e2e-artifacts.sh "$RUN_URL")"
+          echo "path=$artifact_path" >> "$GITHUB_OUTPUT"
+
+      - name: Install system dependencies
+        if: inputs.rerun == true
+        run: sudo apt-get update && sudo apt-get install -y tmux
+
+      - name: Install agent CLI
+        if: inputs.rerun == true
+        run: |
+          case "$E2E_AGENT" in # job-level env copy of matrix.agent; keeps the input-derived value out of the script text
+            claude-code) curl -fsSL https://claude.ai/install.sh | bash ;;
+            opencode) curl -fsSL https://opencode.ai/install | bash ;;
+            gemini-cli) npm install -g @google/gemini-cli ;;
+            cursor-cli) curl https://cursor.com/install -fsS | bash ;;
+            factoryai-droid) curl -fsSL https://app.factory.ai/cli | sh ;;
+            copilot-cli) npm install -g @github/copilot ;;
+            roger-roger) ;; # installed by mise (see mise.toml)
+          esac
+          echo "$HOME/.local/bin" >> "$GITHUB_PATH"
+
+      - name: Bootstrap agent
+        if: inputs.rerun == true && matrix.agent != 'roger-roger'
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }}
+          FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }}
+          COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
+        run: go run ./e2e/bootstrap
+
+      - name: Run triage (analysis only)
+        id: triage_analysis
+        if: inputs.rerun != true
+        uses: anthropics/claude-code-action@v1
+        with:
+          prompt: |
+            /e2e:triage-ci ${{ steps.artifacts.outputs.path }} --agent ${{ matrix.agent }} --sha 
${{ needs.matrix-setup.outputs.sha }}
+          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+          claude_args: "--allowedTools 'Read,Grep,Glob'"
+          display_report: true
+
+      - name: Run triage (with re-runs)
+        id: triage_rerun
+        if: inputs.rerun == true
+        uses: anthropics/claude-code-action@v1
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }}
+          FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }}
+          COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
+        with:
+          prompt: |
+            /e2e:triage-ci ${{ env.RUN_URL }} --agent ${{ matrix.agent }} --sha ${{ needs.matrix-setup.outputs.sha }}
+          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+          claude_args: "--allowedTools 'Read,Grep,Glob,Bash(mise:*),Bash(scripts:*)'"
+          display_report: true
+
+      - name: Resolve triage outcome
+        id: triage
+        if: always()
+        shell: bash
+        env:
+          ANALYSIS_OUTCOME: ${{ steps.triage_analysis.outcome }}
+          ANALYSIS_EXEC: ${{ steps.triage_analysis.outputs.execution_file }}
+          RERUN_OUTCOME: ${{ steps.triage_rerun.outcome }}
+          RERUN_EXEC: ${{ steps.triage_rerun.outputs.execution_file }}
+        run: |
+          outcome="${ANALYSIS_OUTCOME:-skipped}"
+          exec_file="${ANALYSIS_EXEC}"
+          if [ "$outcome" = "skipped" ]; then  # analysis step did not run: report the re-run step instead
+            outcome="${RERUN_OUTCOME:-skipped}"
+            exec_file="${RERUN_EXEC}"
+          fi
+          echo "outcome=$outcome" >> "$GITHUB_OUTPUT"
+          echo "execution_file=$exec_file" >> "$GITHUB_OUTPUT"
+
+      - name: Extract triage output
+        id: triage_output
+        if: always() && steps.triage.outputs.execution_file != ''
+        shell: bash
+        env:
+          EXECUTION_FILE: ${{ steps.triage.outputs.execution_file }}
+          TRIAGE_OUTPUT_FILE: ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/triage.md
+        run: |
+          set -euo pipefail
+
+          mkdir -p "$(dirname "$TRIAGE_OUTPUT_FILE")"
+          # Keep only the text parts of assistant entries in the execution JSON, joined by newlines
+          jq -r '[.[] | select(.type == "assistant") | .message.content[]
+            | select(.type == "text") | .text] | join("\n")' \
+            "$EXECUTION_FILE" > "$TRIAGE_OUTPUT_FILE"
+
+      - name: Generate fix plan
+        id: plan
+        if: steps.triage.outputs.outcome == 'success'
+        uses: anthropics/claude-code-action@v1
+        with:
+          prompt: |
+            /e2e:implement Read the triage findings at ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/triage.md for agent ${{ matrix.agent }}.
+          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+          claude_args: "--allowedTools 'Read,Grep,Glob,Write,EnterPlanMode,ExitPlanMode,Bash(mise:*)'"
+          display_report: true
+
+      - name: Extract plan output
+        id: plan_output
+        if: steps.plan.outcome == 'success' && steps.plan.outputs.execution_file != ''
+        shell: bash
+        env:
+          EXECUTION_FILE: ${{ steps.plan.outputs.execution_file }}
+          PLAN_FILE: ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/plan.md
+        run: |
+          set -euo pipefail
+
+          mkdir -p "$(dirname "$PLAN_FILE")"
+          jq -r '[.[] | select(.type == "assistant") | .message.content[]
+            | select(.type == "text") | .text] | join("\n")' \
+            "$EXECUTION_FILE" > "$PLAN_FILE"
+
+      - name: Post triage completion
+        if: ${{ always() && (steps.triage.outputs.outcome == 'success' || steps.triage.outputs.outcome == 'failure') && env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != '' }}
+        shell: bash
+        env:
+          TRIAGE_OUTCOME: ${{ steps.triage.outputs.outcome }}
+        run: |
+          set -euo pipefail
+
+          if [ "$TRIAGE_OUTCOME" = "success" ]; then
+            message="E2E triage complete for \`$E2E_AGENT\`."
+          else
+            message="E2E triage failed for \`$E2E_AGENT\`."
+          fi
+
+          scripts/post-slack-message.sh "$message"
+
+      - name: Post fix plan to Slack
+        if: steps.plan_output.outcome == 'success' && env.SLACK_BOT_TOKEN != '' && env.SLACK_CHANNEL != '' && env.SLACK_THREAD_TS != ''
+        shell: bash
+        env:
+          PLAN_FILE: ${{ github.workspace }}/e2e-triage-artifacts/${{ matrix.agent }}/plan.md
+          TRIAGE_RUN_ID: ${{ github.run_id }}
+        run: |
+          set -euo pipefail
+
+          # Summary = first 5 non-empty lines among the plan's first 20 lines
+          summary="$(head -20 "$PLAN_FILE" | sed '/^$/d' | head -5)"
+
+          # Construct Fix It URL; every query value is percent-encoded so none can break the URL or the Slack <url|label> link
+          fix_query="$(python3 -c "import os, urllib.parse as up; e = os.environ; print(up.urlencode({'triage_run_id': e['TRIAGE_RUN_ID'], 'run_url': e['RUN_URL'], 'failed_agents': e['E2E_AGENT'], 'slack_channel': e['SLACK_CHANNEL'], 'slack_thread_ts': e['SLACK_THREAD_TS']}, quote_via=up.quote, safe=''))")"
+          fix_url="https://e2e-triage.entireio.workers.dev/fix?${fix_query}"
+
+          message="Fix plan ready for \`$E2E_AGENT\`:
+          ${summary}
+
+          <${fix_url}|Fix It> — applies the plan and creates a draft PR"
+
+          scripts/post-slack-message.sh "$message"
+
+      - name: Upload triage output
+        if: always()
+        uses: actions/upload-artifact@v7
+        with:
+          name: e2e-triage-${{ matrix.agent }}
+          path: e2e-triage-artifacts/
+          retention-days: 7
+
+      - name: Upload plan artifact
+        if: always() && steps.plan_output.outcome == 'success'
+        uses: actions/upload-artifact@v7
+        with:
+          name: e2e-plan-${{ matrix.agent }}
+          path: e2e-triage-artifacts/${{ matrix.agent }}/plan.md
+          retention-days: 7
diff --git a/scripts/post-slack-message.sh b/scripts/post-slack-message.sh
new file mode 100755
index 000000000..76546dc0b
--- /dev/null
+++ b/scripts/post-slack-message.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Post a message to a Slack thread using the chat.postMessage API.
+# Requires SLACK_BOT_TOKEN, SLACK_CHANNEL, and SLACK_THREAD_TS env vars.
+
+message="${1:?message is required}"
+body="$(jq -n \
+  --arg channel "$SLACK_CHANNEL" \
+  --arg thread_ts "$SLACK_THREAD_TS" \
+  --arg text "$message" \
+  '{channel: $channel, thread_ts: $thread_ts, text: $text}')"
+
+if ! reply="$(curl -fsS https://slack.com/api/chat.postMessage \
+  -H "Authorization: Bearer ${SLACK_BOT_TOKEN}" \
+  -H 'Content-type: application/json; charset=utf-8' \
+  --data "$body")"; then
+  echo "warning: slack notification failed" >&2  # best-effort: a failed post only warns
+  exit 0
+fi
+
+if ! jq -e '.ok == true' >/dev/null <<<"$reply"; then
+  echo "warning: slack notification returned non-ok response" >&2
+fi