# Claude Stale Check #100
# Viewer notice: this file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Scheduled housekeeping workflow: detects issues stuck in the label-driven
# automation pipeline and recovers orphaned claude/* branches.
name: Claude Stale Check

on:
  # Allow manual runs from the Actions tab.
  workflow_dispatch:
  schedule:
    # Minute 17 of every second hour.
    - cron: "17 */2 * * *"

jobs:
# Job "detect-issues" (an entry of the top-level `jobs:` mapping).
# Scans labelled issues for stuck states, repairs what it safely can
# (label cleanup, cancelling/requeueing stale runs) and publishes a summary
# through the job outputs `has_stuck_issues` and `stuck_issues_summary`.
  detect-issues:
    runs-on: ubuntu-latest
    outputs:
      has_stuck_issues: ${{ steps.detect.outputs.has_stuck_issues }}
      stuck_issues_summary: ${{ steps.detect.outputs.stuck_issues_summary }}
    permissions:
      issues: write # Need write to add needs-attention label as fallback
      pull-requests: read
      contents: read
      actions: write
    steps:
      - name: Detect stuck issues
        id: detect
        env:
          # Use PAT so label changes trigger other workflows
          GH_TOKEN: ${{ secrets.PAT_WORKFLOW_TRIGGER || github.token }}
          # Passed via the environment instead of being templated into the script.
          REPO: ${{ github.repository }}
        run: |
          echo "=== Detecting stuck issues ==="

          STUCK_ISSUES=""
          HAS_ISSUES="false"
          # An impl:running claim older than this many seconds (90 minutes) is stale.
          STALE_RUNNING_SECONDS=5400
          # `gh issue list` returns at most 30 items unless --limit is raised.
          LIST_LIMIT=1000

          # Best-effort label edit: a failure is logged as a warning, never fatal.
          # Usage: edit_labels <issue-number> <gh issue edit label flags...>
          edit_labels() {
            local issue="$1"
            shift
            gh issue edit "$issue" --repo "$REPO" "$@" 2>/dev/null \
              || echo "::warning::Could not update labels on issue #$issue ($*)"
          }

          # Succeeds when the given workflow run status means "not finished yet".
          run_is_active() {
            case "$1" in
              in_progress | queued | waiting) return 0 ;;
              *) return 1 ;;
            esac
          }

          # Clean durable queue labels that can no longer produce useful work.
          CLOSED_QUEUE_ISSUES=$(gh issue list \
            --repo "$REPO" \
            --label "impl:queued" \
            --state closed \
            --limit "$LIST_LIMIT" \
            --json number \
            --jq '.[].number')
          for ISSUE_NUM in $CLOSED_QUEUE_ISSUES; do
            echo "CLOSED QUEUED: #$ISSUE_NUM - removing impl:queued"
            edit_labels "$ISSUE_NUM" --remove-label "impl:queued"
          done

          # Inspect every issue (open or closed) that still carries impl:running.
          RUNNING_ISSUES=$(gh issue list \
            --repo "$REPO" \
            --label "impl:running" \
            --state all \
            --limit "$LIST_LIMIT" \
            --json number,state,updatedAt \
            --jq '.[] | "\(.number)|\(.state)|\(.updatedAt)"')
          while IFS='|' read -r ISSUE_NUM ISSUE_STATE UPDATED_AT; do
            [ -z "$ISSUE_NUM" ] && continue

            # One API call for both fields.
            ISSUE_JSON=$(gh issue view "$ISSUE_NUM" --repo "$REPO" --json body,title)
            ISSUE_BODY=$(jq -r '.body // ""' <<< "$ISSUE_JSON")
            ISSUE_TITLE=$(jq -r '.title' <<< "$ISSUE_JSON")

            # The most recent "<!-- automation-claim: {...}" marker in the body
            # records which run claimed the issue and when.
            CLAIM_JSON=$(grep -o '<!-- automation-claim: {[^}]*}' <<< "$ISSUE_BODY" | sed 's/<!-- automation-claim: //' | tail -1 || true)
            RUN_ID=""
            CLAIMED_AT="$UPDATED_AT"
            RUN_VALID="false"
            RUN_STATUS="unknown"
            RUN_CONCLUSION=""
            if [ -n "$CLAIM_JSON" ]; then
              RUN_ID=$(echo "$CLAIM_JSON" | jq -r '.run_id // empty' 2>/dev/null || true)
              CLAIMED_AT=$(echo "$CLAIM_JSON" | jq -r '.claimed_at // empty' 2>/dev/null || true)
              CLAIMED_AT=${CLAIMED_AT:-$UPDATED_AT}
            fi

            # The issue body is user-editable: never place a non-numeric run_id
            # into an API path or a log line.
            if [ -n "$RUN_ID" ] && ! [[ "$RUN_ID" =~ ^[0-9]+$ ]]; then
              echo "::warning::Ignoring non-numeric run_id in claim for issue #$ISSUE_NUM"
              RUN_ID=""
            fi

            if [ -n "$RUN_ID" ]; then
              RUN_DATA=$(gh api "/repos/$REPO/actions/runs/${RUN_ID}" 2>/dev/null || echo "{}")
              RUN_PATH=$(echo "$RUN_DATA" | jq -r '.path // ""')
              RUN_DISPLAY_TITLE=$(echo "$RUN_DATA" | jq -r '.display_title // ""')
              RUN_STATUS=$(echo "$RUN_DATA" | jq -r '.status // "unknown"')
              RUN_CONCLUSION=$(echo "$RUN_DATA" | jq -r '.conclusion // ""')
              # Trust the claim only if the run belongs to the issue workflow
              # and its display title matches this issue's title.
              if [ "$RUN_PATH" = ".github/workflows/claude-issue.yml" ] && [ "$RUN_DISPLAY_TITLE" = "$ISSUE_TITLE" ]; then
                RUN_VALID="true"
              else
                echo "::warning::Ignoring untrusted run claim for issue #$ISSUE_NUM: run_id=$RUN_ID path=$RUN_PATH title=$RUN_DISPLAY_TITLE"
              fi
            fi

            # Seconds since the claim; an unparsable timestamp yields 0 (never stale).
            AGE_SECONDS=$(jq -nr --arg ts "$CLAIMED_AT" 'try (now - ($ts | fromdateiso8601)) catch 0')
            # Use the last automation-state marker, mirroring the claim lookup above.
            AUTOMATION_STATUS=$(grep -o '<!-- automation-state: {[^}]*}' <<< "$ISSUE_BODY" | sed 's/<!-- automation-state: //' | tail -1 | jq -r '.status // empty' 2>/dev/null || true)

            if [ "$ISSUE_STATE" = "CLOSED" ]; then
              echo "CLOSED RUNNING: #$ISSUE_NUM - removing impl:running"
              edit_labels "$ISSUE_NUM" --remove-label "impl:running"
              if [ "$RUN_VALID" = "true" ]; then
                if run_is_active "$RUN_STATUS"; then
                  echo "Cancelling recorded run $RUN_ID for closed issue #$ISSUE_NUM"
                  gh run cancel "$RUN_ID" --repo "$REPO" || true
                fi
              elif [ -z "$RUN_ID" ]; then
                echo "::warning::Issue #$ISSUE_NUM has impl:running but no trusted run_id; not cancelling any run."
              fi
              continue
            fi

            # Values are handed to awk with -v rather than spliced into the program.
            if awk -v age="$AGE_SECONDS" -v limit="$STALE_RUNNING_SECONDS" 'BEGIN { exit !(age > limit) }'; then
              echo "STALE RUNNING: #$ISSUE_NUM claimed ${AGE_SECONDS}s ago"
              STUCK_ISSUES="$STUCK_ISSUES #$ISSUE_NUM(stale-running)"
              HAS_ISSUES="true"
              if [ "$RUN_VALID" = "true" ]; then
                if run_is_active "$RUN_STATUS"; then
                  echo "Cancelling recorded stale run $RUN_ID for issue #$ISSUE_NUM"
                  if gh run cancel "$RUN_ID" --repo "$REPO"; then
                    echo "Requeueing issue #$ISSUE_NUM after cancelling run $RUN_ID"
                    edit_labels "$ISSUE_NUM" --remove-label "impl:running" --add-label "impl:queued"
                  else
                    echo "::warning::Could not cancel run $RUN_ID for issue #$ISSUE_NUM; leaving impl:running in place."
                  fi
                elif [ "$RUN_STATUS" = "completed" ]; then
                  if [ "$RUN_CONCLUSION" = "success" ] || [ "$AUTOMATION_STATUS" = "SUCCESS" ]; then
                    echo "Recorded run $RUN_ID completed successfully; removing stale impl:running for issue #$ISSUE_NUM"
                    edit_labels "$ISSUE_NUM" --remove-label "impl:running"
                  else
                    echo "Recorded run $RUN_ID completed with conclusion=${RUN_CONCLUSION:-unknown}; requeueing open issue #$ISSUE_NUM"
                    edit_labels "$ISSUE_NUM" --remove-label "impl:running" --add-label "impl:queued"
                  fi
                fi
              else
                echo "::warning::Issue #$ISSUE_NUM is stale but has no trusted run_id; marking needs-attention only."
              fi
              edit_labels "$ISSUE_NUM" --add-label "needs-attention"
            fi
          done <<< "$RUNNING_ISSUES"

          # Find the active epic
          EPIC_NUMBER=$(gh issue list \
            --repo "$REPO" \
            --label "type:epic" \
            --label "status:active" \
            --state open \
            --limit "$LIST_LIMIT" \
            --json number \
            --jq '.[0].number')
          if [ -n "$EPIC_NUMBER" ] && [ "$EPIC_NUMBER" != "null" ]; then
            echo "Active epic: #$EPIC_NUMBER"
            # Get epic issues
            # NOTE(review): the label below is hard-coded and EPIC_NUMBER is only
            # logged; confirm this label still corresponds to the active epic.
            EPIC_ISSUES=$(gh issue list \
              --repo "$REPO" \
              --label "epic:message-history" \
              --state open \
              --limit "$LIST_LIMIT" \
              --json number,labels \
              --jq '.[] | "\(.number)|\(.labels | map(.name) | join(","))"')
            while IFS='|' read -r ISSUE_NUM ISSUE_LABELS; do
              [ -z "$ISSUE_NUM" ] && continue
              # Check if issue has no automation labels
              if ! echo "$ISSUE_LABELS" | grep -qE "needs-review|ready-for-implementation|needs-attention|needs-clarification|needs-breakdown"; then
                # Check blockers - handles both inline and multiline formats
                ISSUE_BODY=$(gh issue view "$ISSUE_NUM" --repo "$REPO" --json body --jq '.body // ""')
                BLOCKED_SECTION=$(echo "$ISSUE_BODY" | sed -n '/[Bb]locked [Bb]y:/,/\*\*[Bb]locks\|^##\|^$/p' | grep -v "^\*\*[Bb]locks" | head -10 || true)
                BLOCKERS=$(echo "$BLOCKED_SECTION" | grep -oE '#[0-9]+' | tr -d '#' | sort -u || true)
                ALL_RESOLVED=true
                for BLOCKER in $BLOCKERS; do
                  # A self-reference is not a blocker.
                  [ "$BLOCKER" = "$ISSUE_NUM" ] && continue
                  BLOCKER_STATE=$(gh issue view "$BLOCKER" --repo "$REPO" --json state --jq '.state' 2>/dev/null || echo "UNKNOWN")
                  [ "$BLOCKER_STATE" != "CLOSED" ] && ALL_RESOLVED=false && break
                done
                if [ "$ALL_RESOLVED" = "true" ]; then
                  echo "STUCK: #$ISSUE_NUM - no labels but all blockers resolved"
                  STUCK_ISSUES="$STUCK_ISSUES #$ISSUE_NUM"
                  HAS_ISSUES="true"
                fi
              fi
            done <<< "$EPIC_ISSUES"
          fi

          # Check for stale needs-review (2+ hours without activity)
          STALE_REVIEWS=$(gh issue list \
            --repo "$REPO" \
            --label "needs-review" \
            --state open \
            --limit "$LIST_LIMIT" \
            --json number,updatedAt \
            --jq '.[] | select((now - (.updatedAt | fromdateiso8601)) > 7200) | .number')
          for ISSUE_NUM in $STALE_REVIEWS; do
            # Count comments that look like a posted review outcome.
            HAS_REVIEW=$(gh issue view "$ISSUE_NUM" --repo "$REPO" --json comments \
              --jq '[.comments[] | select(.body | test("recommendation|ready-for-implementation|needs-clarification"; "i"))] | length')
            if [ "$HAS_REVIEW" -eq 0 ]; then
              echo "STALE REVIEW: #$ISSUE_NUM"
              STUCK_ISSUES="$STUCK_ISSUES #$ISSUE_NUM(stale-review)"
              HAS_ISSUES="true"
            fi
          done

          # Check for stale ready-for-implementation (2+ hours without PR/status)
          STALE_IMPL=$(gh issue list \
            --repo "$REPO" \
            --label "ready-for-implementation" \
            --state open \
            --limit "$LIST_LIMIT" \
            --json number,updatedAt \
            --jq '.[] | select((now - (.updatedAt | fromdateiso8601)) > 7200) | .number')
          for ISSUE_NUM in $STALE_IMPL; do
            HAS_STATUS=$(gh issue view "$ISSUE_NUM" --repo "$REPO" --json comments \
              --jq '[.comments[] | select(.body | contains("claude-implementation-status"))] | length')
            if [ "$HAS_STATUS" -eq 0 ]; then
              echo "STALE IMPLEMENTATION: #$ISSUE_NUM"
              STUCK_ISSUES="$STUCK_ISSUES #$ISSUE_NUM(stale-impl)"
              HAS_ISSUES="true"
            fi
          done

          echo "has_stuck_issues=$HAS_ISSUES" >> "$GITHUB_OUTPUT"
          echo "stuck_issues_summary=$STUCK_ISSUES" >> "$GITHUB_OUTPUT"

          if [ "$HAS_ISSUES" = "true" ]; then
            echo ""
            echo "=== Issues requiring attention: $STUCK_ISSUES ==="
            # Add needs-attention for visibility (in case Claude can't run)
            for ISSUE in $STUCK_ISSUES; do
              # Entries look like "#123" or "#123(reason)"; keep only the number.
              ISSUE_NUM=${ISSUE#\#}
              ISSUE_NUM=${ISSUE_NUM%%"("*}
              if [ -n "$ISSUE_NUM" ]; then
                edit_labels "$ISSUE_NUM" --add-label "needs-attention"
              fi
            done
          else
            echo "No stuck issues detected"
          fi
# Job "claude-diagnose" (an entry of the top-level `jobs:` mapping).
# Runs only when detect-issues reported stuck issues and passes its summary
# to the Claude Code action, which investigates and repairs them with `gh`.
  claude-diagnose:
    needs: detect-issues
    if: needs.detect-issues.outputs.has_stuck_issues == 'true'
    runs-on: ubuntu-latest
    # Shares the global implementation lock with the issue runner so two
    # main-mutating automations never open conflicting branches/PRs at once.
    concurrency:
      group: claude-impl
      cancel-in-progress: false
    permissions:
      contents: read
      issues: write
      pull-requests: write
      id-token: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          fetch-depth: 1

      - name: Setup Claude Code
        id: setup-claude
        uses: ./.github/actions/setup-claude-code

      - name: Claude diagnoses and fixes stuck issues
        uses: anthropics/claude-code-action@v1
        env:
          # Token used by the `gh` commands Claude runs from its Bash tool.
          GH_TOKEN: ${{ secrets.PAT_WORKFLOW_TRIGGER || github.token }}
        with:
          allowed_bots: "claude"
          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
          # Use PAT for git operations to avoid GitHub App workflow permission issues
          github_token: ${{ secrets.PAT_WORKFLOW_TRIGGER || github.token }}
          # Executable path comes from the local setup-claude-code action above.
          path_to_claude_code_executable: ${{ steps.setup-claude.outputs.executable-path }}
          prompt: |
            STUCK ISSUES: ${{ needs.detect-issues.outputs.stuck_issues_summary }}
            These issues appear stuck in the automation pipeline. Read `docs/guidelines/github-workflows.md`
            to understand the workflow, then investigate each issue and fix whatever is blocking it.
            Use `gh` commands to inspect issues, check blockers, and take corrective action
            (add labels, fix issue bodies, post comments). Be concise.
          claude_args: '--model claude-sonnet-4-6 --allowed-tools "Read,Glob,Grep,Bash,TodoWrite"'
# Parallel job for orphan branch recovery (doesn't need Claude)
# (an entry of the top-level `jobs:` mapping; it has no `needs:`, so it runs
# independently of detect-issues).
  check-orphans:
    runs-on: ubuntu-latest
    permissions:
      issues: write
      pull-requests: write
      contents: read
    steps:
      - name: Check for orphan branches
        env:
          GH_TOKEN: ${{ github.token }}
          # Passed via the environment instead of being templated into the script.
          REPO: ${{ github.repository }}
        run: |
          echo "=== Checking for orphan Claude branches ==="

          # List remote branches matching claude/* pattern.
          # Uses the authenticated REST API so private repositories work too;
          # the jq filter keeps only refs/heads/claude/... and strips the prefix.
          CLAUDE_BRANCHES=$(gh api --paginate \
            "repos/$REPO/git/matching-refs/heads/claude" \
            --jq '.[].ref | select(startswith("refs/heads/claude/")) | ltrimstr("refs/heads/")')

          for branch in $CLAUDE_BRANCHES; do
            # Extract issue number from branch name (claude/123-description);
            # branches that do not follow the convention are skipped.
            [[ "$branch" =~ ^claude/([0-9]+)- ]] || continue
            ISSUE_NUMBER="${BASH_REMATCH[1]}"

            # Check if there's an open PR for this branch
            PR_EXISTS=$(gh pr list \
              --repo "$REPO" \
              --head "$branch" \
              --state open \
              --json number \
              --jq 'length')
            if [ "$PR_EXISTS" -eq 0 ]; then
              echo "Orphan branch found: $branch (no open PR)"
              # Check if the linked issue is still open
              ISSUE_STATE=$(gh issue view "$ISSUE_NUMBER" --repo "$REPO" --json state --jq '.state' 2>/dev/null || echo "NOTFOUND")
              if [ "$ISSUE_STATE" = "OPEN" ]; then
                # Create a draft PR to capture the work.
                # The heredoc body and its EOF terminator sit at the script's
                # base indentation so they land in column 0 once YAML strips it.
                echo "Creating draft PR for orphan branch $branch"
                gh pr create \
                  --repo "$REPO" \
                  --head "$branch" \
                  --base main \
                  --title "[Draft] Recovered work for #$ISSUE_NUMBER" \
                  --body "$(cat <<EOF
          ## Recovered Branch
          This draft PR was automatically created to capture work from an orphan branch.
          **Issue:** #$ISSUE_NUMBER
          **Branch:** \`$branch\`
          This branch existed without an associated PR. Please review the changes
          and either:
          1. Complete the implementation and mark ready for review
          2. Close if the work is no longer needed
          ---
          *Automatically created by stale check workflow*
          EOF
          )" \
                  --draft || echo "Failed to create PR (may already exist or branch has no commits)"
              fi
            fi
          done
# Step "Log stuck PRs" (an item of the check-orphans job's `steps:` list).
# Read-only: prints open PRs that carry an attention label and have not been
# updated for 24 hours or more.
      - name: Log stuck PRs
        env:
          GH_TOKEN: ${{ github.token }}
          # Passed via the environment instead of being templated into the script.
          REPO: ${{ github.repository }}
        run: |
          echo "=== Checking for stuck PRs ==="

          # Find PRs with needs-human-review or needs-attention that are old.
          # Each output line is "<number>|<whole hours since last update>";
          # --limit lifts the default cap of 30 PRs.
          STUCK_PRS=$(gh pr list \
            --repo "$REPO" \
            --state open \
            --limit 1000 \
            --json number,updatedAt,labels \
            --jq '.[]
              | select(.labels | map(.name) | any(. == "needs-human-review" or . == "needs-attention"))
              | "\(.number)|\(((now - (.updatedAt | fromdateiso8601)) / 3600) | floor)"')

          while IFS='|' read -r PR_NUMBER HOURS_AGO; do
            [ -z "$PR_NUMBER" ] && continue
            # Check if updated more than 24 hours ago
            if [ "$HOURS_AGO" -ge 24 ]; then
              echo "PR #$PR_NUMBER has been stuck for ${HOURS_AGO} hours"
              # Just log - don't spam comments on human-review items
            fi
          done <<< "$STUCK_PRS"

          echo "Stale check complete"