Skill Validation #3614
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Skill & agent validation for PRs touching .github/skills/ or .github/agents/. | |
| # | |
| # Two modes: | |
| # 1. Static checks — run automatically on every PR that touches skills/agents. | |
| # 2. LLM evaluation — runs automatically for contributor PRs, or can be | |
| # triggered by a repo contributor posting "/evaluate-skills" on any PR. | |
| # Requires COPILOT_GITHUB_TOKEN secret (Copilot API access). | |
| # | |
| # Trigger model: | |
| # - pull_request_target: runs in the base repo context with full permissions | |
| # and secret access, even for fork PRs. Workflow YAML is always from the | |
| # default branch (not the PR), ensuring security. | |
| # - issue_comment (/evaluate-skills): same security model as pull_request_target. | |
| # Always runs workflow YAML from the default branch. | |
| # | |
| # Security model: | |
| # - Workflow YAML: always from the default branch (enforced by both triggers) | |
| # - Validator binary: downloaded from dotnet/skills releases (trusted) | |
| # - Skill/test content: checked out from the PR via sparse-checkout | |
| # (only .github/skills and .github/agents — markdown/YAML data files) | |
| # - No PR code is compiled or executed | |
| # - LLM evaluation: only runs for PRs from contributors with write+ access, | |
| # or when explicitly triggered via /evaluate-skills by a contributor | |
# Workflow identity, triggers, run de-duplication, default token scopes and
# shared environment for every job below.
name: Skill Validation

on:
  # PR events, limited to changes under the skill/agent paths or to this workflow.
  pull_request_target:
    types:
      - opened
      - synchronize
      - reopened
    paths:
      - '.github/skills/**'
      - '.github/agents/**'
      - '.github/plugin.json'
      - '.github/workflows/skill-validation.yml'
  # Every new comment fires the workflow; the slash-gate job filters for
  # "/evaluate-skills" on pull requests.
  issue_comment:
    types:
      - created
  # Manual runs.
  workflow_dispatch:

# One concurrency group per logical unit of work:
#   eval-<pr>            for /evaluate-skills comments on a PR
#   noop-<pr>-<comment>  for any other comment (unique, so it never cancels real work)
#   pr-<pr>              for pull_request_target events
#   <run_id>             for everything else (e.g. workflow_dispatch)
concurrency:
  group: >-
    skill-validation-${{
    github.event_name == 'issue_comment'
    && startsWith(github.event.comment.body, '/evaluate-skills')
    && format('eval-{0}', github.event.issue.number)
    || github.event_name == 'issue_comment'
    && format('noop-{0}-{1}', github.event.issue.number, github.event.comment.id)
    || github.event_name == 'pull_request_target'
    && format('pr-{0}', github.event.pull_request.number)
    || github.run_id
    }}
  cancel-in-progress: true

# Default GITHUB_TOKEN scopes; jobs that declare their own `permissions` override these.
permissions:
  contents: read
  pull-requests: write
  issues: write
  statuses: write
  checks: write

env:
  # Prefix of the cache key under which the downloaded validator binary is stored.
  VALIDATOR_CACHE_PREFIX: skill-validator-linux-x64

jobs:
| jobs: | |
| # ========================================================================== | |
| # PR GATE (pull_request_target) | |
| # Determine PR source, author permissions, and changed files. | |
| # ========================================================================== | |
# Gate for pull_request_target events: records the PR head (sha/repo/number),
# whether the PR comes from a fork, whether its author has write-level access,
# and which skills/agents the PR touches. Downstream jobs read these outputs.
pr-gate:
  name: PR gate
  if: github.event_name == 'pull_request_target'
  runs-on: ubuntu-latest
  permissions:
    contents: read
    pull-requests: read
  outputs:
    head_sha: ${{ github.event.pull_request.head.sha }}
    head_repo: ${{ github.event.pull_request.head.repo.full_name }}
    pr_number: ${{ github.event.pull_request.number }}
    is_contributor: ${{ steps.perms.outputs.is_contributor }}
    is_fork: ${{ steps.info.outputs.is_fork }}
    changed_skills: ${{ steps.discover.outputs.changed_skills }}
    has_skill_changes: ${{ steps.discover.outputs.has_skill_changes }}
    has_agent_changes: ${{ steps.discover.outputs.has_agent_changes }}
  steps:
    - name: Determine fork status
      id: info
      env:
        HEAD: ${{ github.event.pull_request.head.repo.full_name }}
        BASE: ${{ github.event.pull_request.base.repo.full_name }}
      run: |
        # A PR is a fork PR when its head repository differs from the base repository.
        IS_FORK=$([[ "$HEAD" != "$BASE" ]] && echo true || echo false)
        echo "is_fork=$IS_FORK" >> "$GITHUB_OUTPUT"
        echo "PR from $HEAD → $BASE (fork=$IS_FORK)"

    - name: Check PR author permissions
      id: perms
      env:
        GH_TOKEN: ${{ github.token }}
        REPO: ${{ github.repository }}
        AUTHOR: ${{ github.event.pull_request.user.login }}
      run: |
        # Any API failure is treated as "none", i.e. the author is not a contributor.
        PERMISSION=$(gh api "repos/${REPO}/collaborators/${AUTHOR}/permission" \
          --jq '.permission' 2>/dev/null || echo "none")
        echo "PR author $AUTHOR has permission: $PERMISSION"
        if [[ "$PERMISSION" == "admin" || "$PERMISSION" == "write" || "$PERMISSION" == "maintain" ]]; then
          echo "is_contributor=true" >> "$GITHUB_OUTPUT"
        else
          echo "is_contributor=false" >> "$GITHUB_OUTPUT"
        fi

    - name: Discover changed files
      id: discover
      env:
        GH_TOKEN: ${{ github.token }}
        REPO: ${{ github.repository }}
        PR_NUMBER: ${{ github.event.pull_request.number }}
      run: |
        CHANGED=$(gh api "repos/${REPO}/pulls/${PR_NUMBER}/files" \
          --paginate --jq '.[].filename')

        # Every changed path under .github/skills/ (drives has_skill_changes).
        SKILL_FILES=$(echo "$CHANGED" | grep '^\.github/skills/' || true)

        # Skill directory names only. `sed -n … p` prints a line only when the
        # substitution matched, so files that sit directly in .github/skills/
        # (no skill subdirectory) are not reported as a skill name.
        SKILL_DIRS=$(echo "$SKILL_FILES" | \
          sed -n 's|^\.github/skills/\([^/]*\)/.*|\1|p' | sort -u)

        AGENT_FILES=$(echo "$CHANGED" | grep '^\.github/agents/' || true)

        echo "has_skill_changes=$( [ -n "$SKILL_FILES" ] && echo true || echo false )" >> "$GITHUB_OUTPUT"
        echo "has_agent_changes=$( [ -n "$AGENT_FILES" ] && echo true || echo false )" >> "$GITHUB_OUTPUT"

        # Multi-line output; a random delimiter keeps file names from terminating it early.
        DELIM="EOF_$(openssl rand -hex 8)"
        echo "changed_skills<<$DELIM" >> "$GITHUB_OUTPUT"
        echo "$SKILL_DIRS" >> "$GITHUB_OUTPUT"
        echo "$DELIM" >> "$GITHUB_OUTPUT"

        echo "Changed skills: $SKILL_DIRS"
        echo "Changed agents: $AGENT_FILES"
| # ========================================================================== | |
| # SLASH COMMAND GATE (/evaluate-skills) | |
| # ========================================================================== | |
# Gate for "/evaluate-skills" comments on pull requests: verifies that the
# commenter has write-level access, resolves the PR head, acknowledges the
# comment with a reaction, and marks the head commit as pending.
slash-gate:
  name: Gate (/evaluate-skills)
  if: >-
    github.event_name == 'issue_comment' &&
    github.event.issue.pull_request &&
    startsWith(github.event.comment.body, '/evaluate-skills')
  runs-on: ubuntu-latest
  outputs:
    head_sha: ${{ steps.pr.outputs.head_sha }}
    head_repo: ${{ steps.pr.outputs.head_repo }}
    pr_number: ${{ steps.pr.outputs.pr_number }}
  steps:
    - name: Check commenter permissions
      env:
        GH_TOKEN: ${{ github.token }}
        REPO: ${{ github.repository }}
        COMMENTER: ${{ github.event.comment.user.login }}
      run: |
        PERMISSION=$(gh api "repos/${REPO}/collaborators/${COMMENTER}/permission" \
          --jq '.permission')
        echo "Commenter $COMMENTER has permission: $PERMISSION"
        if [[ "$PERMISSION" != "admin" && "$PERMISSION" != "write" && "$PERMISSION" != "maintain" ]]; then
          echo "::error::User does not have write access"
          exit 1
        fi

    - name: Get PR details
      id: pr
      env:
        GH_TOKEN: ${{ github.token }}
        REPO: ${{ github.repository }}
        ISSUE_NUMBER: ${{ github.event.issue.number }}
      run: |
        PR_NUMBER="$ISSUE_NUMBER"
        PR_DATA=$(gh api "repos/${REPO}/pulls/${PR_NUMBER}")
        # `// empty` turns a JSON null (e.g. the head repository no longer
        # exists) into an empty string instead of the literal text "null".
        HEAD_SHA=$(echo "$PR_DATA" | jq -r '.head.sha // empty')
        HEAD_REPO=$(echo "$PR_DATA" | jq -r '.head.repo.full_name // empty')
        if [[ -z "$HEAD_SHA" || -z "$HEAD_REPO" ]]; then
          echo "::error::Could not resolve head commit or head repository for PR #${PR_NUMBER}"
          exit 1
        fi
        echo "head_sha=${HEAD_SHA}" >> "$GITHUB_OUTPUT"
        echo "head_repo=${HEAD_REPO}" >> "$GITHUB_OUTPUT"
        echo "pr_number=${PR_NUMBER}" >> "$GITHUB_OUTPUT"

    - name: Add reaction to comment
      env:
        GH_TOKEN: ${{ github.token }}
        REPO: ${{ github.repository }}
        COMMENT_ID: ${{ github.event.comment.id }}
      run: |
        # Best effort: a failed reaction must not fail the gate.
        gh api "repos/${REPO}/issues/comments/${COMMENT_ID}/reactions" \
          -X POST -f content='eyes' || true

    - name: Set pending commit status
      # Best effort: the status is informational only.
      continue-on-error: true
      env:
        GH_TOKEN: ${{ github.token }}
        REPO: ${{ github.repository }}
        HEAD_SHA: ${{ steps.pr.outputs.head_sha }}
        RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
      run: |
        gh api "repos/${REPO}/statuses/${HEAD_SHA}" \
          -f state=pending \
          -f context="skill-validation" \
          -f description="Skill evaluation in progress..." \
          -f target_url="${RUN_URL}"
| # ========================================================================== | |
| # STATIC VALIDATION | |
| # Always runs for PRs (all types) and slash-commands. | |
| # ========================================================================== | |
# Static validation: checks out only the skill/agent data files from the PR
# head, obtains the skill-validator binary (daily cache, else download), runs
# `skill-validator check` for skills and agents, writes a step summary, and
# uploads the results as the `static-check-results` artifact.
static-check:
  name: Static validation
  needs: [pr-gate, slash-gate]
  if: >-
    always() && !cancelled() && (
      needs.pr-gate.result == 'success' ||
      needs.slash-gate.result == 'success' ||
      github.event_name == 'workflow_dispatch'
    )
  runs-on: ubuntu-latest
  permissions:
    contents: read
  outputs:
    exit_code: ${{ steps.check.outputs.exit_code }}
  steps:
    - name: Checkout PR content
      uses: actions/checkout@v4
      with:
        # Falls back to this repository / default ref for workflow_dispatch runs.
        repository: ${{ needs.pr-gate.outputs.head_repo || needs.slash-gate.outputs.head_repo || github.repository }}
        ref: ${{ needs.pr-gate.outputs.head_sha || needs.slash-gate.outputs.head_sha || '' }}
        sparse-checkout: |
          .github/skills
          .github/agents
          .github/plugin.json
        persist-credentials: false

    # ── Download & cache skill-validator ──────────────────────────
    - name: Get cache key date
      id: cache-date
      run: echo "date=$(date +%Y-%m-%d)" >> "$GITHUB_OUTPUT"

    - name: Restore skill-validator from cache
      id: cache-sv
      uses: actions/cache/restore@v4
      with:
        path: skill-validator-bin
        key: ${{ env.VALIDATOR_CACHE_PREFIX }}-${{ steps.cache-date.outputs.date }}
        restore-keys: |
          ${{ env.VALIDATOR_CACHE_PREFIX }}-

    - name: Download skill-validator
      # Runs unless today's exact cache key was hit.
      if: steps.cache-sv.outputs.cache-hit != 'true'
      run: |
        mkdir -p skill-validator-bin
        curl -fsSL --retry 3 --retry-all-errors -o skill-validator.tar.gz \
          https://github.com/dotnet/skills/releases/download/skill-validator-nightly/skill-validator-linux-x64.tar.gz
        tar -xzf skill-validator.tar.gz -C skill-validator-bin
        if [ ! -f skill-validator-bin/skill-validator ]; then
          echo "::error::skill-validator binary not found after extraction"
          exit 1
        fi
        chmod +x skill-validator-bin/skill-validator

    - name: Save skill-validator to cache
      if: steps.cache-sv.outputs.cache-hit != 'true'
      uses: actions/cache/save@v4
      with:
        path: skill-validator-bin
        key: ${{ env.VALIDATOR_CACHE_PREFIX }}-${{ steps.cache-date.outputs.date }}

    # ── Run skill-validator check ─────────────────────────────────
    - name: Run skill-validator check
      id: check
      shell: bash
      env:
        CHANGED_SKILLS: ${{ needs.pr-gate.outputs.changed_skills }}
        PR_GATE: ${{ needs.pr-gate.result }}
      run: |
        rc=0

        if [ -d .github/skills ]; then
          echo "::group::Validate skills"
          # For PR path: validate only changed skills for efficiency
          # For slash-command or workflow_dispatch: validate all
          #
          # Arguments are collected in an array and expanded quoted. Skill
          # directory names come from the PR, so a name containing spaces or
          # glob characters must stay a single argument and never be split
          # into extra validator options.
          SKILLS_ARGS=()
          if [[ "$PR_GATE" == "success" ]]; then
            while IFS= read -r skill; do
              [ -z "$skill" ] && continue
              SKILL_DIR=".github/skills/$skill"
              if [ -d "$SKILL_DIR" ]; then
                SKILLS_ARGS+=(--skills "$SKILL_DIR")
              fi
            done <<< "$CHANGED_SKILLS"
          fi
          # Fallback to all if no specific skills found
          if [ "${#SKILLS_ARGS[@]}" -eq 0 ]; then
            SKILLS_ARGS=(--skills .github/skills)
          fi

          # Capture the validator's own exit code (not tee's) without aborting the step.
          set +e
          skill-validator-bin/skill-validator check "${SKILLS_ARGS[@]}" --allow-repo-traversal --verbose 2>&1 | tee skill-check-skills.txt
          skills_rc=${PIPESTATUS[0]}
          set -e
          echo "::endgroup::"
          if [ "$skills_rc" -ne 0 ]; then rc=1; fi
        fi

        if [ -d .github/agents ]; then
          echo "::group::Validate agents"
          set +e
          skill-validator-bin/skill-validator check --agents .github/agents --verbose 2>&1 | tee skill-check-agents.txt
          agents_rc=${PIPESTATUS[0]}
          set -e
          echo "::endgroup::"
          if [ "$agents_rc" -ne 0 ]; then rc=1; fi
        fi

        # Either file may be missing when its directory does not exist; that is not an error.
        cat skill-check-skills.txt skill-check-agents.txt > sv-output.txt 2>/dev/null || true
        echo "exit_code=$rc" >> "$GITHUB_OUTPUT"

        # Step summary
        {
          echo "## skill-validator check"
          echo ""
          skill_count=$(find .github/skills -mindepth 1 -maxdepth 1 -type d 2>/dev/null | wc -l)
          agent_count=$(find .github/agents -name '*.agent.md' 2>/dev/null | wc -l)
          if [ "$rc" -eq 0 ]; then
            echo "All checks passed."
            echo ""
            echo "Validated **${skill_count}** skill(s) and **${agent_count}** agent(s)."
          else
            for f in skill-check-skills.txt skill-check-agents.txt; do
              if [ -f "$f" ]; then
                echo "### ${f}"
                echo '```'
                head -n 200 "$f"
                echo '```'
                echo ""
              fi
            done
          fi
        } >> "$GITHUB_STEP_SUMMARY"

    # ── Upload results for comment job ────────────────────────────
    - name: Save results artifact
      if: always()
      env:
        EXIT_CODE: ${{ steps.check.outputs.exit_code }}
      run: |
        mkdir -p sv-results
        skill_count=$(find .github/skills -mindepth 1 -maxdepth 1 -type d 2>/dev/null | wc -l)
        agent_count=$(find .github/agents -name '*.agent.md' 2>/dev/null | wc -l)
        echo "$skill_count" > sv-results/skill-count.txt
        echo "$agent_count" > sv-results/agent-count.txt
        echo "$EXIT_CODE" > sv-results/exit-code.txt
        if [ -f sv-output.txt ]; then
          cp sv-output.txt sv-results/sv-output.txt
        fi

    - name: Upload results
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: static-check-results
        path: sv-results/
        retention-days: 1

    # The check step always exits 0 so results can be uploaded first; the job
    # is failed here when validation reported a problem.
    - name: Fail if checks failed
      if: steps.check.outputs.exit_code != '0'
      run: exit 1
| # ========================================================================== | |
| # DISCOVER EVALUATABLE SKILLS | |
| # Only runs when LLM eval should happen (contributor PR or slash-command). | |
| # ========================================================================== | |
# Builds the evaluation matrix: for each changed skill (or every skill when
# this workflow file itself changed) that has tests/eval.yaml, emits an entry
# with its name, skill path and tests path as a JSON array in `entries`.
discover-eval:
  name: Discover skills to evaluate
  needs: [pr-gate, slash-gate]
  if: >-
    always() && !cancelled() && (
      (needs.pr-gate.result == 'success' && needs.pr-gate.outputs.is_contributor == 'true') ||
      needs.slash-gate.result == 'success'
    )
  runs-on: ubuntu-latest
  permissions:
    contents: read
  outputs:
    entries: ${{ steps.find.outputs.entries }}
    has_entries: ${{ steps.find.outputs.has_entries }}
  steps:
    - name: Checkout PR content
      uses: actions/checkout@v4
      with:
        repository: ${{ needs.pr-gate.outputs.head_repo || needs.slash-gate.outputs.head_repo }}
        ref: ${{ needs.pr-gate.outputs.head_sha || needs.slash-gate.outputs.head_sha }}
        sparse-checkout: |
          .github/skills
          .github/plugin.json
        persist-credentials: false

    - name: Discover changed files
      id: changed
      env:
        GH_TOKEN: ${{ github.token }}
        REPO: ${{ github.repository }}
        PR_NUMBER: ${{ needs.pr-gate.outputs.pr_number || needs.slash-gate.outputs.pr_number }}
      run: |
        CHANGED=$(gh api "repos/${REPO}/pulls/${PR_NUMBER}/files" \
          --paginate --jq '.[].filename')

        # `sed -n … p` prints only paths that are inside a skill directory, so
        # files directly under .github/skills/ are not mistaken for skill names.
        SKILL_DIRS=$(echo "$CHANGED" | grep '^\.github/skills/' | \
          sed -n 's|^\.github/skills/\([^/]*\)/.*|\1|p' | sort -u || true)

        # Check for workflow changes (evaluate all skills with tests)
        WORKFLOW_CHANGES=$(echo "$CHANGED" | grep '^\.github/workflows/skill-validation' || true)

        # Multi-line output; a random delimiter keeps file names from terminating it early.
        DELIM="EOF_$(openssl rand -hex 8)"
        echo "skill_dirs<<$DELIM" >> "$GITHUB_OUTPUT"
        echo "$SKILL_DIRS" >> "$GITHUB_OUTPUT"
        echo "$DELIM" >> "$GITHUB_OUTPUT"

        if [ -n "$WORKFLOW_CHANGES" ]; then
          echo "eval_all=true" >> "$GITHUB_OUTPUT"
        else
          echo "eval_all=false" >> "$GITHUB_OUTPUT"
        fi

    - name: Find skills with eval tests
      id: find
      shell: pwsh
      env:
        SKILL_DIRS: ${{ steps.changed.outputs.skill_dirs }}
        EVAL_ALL: ${{ steps.changed.outputs.eval_all }}
      run: |
        $entries = @()
        $evalAll = $env:EVAL_ALL -eq "true"

        if ($evalAll) {
          Write-Host "Workflow changes detected - evaluating all skills with tests"
          # Guard: the directory is absent when the checked-out ref has no skills.
          if (Test-Path ".github/skills") {
            $skills = @(Get-ChildItem -Path ".github/skills" -Directory |
              Select-Object -ExpandProperty Name)
          } else {
            $skills = @()
          }
        } else {
          $raw = $env:SKILL_DIRS
          # The list is empty when the PR changed no skill directory (e.g. an
          # agents-only PR); guard so .Split() is never invoked on a
          # null/empty value.
          if ([string]::IsNullOrWhiteSpace($raw)) {
            $skills = @()
          } else {
            $skills = @($raw.Split("`n", [StringSplitOptions]::RemoveEmptyEntries) |
              ForEach-Object { $_.Trim() } |
              Where-Object { $_ })
          }
        }

        foreach ($skill in $skills) {
          $evalFile = ".github/skills/$skill/tests/eval.yaml"
          if (Test-Path $evalFile) {
            Write-Host " -> $skill has eval tests"
            $entries += @{
              name = $skill
              skills_path = ".github/skills/$skill"
              tests_path = ".github/skills/$skill/tests"
            }
          } else {
            Write-Host " -> $skill has NO eval tests (static-only)"
          }
        }

        if ($entries.Count -eq 0) {
          Write-Host "No skills with eval tests to evaluate"
          echo "entries=[]" >> $env:GITHUB_OUTPUT
          echo "has_entries=false" >> $env:GITHUB_OUTPUT
        } else {
          # -AsArray keeps a single entry serialized as a one-element JSON array.
          $json = $entries | ConvertTo-Json -Compress -AsArray
          Write-Host "Entries to evaluate: $json"
          echo "entries=$json" >> $env:GITHUB_OUTPUT
          echo "has_entries=true" >> $env:GITHUB_OUTPUT
        }
| # ========================================================================== | |
| # LLM EVALUATION (matrix) | |
| # Runs skill-validator evaluate for each changed skill with eval tests. | |
| # ========================================================================== | |
# LLM evaluation, one matrix job per discovered skill: checks out the PR's
# skill files, lays tests out as eval-tests/<skill>/, obtains the
# skill-validator binary, picks one of the configured Copilot tokens, runs
# `skill-validator evaluate`, and uploads the results directory.
evaluate:
  name: evaluate (${{ matrix.entry.name }})
  needs: [pr-gate, slash-gate, discover-eval]
  if: >-
    always() && !cancelled() &&
    needs.discover-eval.result == 'success' &&
    needs.discover-eval.outputs.has_entries == 'true'
  runs-on: ubuntu-latest
  permissions:
    contents: read
  timeout-minutes: 120
  strategy:
    fail-fast: false
    matrix:
      entry: ${{ fromJson(needs.discover-eval.outputs.entries || '[]') }}
  steps:
    - name: Checkout PR content
      uses: actions/checkout@v4
      with:
        repository: ${{ needs.pr-gate.outputs.head_repo || needs.slash-gate.outputs.head_repo }}
        ref: ${{ needs.pr-gate.outputs.head_sha || needs.slash-gate.outputs.head_sha }}
        sparse-checkout: |
          .github/skills
          .github/plugin.json
        persist-credentials: false

    # ── Prepare test directory layout ─────────────────────────────
    # skill-validator evaluate expects tests at <tests-dir>/<skill>/eval.yaml
    # but maui keeps them co-located at .github/skills/<skill>/tests/eval.yaml.
    # Create a flat tests directory by copying files to match the expected layout.
    - name: Prepare test directory
      run: |
        mkdir -p eval-tests
        for dir in .github/skills/*/tests; do
          [ -d "$dir" ] || continue
          [ -f "$dir/eval.yaml" ] || continue
          # Both substitutions are quoted so a skill directory name containing
          # whitespace is not split into several words.
          skill=$(basename "$(dirname "$dir")")
          mkdir -p "eval-tests/$skill"
          # Copy eval.yaml and any fixture files
          cp -r "$dir"/* "eval-tests/$skill/"
        done
        echo "Prepared test directories:"
        find eval-tests -name 'eval.yaml' | sort

    # ── Download & cache skill-validator ──────────────────────────
    - name: Get cache key date
      id: cache-date
      run: echo "date=$(date +%Y-%m-%d)" >> "$GITHUB_OUTPUT"

    - name: Restore skill-validator from cache
      id: cache-sv
      uses: actions/cache/restore@v4
      with:
        path: skill-validator-bin
        key: ${{ env.VALIDATOR_CACHE_PREFIX }}-${{ steps.cache-date.outputs.date }}
        restore-keys: |
          ${{ env.VALIDATOR_CACHE_PREFIX }}-

    - name: Download skill-validator
      # Runs unless today's exact cache key was hit.
      if: steps.cache-sv.outputs.cache-hit != 'true'
      run: |
        mkdir -p skill-validator-bin
        curl -fsSL --retry 3 --retry-all-errors -o skill-validator.tar.gz \
          https://github.com/dotnet/skills/releases/download/skill-validator-nightly/skill-validator-linux-x64.tar.gz
        tar -xzf skill-validator.tar.gz -C skill-validator-bin
        if [ ! -f skill-validator-bin/skill-validator ]; then
          echo "::error::skill-validator binary not found after extraction"
          exit 1
        fi
        chmod +x skill-validator-bin/skill-validator

    - name: Save skill-validator to cache
      if: steps.cache-sv.outputs.cache-hit != 'true'
      uses: actions/cache/save@v4
      with:
        path: skill-validator-bin
        key: ${{ env.VALIDATOR_CACHE_PREFIX }}-${{ steps.cache-date.outputs.date }}

    # ── Select Copilot token ──────────────────────────────────────
    # Spreads matrix jobs across the configured token secrets, indexing by
    # (job-index + run_id) modulo the number of non-empty tokens.
    - name: Select Copilot token
      id: select-token
      env:
        TOKEN_1: ${{ secrets.COPILOT_GITHUB_TOKEN }}
        TOKEN_2: ${{ secrets.COPILOT_GITHUB_TOKEN_2 }}
        TOKEN_3: ${{ secrets.COPILOT_GITHUB_TOKEN_3 }}
      run: |
        TOKENS=()
        NAMES=()
        for i in 1 2 3; do
          var="TOKEN_$i"
          val="${!var}"   # indirect expansion: value of TOKEN_<i>
          if [ -n "$val" ]; then
            TOKENS+=("$val")
            if [ "$i" -eq 1 ]; then
              NAMES+=("COPILOT_GITHUB_TOKEN")
            else
              NAMES+=("COPILOT_GITHUB_TOKEN_$i")
            fi
          fi
        done

        if [ ${#TOKENS[@]} -eq 0 ]; then
          echo "::error::No COPILOT_GITHUB_TOKEN secrets are configured"
          exit 1
        fi

        JOB_INDEX="${{ strategy.job-index }}"
        RUN_ID="${{ github.run_id }}"
        if [ -n "$JOB_INDEX" ] && [ ${#TOKENS[@]} -gt 1 ]; then
          IDX=$(( (JOB_INDEX + RUN_ID) % ${#TOKENS[@]} ))
        elif [ -n "$JOB_INDEX" ]; then
          IDX=0
        else
          IDX=$((RANDOM % ${#TOKENS[@]}))
        fi

        echo "Selected ${NAMES[$IDX]} (1 of ${#TOKENS[@]} available tokens, job-index=${JOB_INDEX:-random})"
        # Mask before writing the value to the step output so it is redacted in logs.
        echo "::add-mask::${TOKENS[$IDX]}"
        echo "token=${TOKENS[$IDX]}" >> "$GITHUB_OUTPUT"

    # ── Run LLM evaluation ───────────────────────────────────────
    - name: Run skill-validator evaluate
      id: eval-run
      env:
        COPILOT_TOKEN: ${{ steps.select-token.outputs.token }}
        RESULTS_PATH: eval-results/${{ matrix.entry.name }}
        SKILLS_PATH: ${{ matrix.entry.skills_path }}
      run: |
        # skill-validator reads GITHUB_TOKEN for API access
        export GITHUB_TOKEN="$COPILOT_TOKEN"

        # Arguments live in an array and are expanded quoted. RESULTS_PATH is
        # derived from the skill directory name in the PR, so it must remain a
        # single argument and never be word-split into extra validator options.
        ARGS=(
          --verdict-warn-only --verbose
          --results-dir "$RESULTS_PATH"
          --reporter console --reporter json --reporter markdown
          --model claude-opus-4.6
          --judge-model claude-opus-4.6
          --runs 3
          --parallel-skills 2
          --parallel-scenarios 3
          --parallel-runs 3
        )

        # Record the validator's exit code without aborting the step.
        set +e
        skill-validator-bin/skill-validator evaluate "${ARGS[@]}" \
          --tests-dir eval-tests \
          "$SKILLS_PATH"
        EVAL_RC=$?
        set -e
        echo "eval_exit_code=$EVAL_RC" >> "$GITHUB_OUTPUT"

        # Determine actual pass/fail from results.json (the source of truth)
        RESULTS_JSON=$(find "$RESULTS_PATH" -name 'results.json' -type f | head -1)
        if [ -n "$RESULTS_JSON" ]; then
          # No verdicts at all counts as not passed.
          ALL_PASSED=$(jq 'if .verdicts | length == 0 then false else all(.verdicts[]; .passed) end' "$RESULTS_JSON")
          echo "eval_passed=$ALL_PASSED" >> "$GITHUB_OUTPUT"
        else
          echo "eval_passed=false" >> "$GITHUB_OUTPUT"
        fi

    - name: Upload results
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: skill-eval-results-${{ matrix.entry.name }}
        path: eval-results/${{ matrix.entry.name }}/
        include-hidden-files: true
        retention-days: 14
| # ========================================================================== | |
| # POST PR COMMENT | |
| # Consolidated results (static + eval) posted directly to the PR. | |
| # pull_request_target has write permissions, so no separate workflow needed. | |
| # ========================================================================== | |
| comment: | |
| name: Post results comment | |
| needs: [pr-gate, slash-gate, static-check, discover-eval, evaluate] | |
| if: >- | |
| always() && !cancelled() && ( | |
| needs.pr-gate.result == 'success' || | |
| needs.slash-gate.result == 'success' | |
| ) | |
| runs-on: ubuntu-latest | |
| permissions: | |
| pull-requests: write | |
| issues: write | |
| outputs: | |
| eval_passed: ${{ steps.post-comment.outputs.eval_passed }} | |
| steps: | |
| - name: Download static check results | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: static-check-results | |
| path: static-results/ | |
| continue-on-error: true | |
| - name: Download eval result artifacts | |
| if: needs.evaluate.result == 'success' || needs.evaluate.result == 'failure' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| pattern: skill-eval-results-* | |
| path: eval-results/ | |
| merge-multiple: false | |
| continue-on-error: true | |
| - name: Post comment | |
| id: post-comment | |
| uses: actions/github-script@v7 | |
| env: | |
| PR_NUMBER: ${{ needs.pr-gate.outputs.pr_number || needs.slash-gate.outputs.pr_number }} | |
| STATIC_RESULT: ${{ needs.static-check.result }} | |
| EVAL_RESULT: ${{ needs.evaluate.result }} | |
| HAS_ENTRIES: ${{ needs.discover-eval.outputs.has_entries }} | |
| DISCOVER_RESULT: ${{ needs.discover-eval.result }} | |
| IS_CONTRIBUTOR: ${{ needs.pr-gate.outputs.is_contributor || 'true' }} | |
| with: | |
| script: | | |
| const fs = require('fs'); | |
| const path = require('path'); | |
| const prNumber = parseInt(process.env.PR_NUMBER, 10); | |
| const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; | |
| const marker = '<!-- skill-validation-results -->'; | |
| const staticResult = process.env.STATIC_RESULT; | |
| const evalResult = process.env.EVAL_RESULT; | |
| const hasEntries = process.env.HAS_ENTRIES === 'true'; | |
| const discoverResult = process.env.DISCOVER_RESULT; | |
| const isContributor = process.env.IS_CONTRIBUTOR === 'true'; | |
| const evalRan = discoverResult === 'success'; | |
| const lines = [marker, '## 🔍 Skill Validation Results', '']; | |
| // ── Static check section ────────────────────────────── | |
| let staticOutput = ''; | |
| try { | |
| if (fs.existsSync('static-results/sv-output.txt')) { | |
| staticOutput = fs.readFileSync('static-results/sv-output.txt', 'utf8') | |
| .replace(/\x1b\[[0-9;]*m/g, '').trim(); | |
| } | |
| } catch (e) { /* ignore */ } | |
| const exitCode = (() => { | |
| try { return fs.readFileSync('static-results/exit-code.txt', 'utf8').trim(); } | |
| catch { return '?'; } | |
| })(); | |
| const skillCount = (() => { | |
| try { return fs.readFileSync('static-results/skill-count.txt', 'utf8').trim(); } | |
| catch { return '?'; } | |
| })(); | |
| const agentCount = (() => { | |
| try { return fs.readFileSync('static-results/agent-count.txt', 'utf8').trim(); } | |
| catch { return '?'; } | |
| })(); | |
| if (staticResult === 'success') { | |
| lines.push('### ✅ Static Checks Passed'); | |
| } else if (staticResult === 'failure') { | |
| lines.push('### ❌ Static Checks Failed'); | |
| } else { | |
| lines.push(`### ⚠️ Static Checks: ${staticResult}`); | |
| } | |
| lines.push(`Skills checked: ${skillCount} | Agents checked: ${agentCount}`); | |
| lines.push(''); | |
| if (staticOutput) { | |
| const findings = staticOutput.split('\n') | |
| .map(l => l.trim()) | |
| .filter(l => /^[❌⚠ℹ]/.test(l)) | |
| .slice(0, 10); | |
| if (findings.length > 0) { | |
| lines.push('| Level | Finding |'); | |
| lines.push('|---|---|'); | |
| for (const line of findings) { | |
| const level = line.startsWith('❌') ? '❌' | |
| : line.startsWith('⚠') ? '⚠️' | |
| : 'ℹ️'; | |
| const text = line.replace(/^[❌⚠ℹ️\s]+/, '').replace(/\|/g, '\\|'); | |
| lines.push(`| ${level} | ${text} |`); | |
| } | |
| lines.push(''); | |
| } | |
| lines.push('<details>'); | |
| lines.push('<summary>Full validator output</summary>'); | |
| lines.push(''); | |
| lines.push('```text'); | |
| lines.push(staticOutput.replace(/```/g, '` ` `')); | |
| lines.push('```'); | |
| lines.push(''); | |
| lines.push('</details>'); | |
| lines.push(''); | |
| } | |
| // ── Parse eval results from JSON ────────────────────── | |
| // Read results.json files from downloaded artifacts to determine | |
| // actual pass/fail (the source of truth, not the job exit code | |
| // which uses --verdict-warn-only). | |
| let allVerdicts = []; | |
| let evalPassed = true; | |
| let hasResults = false; | |
| const footnotes = []; | |
| if (fs.existsSync('eval-results')) { | |
| try { | |
| const resultDirs = fs.readdirSync('eval-results').filter(d => | |
| fs.statSync(path.join('eval-results', d)).isDirectory() | |
| ); | |
| for (const dir of resultDirs) { | |
| const dirPath = path.join('eval-results', dir); | |
| // Recursively find results.json | |
| const allFiles = []; | |
| function walkDir(d) { | |
| for (const f of fs.readdirSync(d)) { | |
| const fp = path.join(d, f); | |
| if (fs.statSync(fp).isDirectory()) walkDir(fp); | |
| else allFiles.push(path.relative(dirPath, fp)); | |
| } | |
| } | |
| walkDir(dirPath); | |
| const jsonFile = allFiles.find(f => f.endsWith('results.json')); | |
| if (jsonFile) { | |
| hasResults = true; | |
| const data = JSON.parse( | |
| fs.readFileSync(path.join(dirPath, jsonFile), 'utf8') | |
| ); | |
| if (data.verdicts && data.verdicts.length > 0) { | |
| allVerdicts.push(...data.verdicts); | |
| for (const v of data.verdicts) { | |
| if (!v.passed) evalPassed = false; | |
| } | |
| } else { | |
| evalPassed = false; // no verdicts = not passed | |
| } | |
| } | |
| } | |
| } catch (e) { | |
| console.log('Error reading eval results JSON:', e.message); | |
| } | |
| } | |
| // ── LLM evaluation section ──────────────────────────── | |
| if (!evalRan && !isContributor) { | |
| lines.push('### ⏭️ LLM Evaluation: Skipped'); | |
| lines.push(''); | |
| lines.push('> 💡 LLM evaluation was not run for this external PR.'); | |
| lines.push('> A repository contributor can post `/evaluate-skills` on this PR to trigger full evaluation.'); | |
| lines.push(''); | |
| } else if (!hasEntries) { | |
| lines.push('### ⏭️ LLM Evaluation: Skipped'); | |
| lines.push('_No changed skills with eval tests found._'); | |
| lines.push(''); | |
| } else if (hasResults) { | |
| // Use actual results from JSON to determine status | |
| if (evalPassed) { | |
| lines.push('### ✅ LLM Evaluation Passed'); | |
| } else { | |
| lines.push('### ❌ LLM Evaluation Failed'); | |
| } | |
| const passedCount = allVerdicts.filter(v => v.passed).length; | |
| lines.push(`${passedCount}/${allVerdicts.length} skill(s) passed validation`); | |
| lines.push(''); | |
| // ── Build results table ───────────────────────────── | |
| if (allVerdicts.length > 0) { | |
| lines.push('| Skill | Scenario | Baseline | Skilled | Verdict |'); | |
| lines.push('|-------|----------|----------|---------|---------|'); | |
| let fnIndex = 0; | |
| for (const verdict of allVerdicts) { | |
| const scenarios = verdict.scenarios || []; | |
| for (const sc of scenarios) { | |
| const baseScore = sc.baseline?.judgeResult?.overallScore; | |
| const isolatedScore = sc.skilledIsolated?.judgeResult?.overallScore; | |
| const pluginScore = sc.skilledPlugin?.judgeResult?.overallScore; | |
| // Format scores | |
| const baseStr = baseScore != null ? `${baseScore.toFixed(1)}/5` : '—'; | |
| // Pick the best skilled score (isolated or plugin) | |
| let skilledStr; | |
| if (isolatedScore != null && pluginScore != null) { | |
| skilledStr = `${isolatedScore.toFixed(1)}/5 (iso) · ${pluginScore.toFixed(1)}/5 (plug)`; | |
| } else if (isolatedScore != null) { | |
| skilledStr = `${isolatedScore.toFixed(1)}/5`; | |
| } else if (pluginScore != null) { | |
| skilledStr = `${pluginScore.toFixed(1)}/5`; | |
| } else { | |
| skilledStr = '—'; | |
| } | |
| // Timeout indicator | |
| const timeoutFlag = sc.timedOut ? ' ⏳' : ''; | |
| // Verdict icon — per-scenario: improvement >= 0 means not regressed | |
| const improvement = sc.improvementScore || 0; | |
| const scenarioIcon = improvement >= 0 ? '✅' : '⚠️'; | |
| // Footnote for high variance or timeout | |
| let footRef = ''; | |
| if (sc.highVariance || sc.timedOut) { | |
| fnIndex++; | |
| const parts = []; | |
| if (sc.highVariance) parts.push(`High run-to-run variance (CV=${(sc.varianceCV || 0).toFixed(2)})`); | |
| if (sc.timedOut) parts.push(`Timeout at ${sc.timeoutSeconds || '?'}s`); | |
| footRef = ` <a href="#user-content-fn-${fnIndex}" id="ref-${fnIndex}">[${fnIndex}]</a>`; | |
| footnotes.push(`<a href="#user-content-ref-${fnIndex}" id="fn-${fnIndex}"><strong>[${fnIndex}]</strong></a> ${parts.join('. ')}`); | |
| } | |
| const safeSkillName = (verdict.skillName || '').replace(/\|/g, '\\|'); | |
| const safeScenarioName = (sc.scenarioName || '').replace(/\|/g, '\\|'); | |
| lines.push(`| ${safeSkillName} | ${safeScenarioName} | ${baseStr}${timeoutFlag} | ${skilledStr}${timeoutFlag} | ${scenarioIcon}${footRef} |`); | |
| } | |
| } | |
| lines.push(''); | |
| // Overall verdict line per skill | |
| for (const verdict of allVerdicts) { | |
| const icon = verdict.passed ? '✅' : '❌'; | |
| const reason = (verdict.reason || '').replace(/\|/g, '\\|'); | |
| const safeSkillNameSummary = (verdict.skillName || '').replace(/\|/g, '\\|'); | |
| lines.push(`${icon} **${safeSkillNameSummary}**: ${reason}`); | |
| lines.push(''); | |
| } | |
| // Footnotes | |
| if (footnotes.length > 0) { | |
| for (const fn of footnotes) { | |
| lines.push(fn); | |
| } | |
| lines.push(''); | |
| } | |
| // Timeout warning | |
| const hasTimeout = allVerdicts.some(v => | |
| (v.scenarios || []).some(s => s.timedOut) | |
| ); | |
| if (hasTimeout) { | |
| lines.push('> ⏳ **timeout** — run(s) hit the scenario timeout limit; scoring may be impacted'); | |
| lines.push(''); | |
| } | |
| } | |
| } else if (evalResult === 'success') { | |
| lines.push('### ✅ LLM Evaluation Passed'); | |
| lines.push(''); | |
| } else if (evalResult === 'failure') { | |
| lines.push('### ❌ LLM Evaluation Failed'); | |
| lines.push(''); | |
| } else if (evalResult === 'skipped') { | |
| lines.push('### ⏭️ LLM Evaluation: Skipped'); | |
| lines.push(''); | |
| } else { | |
| lines.push(`### ⚠️ LLM Evaluation: ${evalResult}`); | |
| lines.push(''); | |
| } | |
| // Detailed judge reports in collapsible sections | |
| if (fs.existsSync('eval-results')) { | |
| try { | |
| const resultDirs = fs.readdirSync('eval-results').filter(d => | |
| fs.statSync(path.join('eval-results', d)).isDirectory() | |
| ); | |
| for (const dir of resultDirs) { | |
| const skillName = dir.replace('skill-eval-results-', ''); | |
| const dirPath = path.join('eval-results', dir); | |
| const allFiles = []; | |
| function walkDir2(d) { | |
| for (const f of fs.readdirSync(d)) { | |
| const fp = path.join(d, f); | |
| if (fs.statSync(fp).isDirectory()) walkDir2(fp); | |
| else allFiles.push(path.relative(dirPath, fp)); | |
| } | |
| } | |
| walkDir2(dirPath); | |
| // Include per-scenario judge reports (not summary.md which duplicates the table) | |
| const mdFiles = allFiles.filter(f => | |
| f.endsWith('.md') && !f.endsWith('summary.md') | |
| ); | |
| for (const mdFile of mdFiles) { | |
| const mdContent = fs.readFileSync( | |
| path.join(dirPath, mdFile), 'utf8' | |
| ).trim(); | |
| if (mdContent.length > 0) { | |
| const scenarioName = path.basename(mdFile, '.md'); | |
| lines.push(`<details>`); | |
| lines.push(`<summary>📊 ${skillName} / ${scenarioName}</summary>`); | |
| lines.push(''); | |
| lines.push(mdContent.replace(/```/g, '` ` `').replace(/<\/details>/gi, '</details>')); | |
| lines.push(''); | |
| lines.push('</details>'); | |
| lines.push(''); | |
| } | |
| } | |
| } | |
| } catch (e) { | |
| console.log('Error reading eval result details:', e.message); | |
| } | |
| } | |
| // ── Investigation prompt for failures ───────────────── | |
| // When any evaluated skill failed, build a copy-paste prompt | |
| // that tells the user how to download artifacts and investigate | |
| // with their AI coding agent (same pattern as dotnet/skills). | |
| let investigatePrompt = ''; | |
| if (hasResults && !evalPassed) { | |
| const runId = context.runId; | |
| const repo = `${context.repo.owner}/${context.repo.repo}`; | |
| investigatePrompt = [ | |
| '', | |
| '> **To investigate failures**, paste this to your AI coding agent:', | |
| '>', | |
| `> _For PR #${prNumber} in ${repo}, download eval artifacts with ` + | |
| `\`gh run download ${runId} --repo ${repo} --pattern "skill-eval-results-*" --dir ./eval-results\`, ` + | |
| `then fetch https://raw.githubusercontent.com/dotnet/skills/main/eng/skill-validator/src/docs/InvestigatingResults.md ` + | |
| `and follow it to analyze the results.json files. Diagnose each failure, suggest fixes to the eval.yaml ` + | |
| `and skill content, and tell me what to fix first._`, | |
| ].join('\n'); | |
| } | |
| // ── Pipeline link (styled like dotnet/skills) ───────── | |
| lines.push(`[🔍 Full results and investigation steps](${runUrl})`); | |
| const body = lines.join('\n'); | |
| // ── Write step summary with investigation prompt ────── | |
| const summaryPath = process.env.GITHUB_STEP_SUMMARY; | |
| if (summaryPath) { | |
| const summaryLines = ['## Skill Validation Results', '']; | |
| summaryLines.push(body.replace(marker, '').trim()); | |
| if (investigatePrompt) { | |
| summaryLines.push(investigatePrompt); | |
| } | |
| fs.appendFileSync(summaryPath, summaryLines.join('\n') + '\n'); | |
| } | |
| // Upsert comment (find existing with marker, update or create) | |
| // Paginate to handle PRs with 100+ comments | |
| const comments = await github.paginate( | |
| github.rest.issues.listComments, | |
| { | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| issue_number: prNumber, | |
| per_page: 100, | |
| } | |
| ); | |
| const existing = comments.find(c => c.body && c.body.includes(marker)); | |
| if (existing) { | |
| await github.rest.issues.updateComment({ | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| comment_id: existing.id, | |
| body, | |
| }); | |
| console.log(`Updated existing comment ${existing.id}`); | |
| } else { | |
| await github.rest.issues.createComment({ | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| issue_number: prNumber, | |
| body, | |
| }); | |
| console.log('Created new PR comment'); | |
| } | |
| // Save eval pass/fail for downstream jobs | |
| const outputPath = process.env.GITHUB_OUTPUT; | |
| if (outputPath) { | |
| fs.appendFileSync(outputPath, `eval_passed=${hasResults ? evalPassed : 'na'}\n`); | |
| } | |
| # ========================================================================== | |
| # REPORT STATUS | |
| # Post final commit status on PR head SHA. | |
| # The job also creates a check run on that SHA and, for slash-command runs, | |
| # removes the "eyes" reaction from the triggering comment. | |
| # ========================================================================== | |
| report-status: | |
| name: Report status | |
| needs: [pr-gate, slash-gate, static-check, discover-eval, evaluate, comment] | |
| # Runs regardless of upstream job failures, but not when the run was | |
| # cancelled, and only when at least one of the two gate jobs succeeded. | |
| if: >- | |
| always() && !cancelled() && ( | |
| needs.pr-gate.result == 'success' || | |
| needs.slash-gate.result == 'success' | |
| ) | |
| runs-on: ubuntu-latest | |
| # statuses: used to post the commit status; checks: used to create the check run. | |
| # NOTE(review): issues: write is presumably needed to delete the reaction on | |
| # the triggering comment — confirm. | |
| permissions: | |
| statuses: write | |
| checks: write | |
| issues: write | |
| steps: | |
| # Collapse the upstream job results into one commit status with context | |
| # "skill-validation" and post it on the PR head SHA. | |
| - name: Set commit status | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| run: | | |
| # Head SHA comes from pr-gate's output, falling back to slash-gate's. | |
| HEAD_SHA="${{ needs.pr-gate.outputs.head_sha || needs.slash-gate.outputs.head_sha }}" | |
| if [ -z "$HEAD_SHA" ]; then | |
| echo "No head SHA (workflow_dispatch?) — skipping status" | |
| exit 0 | |
| fi | |
| STATIC="${{ needs.static-check.result }}" | |
| EVAL="${{ needs.evaluate.result }}" | |
| DISCOVER="${{ needs.discover-eval.result }}" | |
| HAS_ENTRIES="${{ needs.discover-eval.outputs.has_entries }}" | |
| EVAL_PASSED="${{ needs.comment.outputs.eval_passed }}" | |
| # Decision order: the static result is checked first. When it succeeded and | |
| # discovery did not succeed or reported no entries, the status is a | |
| # static-only success. Otherwise the comment job's eval_passed output decides, | |
| # then the evaluate job result; any remaining combination is reported as "error". | |
| if [[ "$STATIC" == "success" ]]; then | |
| if [[ "$DISCOVER" != "success" || "$HAS_ENTRIES" != "true" ]]; then | |
| STATE="success" | |
| DESC="Skill validation passed (static only)" | |
| elif [[ "$EVAL_PASSED" == "true" ]]; then | |
| STATE="success" | |
| DESC="Skill validation passed" | |
| elif [[ "$EVAL_PASSED" == "false" ]]; then | |
| STATE="failure" | |
| DESC="LLM evaluation failed" | |
| elif [[ "$EVAL" == "failure" ]]; then | |
| STATE="failure" | |
| DESC="LLM evaluation failed" | |
| else | |
| STATE="error" | |
| DESC="Evaluation incomplete ($EVAL)" | |
| fi | |
| elif [[ "$STATIC" == "failure" ]]; then | |
| STATE="failure" | |
| DESC="Static validation failed" | |
| else | |
| STATE="error" | |
| DESC="Validation incomplete (static: $STATIC)" | |
| fi | |
| # Post commit status (appears in PR status checks); target_url links to this workflow run | |
| gh api "repos/${{ github.repository }}/statuses/${HEAD_SHA}" \ | |
| -f state="$STATE" \ | |
| -f context="skill-validation" \ | |
| -f description="$DESC" \ | |
| -f target_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" | |
| # Create a Check Run on the PR head SHA so it shows in the PR Checks tab. | |
| # pull_request_target runs are associated with the base branch, not the PR, | |
| # so without this the workflow link won't appear on the PR. | |
| - name: Create check run | |
| if: always() | |
| uses: actions/github-script@v7 | |
| with: | |
| script: | | |
| // Maps the upstream job results onto a completed check run named | |
| // 'Skill Validation'. Nothing is created when no head SHA is available. | |
| const headSha = '${{ needs.pr-gate.outputs.head_sha || needs.slash-gate.outputs.head_sha }}'; | |
| if (!headSha) return; | |
| const staticResult = '${{ needs.static-check.result }}'; | |
| const evalPassed = '${{ needs.comment.outputs.eval_passed }}'; | |
| const evalResult = '${{ needs.evaluate.result }}'; | |
| const hasEntries = '${{ needs.discover-eval.outputs.has_entries }}' === 'true'; | |
| const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; | |
| let conclusion, title; | |
| if (staticResult === 'failure') { | |
| conclusion = 'failure'; | |
| title = 'Static validation failed'; | |
| } else if (staticResult !== 'success') { | |
| // Static check was skipped or cancelled: the conclusion stays 'failure' as | |
| // before, but the title says the run is incomplete rather than claiming a | |
| // static failure, matching the commit-status description for this case. | |
| conclusion = 'failure'; | |
| title = `Validation incomplete (static: ${staticResult})`; | |
| } else if (!hasEntries) { | |
| conclusion = 'success'; | |
| title = 'Skill validation passed (static only)'; | |
| } else if (evalPassed === 'true') { | |
| conclusion = 'success'; | |
| title = 'Skill validation passed'; | |
| } else if (evalPassed === 'false' || evalResult === 'failure') { | |
| // Either the parsed results failed, or the evaluate job itself failed | |
| conclusion = 'failure'; | |
| title = 'LLM evaluation failed'; | |
| } else { | |
| conclusion = 'neutral'; | |
| title = `Evaluation: ${evalResult}`; | |
| } | |
| await github.rest.checks.create({ | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| name: 'Skill Validation', | |
| head_sha: headSha, | |
| status: 'completed', | |
| conclusion, | |
| output: { | |
| title, | |
| summary: `[View full results](${runUrl})`, | |
| }, | |
| details_url: runUrl, | |
| }); | |
| console.log(`Created check run: ${conclusion} - ${title}`); | |
| # Remove eyes reaction (slash command only) | |
| # Runs only when the slash-gate job succeeded; the comment id is read from the | |
| # triggering event payload. | |
| - name: Remove reaction | |
| if: needs.slash-gate.result == 'success' | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| run: | | |
| COMMENT_ID="${{ github.event.comment.id }}" | |
| # Id of the first "eyes" reaction left by github-actions[bot] on that comment; | |
| # empty when the listing returns none. | |
| REACTION_ID=$(gh api "repos/${{ github.repository }}/issues/comments/${COMMENT_ID}/reactions" \ | |
| --jq '.[] | select(.content == "eyes" and .user.login == "github-actions[bot]") | .id' \ | |
| | head -1 || echo "") | |
| # Best-effort delete: a failed DELETE does not fail the step. | |
| if [[ -n "$REACTION_ID" && "$REACTION_ID" != "null" ]]; then | |
| gh api "repos/${{ github.repository }}/issues/comments/${COMMENT_ID}/reactions/${REACTION_ID}" \ | |
| -X DELETE || true | |
| fi | |