|
| 1 | +#!/usr/bin/env -S uv run --quiet |
| 2 | +# /// script |
| 3 | +# requires-python = ">=3.10" |
| 4 | +# dependencies = [] |
| 5 | +# /// |
| 6 | +"""Select which skills SkillSpector should scan based on a git diff. |
| 7 | +
|
| 8 | +The SkillSpector per-skill matrix used to fan out over *every* skill on every |
| 9 | +run, so a one-line edit to a single skill triggered a full re-scan of the whole |
| 10 | +catalog. This script narrows the matrix to just the skills that actually |
| 11 | +changed. |
| 12 | +
|
| 13 | +Behaviour: |
| 14 | +
|
| 15 | + * Diff ``HEAD`` against its merge base with ``--base`` and collect the changed paths. |
| 16 | + * If any *infra* path changed (this script, the SkillSpector workflow, the |
| 17 | + gate, or the allowlist), scan EVERY skill -- a change to the scanning |
| 18 | + machinery can affect the result for all skills. |
| 19 | + * Otherwise, scan only the skills with changes under ``skills/<name>/``. |
| 20 | + * Print the selected skill names as a compact JSON array (for a CI matrix). |
| 21 | + The array may be empty, in which case there is nothing to scan. |
| 22 | +
|
| 23 | +If the base ref is missing or unresolvable (a manual ``workflow_dispatch`` run, |
| 24 | +a brand-new branch with no parent, a shallow checkout, ...), fall back to |
| 25 | +scanning every skill so we never silently skip a scan. |
| 26 | +
|
| 27 | +Usage: |
| 28 | +
|
| 29 | + uv run .github/scripts/changed_skills.py --base "$BASE_SHA" |
| 30 | + uv run .github/scripts/changed_skills.py --base origin/main --skills-dir skills |
| 31 | +""" |
| 32 | + |
| 33 | +from __future__ import annotations |
| 34 | + |
| 35 | +import argparse |
| 36 | +import json |
| 37 | +import subprocess |
| 38 | +import sys |
| 39 | +from pathlib import Path |
| 40 | + |
# This script sits three directory levels below the repository root, so climb
# three parents from the resolved file location.
_THIS_FILE = Path(__file__).resolve()
REPO_ROOT = _THIS_FILE.parent.parent.parent

# Where skill folders are looked up when --skills-dir is not given.
DEFAULT_SKILLS_DIR = REPO_ROOT.joinpath("skills")

# Repo-relative POSIX paths of the scanning machinery. A change to any of
# these triggers a scan of every skill, because it can alter the result for
# all of them rather than for one skill's content.
INFRA_PATHS = (
    ".github/workflows/skillspector.yml",
    ".github/scripts/skillspector_gate.py",
    ".github/scripts/changed_skills.py",
    ".github/skillspector-allow.yml",
)
| 52 | + |
| 53 | + |
def discover_skills(root: Path) -> list[str]:
    """List the skill directory names directly under `root`.

    Entries whose name starts with ``.`` and entries that are not directories
    are ignored.

    Args:
        root: Directory whose immediate subdirectories are the skills.

    Returns:
        The skill names in sorted order, or an empty list when `root` does
        not exist or is not a directory.
    """
    # `is_dir` rather than `exists`: a regular file also "exists", but
    # iterating it would raise NotADirectoryError.
    if not root.is_dir():
        return []
    return sorted(
        entry.name
        for entry in root.iterdir()
        if entry.is_dir() and not entry.name.startswith(".")
    )
| 61 | + |
| 62 | + |
def _ref_exists(ref: str) -> bool:
    """Report whether `ref` can be resolved to a commit by git.

    Runs ``git rev-parse --verify --quiet`` on ``<ref>^{commit}`` from the
    repository root; a zero exit status means the ref resolved.
    """
    command = ["git", "rev-parse", "--verify", "--quiet", ref + "^{commit}"]
    # Output is captured only to keep git's messages off our own streams;
    # the exit status is the answer.
    completed = subprocess.run(
        command,
        cwd=REPO_ROOT,
        capture_output=True,
        text=True,
    )
    return completed.returncode == 0
| 72 | + |
| 73 | + |
def changed_paths(base: str) -> list[str]:
    """Return the paths changed between the merge base of `base` and HEAD.

    Uses a three-dot diff (``base...HEAD``) so the comparison is against the
    merge base of `base` and HEAD -- the set of changes introduced on this
    branch/PR. Only committed changes are considered.

    Args:
        base: Git ref or SHA to diff against.

    Returns:
        Repo-relative paths, exactly as git stores them, in git's output
        order.

    Raises:
        subprocess.CalledProcessError: If ``git diff`` exits non-zero.
    """
    result = subprocess.run(
        [
            "git",
            "diff",
            "--name-only",
            # Report a rename as a deletion plus an addition so the *source*
            # path is listed too; otherwise a file moved out of a skill (or a
            # moved infra file) would only show up under its new location.
            "--no-renames",
            # NUL-terminated output: paths are emitted verbatim instead of
            # being C-quoted, and need no whitespace stripping.
            "-z",
            f"{base}...HEAD",
            # Everything before `--` is a revision, never a pathspec.
            "--",
        ],
        cwd=REPO_ROOT,
        capture_output=True,
        text=True,
        check=True,
    )
    return [path for path in result.stdout.split("\0") if path]
| 88 | + |
| 89 | + |
def select_skills(base: str | None, skills_dir: Path) -> list[str]:
    """Pick the skills to scan for the given diff base.

    Every fallback errs on the side of scanning everything, so a scan is
    never silently skipped.

    Args:
        base: Git ref/SHA to diff against; empty or ``None`` means "unknown".
        skills_dir: Directory containing one folder per skill. It is compared
            against ``REPO_ROOT``, so it should be an absolute, resolved path.

    Returns:
        Sorted skill names. All skills are returned when the base is missing
        or unresolvable, when the diff fails, when an infra path changed, or
        when `skills_dir` is not inside the repository. Otherwise only the
        skills with changed paths that still exist on disk are returned,
        which may be an empty list.
    """
    all_skills = discover_skills(skills_dir)

    if not base or not _ref_exists(base):
        print(
            f"Base ref {base!r} is missing or unresolvable; scanning all skills.",
            file=sys.stderr,
        )
        return all_skills

    try:
        paths = changed_paths(base)
    except subprocess.CalledProcessError as exc:
        print(
            f"git diff against {base!r} failed ({exc}); scanning all skills.",
            file=sys.stderr,
        )
        return all_skills

    if any(p in INFRA_PATHS for p in paths):
        print("SkillSpector infra changed; scanning all skills.", file=sys.stderr)
        return all_skills

    # Diff paths are repo-relative, so the skills dir must be expressed the
    # same way. If it lies outside the repository the diff cannot be mapped
    # onto it; scan everything rather than crash or skip.
    try:
        relative_dir = skills_dir.relative_to(REPO_ROOT).as_posix()
    except ValueError:
        print(
            f"Skills dir {str(skills_dir)!r} is not inside the repository; "
            "scanning all skills.",
            file=sys.stderr,
        )
        return all_skills

    # When skills_dir *is* the repo root, relative_to() yields "."; a "./"
    # prefix would match no diff path and silently select nothing.
    prefix = "" if relative_dir == "." else f"{relative_dir}/"

    changed: set[str] = set()
    known = set(all_skills)
    for path in paths:
        if not path.startswith(prefix):
            continue
        name = path[len(prefix) :].split("/", 1)[0]
        # Only scan skills that still exist on disk (skip pure deletions).
        if name in known:
            changed.add(name)

    return sorted(changed)
| 130 | + |
| 131 | + |
def main(argv: list[str] | None = None) -> int:
    """Parse the command line, select the skills, and print them as JSON.

    Args:
        argv: Arguments to parse; ``None`` defers to argparse's default.

    Returns:
        Always 0.
    """
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument(
        "--base",
        default="",
        help="Git ref/SHA to diff against. Empty or unresolvable means scan every skill.",
    )
    cli.add_argument(
        "--skills-dir",
        type=Path,
        default=DEFAULT_SKILLS_DIR,
        help=f"Directory containing skill folders (default: {DEFAULT_SKILLS_DIR}).",
    )
    options = cli.parse_args(argv)

    # Resolve first so the directory is absolute before selection.
    skills_root = options.skills_dir.resolve()
    selected = select_skills(options.base, skills_root)

    # Compact separators keep the array on one line with no extra spaces.
    payload = json.dumps(selected, separators=(",", ":"))
    print(payload)
    return 0
| 153 | + |
| 154 | + |
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())
0 commit comments