amd
diff --git a/‎.github/skillspector-allow.yml‎
Lines changed: 113 additions & 0 deletions b/‎.github/skillspector-allow.yml‎
Lines changed: 113 additions & 0 deletions
diff --git a/‎.github/workflows/skillspector.yml‎
Lines changed: 124 additions & 0 deletions b/‎.github/workflows/skillspector.yml‎
Lines changed: 124 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎eval/behavioral/conftest.py‎
Lines changed: 28 additions & 0 deletions b/‎eval/behavioral/conftest.py‎
Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,113 @@
+# SkillSpector false-positive allowlist.
+#
+# SkillSpector's static scan is high-recall / moderate-precision and has no
+# native per-finding suppression. This file is the auditable place to record
+# findings that are genuinely false positives so the CI gate
+# (scripts/skillspector_gate.py) does not fail on them. Everything not listed
+# here still fails the build at HIGH/CRITICAL.
+#
+# Each entry suppresses ONE rule for ONE file within ONE skill:
+#   skill:  skill directory name under skills/
+#   rule:   SkillSpector rule id (e.g. YR1)
+#   file:   path as it appears in the report, relative to the skill dir
+#   match:  (optional) substring that must appear in the finding message, so
+#           the suppression stays scoped to the specific signature
+#   reason: why this is a false positive (keep it accurate and specific)
+#
+# Add entries sparingly and only when the finding is demonstrably benign.
+
+suppressions:
+  - skill: rocm-doctor
+    rule: YR1
+    file: scripts/apply_fix.py
+    match: backdoor_persistence
+    reason: >-
+      False positive. The 'backdoor_persistence' YARA rule's $bashrc_persist
+      string matches any `echo ... >> ~/.bashrc`. Here it is the documented
+      remediation that appends `export PATH="/opt/rocm/bin:$PATH"` so ROCm
+      binaries land on PATH after install. Standard ROCm setup guidance, not a
+      persistence backdoor or payload.
+  - skill: rocm-doctor
+    rule: YR1
+    file: scripts/diagnose.py
+    match: backdoor_persistence
+    reason: >-
+      False positive. Same $bashrc_persist match: diagnose.py prints the
+      remediation command `echo 'export PATH=<bin>:$PATH' >> ~/.bashrc` (or
+      ~/.zshrc) for the user to add ROCm/HIP to PATH. No payload, no SSH key
+      injection, no hidden user.
+  - skill: rocm-doctor
+    rule: OH1
+    file: scripts/apply_fix.py
+    match: Unvalidated Output Injection
+    reason: >-
+      False positive. The flag is on the generic `_run(cmd: list[str], ...)`
+      helper, which calls `subprocess.run(cmd, ..., shell defaults to False)`
+      with a list-form argv, so there is no shell interpolation. Every `cmd`
+      is a hardcoded argv list assembled in-script (e.g.
+      `["usermod","-a","-G","render,video",user]`, `["modprobe","amdgpu"]`);
+      the only dynamic pieces are the local username from `$USER`/`$LOGNAME`
+      and binary paths resolved via `shutil.which`. No LLM/model output ever
+      reaches this sink, so there is nothing to validate or sanitize.
+  - skill: rocm-doctor
+    rule: OH1
+    file: scripts/examine.py
+    match: Unvalidated Output Injection
+    reason: >-
+      False positive. Same generic `_run(cmd: list[str], ...)` helper as in
+      apply_fix.py: list-form `subprocess.run` with no shell=True. The read-only
+      probes only ever pass fixed argv lists (`["rocminfo"]`,
+      `["lspci","-nn","-D"]`, the PowerShell/CIM `Get-CimInstance` probes, the
+      framework binary from `shutil.which`). No model output flows into the
+      command, and there is no shell to inject into.
+  - skill: rocm-doctor
+    rule: PE3
+    file: scripts/examine.py
+    match: Credential Access
+    reason: >-
+      False positive. Line 493 is a code comment ("Resolve uid/gid to names via
+      /etc/passwd & /etc/group") describing how `_stat_device` maps a device's
+      owner uid/gid to names. The actual resolution uses the stdlib `pwd`/`grp`
+      modules (`pwd.getpwuid` / `grp.getgrgid`), not any read of /etc/passwd,
+      /etc/shadow, .env, or token files. No credential material is accessed.
+  - skill: local-ai-use
+    rule: SC2
+    file: SKILL.md
+    match: External Script Fetching
+    reason: >-
+      False positive. The flagged `curl ... | python -c ...` is not fetching or
+      executing a remote script: `curl` POSTs an image-generation request to the
+      local loopback Lemonade Server, and the piped `python -c` only
+      base64-decodes the JSON response body and writes it to `out.png`. No
+      remote code is downloaded or run.
+  - skill: local-ai-use
+    rule: SC2
+    file: templates/local-ai-rule.md
+    match: External Script Fetching
+    reason: >-
+      False positive. Same pattern as SKILL.md: the `curl ... | python -c ...`
+      in the installable rule template POSTs to the local Lemonade Server and
+      pipes the JSON response into `python -c` purely to base64-decode the image
+      bytes into `out.png`. No remote script is fetched or executed.
+  - skill: local-ai-use
+    rule: OH1
+    file: scripts/setup_local_ai.py
+    match: Unvalidated Output Injection
+    reason: >-
+      False positive. Both flagged calls (lines 98 and 128) use list-form
+      subprocess.run argv with no shell=True, so there is no shell
+      interpolation. Line 98 is fully hardcoded (`lemonade list --downloaded
+      --json`); line 128 is `lemonade pull <model>` where `model` comes from
+      argparse defaults / explicit --image-model/--tts-model/--stt-model flags,
+      not from LLM or model output. Nothing here consumes unvalidated model
+      output, so there is no injection sink to sanitize.
+  - skill: local-ai-use
+    rule: P2
+    file: templates/local-ai-rule.md
+    match: Hidden Instructions
+    reason: >-
+      False positive. Line 1 is the `<!-- BEGIN amd-skills:local-ai-use -->`
+      HTML comment, a benign machine-readable marker that setup_local_ai.py uses
+      to locate and replace the rule block in AGENTS.md in place on re-runs. It
+      carries no instructions; the surrounding rule text is plain, reviewable
+      content by design (it is the installable routing rule itself).
@@ -0,0 +1,124 @@
+name: skillspector
+
+# Statically scan every skill under skills/ with SkillSpector to catch
+# malicious patterns and security risks before they land on main. LLM
+# semantic analysis is intentionally disabled (--no-llm): the scan is fully
+# static, needs no API key, and runs in an isolated environment via uvx.
+#
+# Mirrors the discover-matrix-aggregate shape of validate.yml so each skill
+# is its own pass/fail and a single aggregate check (the `skillspector` job)
+# can be marked required in branch protection.
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    paths:
+      - "skills/**"
+      - ".github/workflows/skillspector.yml"
+      # The allowlist and the gate script decide which findings pass, and the
+      # discovery script decides which skills get scanned, so a change to any
+      # of them must re-run the scan as well.
+      - ".github/skillspector-allow.yml"
+      - "scripts/skillspector_gate.py"
+      - "scripts/validate_skills.py"
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  # Enumerate the skills so the scan job can fan out over them with a matrix,
+  # reusing the same discovery script that validate.yml relies on.
+  discover-skills:
+    name: Discover skills
+    runs-on: ubuntu-latest
+    outputs:
+      skills: ${{ steps.discover.outputs.skills }}
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Set up uv
+        uses: astral-sh/setup-uv@v7
+
+      - name: List skills
+        id: discover
+        # Capture the list in its own assignment so that a failing discovery
+        # script fails this step. Inside `echo "...$(...)"` the failure would
+        # be swallowed and an empty `skills=` output written instead.
+        run: |
+          skills="$(uv run scripts/validate_skills.py --list)"
+          echo "skills=${skills}" >> "$GITHUB_OUTPUT"
+
+  scan-skill:
+    name: Scan skill
+    needs: discover-skills
+    runs-on: ubuntu-latest
+    strategy:
+      # Don't cancel the other skills when one fails; we want to see every
+      # skill's scan result in a single run.
+      fail-fast: false
+      matrix:
+        skill: ${{ fromJson(needs.discover-skills.outputs.skills) }}
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Set up uv
+        uses: astral-sh/setup-uv@v7
+
+      # Run SkillSpector pinned to a specific commit for reproducibility and
+      # supply-chain safety. To bump it, update the SHA below to the desired
+      # skillspector commit (e.g. `git ls-remote https://github.com/NVIDIA/skillspector.git main`).
+      #
+      # The CLI exits 1 when a skill's *aggregate* risk score is HIGH/CRITICAL
+      # (score > 50) and 2 on error. We don't gate on the aggregate score,
+      # because a pile of MEDIUM findings can push the aggregate to HIGH even
+      # when no single finding is HIGH/CRITICAL. Instead we fail only when an
+      # individual finding is HIGH or CRITICAL (and always fail on error).
+      - name: Scan skill with SkillSpector
+        env:
+          # Hand the skill name to the script through the environment rather
+          # than expanding the matrix value inside the script text: skill
+          # directory names come from the pull request and must never be
+          # interpreted as shell syntax.
+          SKILL: ${{ matrix.skill }}
+        run: |
+          mkdir -p reports
+          report="reports/${SKILL}.md"
+          set +e
+          uvx --python 3.12 \
+            --from "git+https://github.com/NVIDIA/skillspector.git@939da7d41eed4282e4d8217fe2254c69f690027e" \
+            skillspector scan "skills/${SKILL}" \
+              --no-llm --format markdown --output "$report"
+          code=$?
+          set -e
+          echo "----- SkillSpector report: ${SKILL} -----"
+          cat "$report" || true
+
+          # 0 (clean) and 1 (aggregate score HIGH/CRITICAL) are normal scan
+          # outcomes that the per-finding gate below evaluates. Anything else
+          # (2 = SkillSpector error, 126/127 = tool could not be run, ...)
+          # means the scan itself did not complete; surface that.
+          if [ "$code" -ne 0 ] && [ "$code" -ne 1 ]; then
+            echo "SkillSpector errored (exit code $code)." >&2
+            exit "$code"
+          fi
+
+          # Without a report there is nothing for the gate to evaluate; fail
+          # closed instead of depending on how the gate treats a missing file.
+          if [ ! -f "$report" ]; then
+            echo "SkillSpector produced no report at $report." >&2
+            exit 1
+          fi
+
+          # Fail when any individual finding is HIGH or CRITICAL, except for
+          # documented false positives recorded in .github/skillspector-allow.yml.
+          # SkillSpector has no native suppression, so the gate applies the
+          # allowlist here (see scripts/skillspector_gate.py).
+          uv run scripts/skillspector_gate.py \
+            --report "$report" \
+            --skill "$SKILL" \
+            --allowlist .github/skillspector-allow.yml
+
+      - name: Upload report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: skillspector-report-${{ matrix.skill }}
+          path: reports/${{ matrix.skill }}.md
+          if-no-files-found: warn
+
+  # Single gate that aggregates the per-skill matrix. Branch protection can
+  # require just this one check: it only passes when every skill scan
+  # succeeded. Because matrix jobs run independently under `fail-fast: false`,
+  # we inspect the job result explicitly rather than relying on `needs`
+  # short-circuiting.
+  skillspector:
+    name: SkillSpector security scan
+    needs: scan-skill
+    if: always()
+    runs-on: ubuntu-latest
+    steps:
+      - name: Verify all skill scans passed
+        run: |
+          echo "scan-skill result: ${{ needs.scan-skill.result }}"
+          if [ "${{ needs.scan-skill.result }}" != "success" ]; then
+            echo "One or more skills failed the SkillSpector scan." >&2
+            exit 1
+          fi
+          echo "All skills passed the SkillSpector scan."
@@ -11,3 +11,6 @@ __pycache__/
 
 # Eval run artifacts
 eval/runs/
+
+# Behavioral matrix results
+eval/behavioral/results/
@@ -0,0 +1,28 @@
+"""pytest wiring for the behavioral harness.
+
+Adds this directory to ``sys.path`` so tests can ``from harness import ...``,
+and runs a one-time API preflight so the (expensive) behavioral runs fail
+fast with a clear message when the `claude` API isn't reachable -- e.g.
+when you're not connected to the network that can reach it.
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+import pytest
+
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+
+from harness import DEFAULT_MODEL, check_api_reachable  # noqa: E402
+
+
+@pytest.fixture(scope="session", autouse=True)
+def _require_api_reachable() -> None:
+    """Fail the suite up front if the `claude` API can't be reached."""
+    ok, detail = check_api_reachable(DEFAULT_MODEL)
+    if not ok:
+        pytest.fail(
+            f"claude API not reachable -- are you on the right network? ({detail})"
+        )