feat(scan-eval): add --scanners flag to run Docker-bundled security scanners (#574)

Dumbris · web-flow · commit e776e3b864d6 · 2026-06-02T21:28:15.000+03:00
* feat(scan-eval): add --scanners flag to run Docker-bundled security scanners Wire the --scanners flag in cmd/scan-eval to run bundled security scanner plugins against synthetic single-tool MCP-server sources per corpus entry and fold their findings into the scan-verdict report. - newDockerScannerRunner: executes a scanner via DockerRunner, reads the SARIF report, and normalizes findings (security-by-default: network none, read-only source mount). - applyScanners: resolves requested scanner IDs against the registry, skips non-runnable scanners (Docker disabled / missing secret env), and appends per-scanner verdicts; unknown IDs exit 4 (config error). - collectScannerEnv / parseTimeout helpers; offline scanners run by default, network/secret scanners require explicit IDs + env. - main.go: enable Docker via MCPPROXY_SCAN_EVAL_DOCKER, build the registry, and run applyScanners after evaluate. Docker execution is gated behind an env flag so the cheap per-PR gate stays offline. Tests cover SARIF parsing, error/non-zero-exit handling, env collection, timeout parsing, scanner selection, and the exit-4 CLI path. Related #MCP-744 * fix(scan-eval): gate NetworkReq scanner skip on Docker-unavailable only (Codex finding) The runnabilityReason function unconditionally skipped NetworkReq scanners even when Docker was enabled. When the operator explicitly opts in via --scanners + MCPPROXY_SCAN_EVAL_DOCKER=1, network-req scanners should be runnable — the Docker-off case is already covered by the earlier return. Adds TestSelectScanners_NetworkReq_RunnableWhenDockerEnabled to lock in the new behavior. Related #574
diff --git a/cmd/scan-eval/docker_runner.go b/cmd/scan-eval/docker_runner.go
@@ -0,0 +1,110 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"time"
+
+	"github.com/smart-mcp-proxy/mcpproxy-go/internal/security/scanner"
+)
+
+// defaultScannerTimeout is the fallback parseTimeout returns when a plugin's
+// declared timeout is empty, unparseable, or non-positive (120 seconds).
+const defaultScannerTimeout = 120 * time.Second
+
+// scannerExec is the two-method surface newDockerScannerRunner depends on. It is
+// declared here, at the consumer, so unit tests can inject a deterministic stub;
+// production passes the value returned by scanner.NewDockerRunner. RunScanner
+// performs one scanner run; ReadReportFile loads that run's report from reportDir.
+type scannerExec interface {
+	RunScanner(ctx context.Context, cfg scanner.ScannerRunConfig) (stdout, stderr string, exitCode int, err error)
+	ReadReportFile(reportDir string) ([]byte, error)
+}
+
+// newDockerScannerRunner returns the production scannerRunner. For each
+// (plugin, entry) pair it creates a throwaway work dir under baseDir, writes the
+// entry as source/tools.json, and runs the scanner through exec with
+// NetworkMode "none" and only the declared env keys (see collectScannerEnv).
+// The report read back from the report dir goes through findingsFromReport and
+// the resulting findings are returned. Failing to set up the work dir, to
+// execute the scanner, or to read a report yields an error and no findings.
+// The exit code never fails a run by itself: it is only quoted in the
+// missing-report error, so a non-zero exit with a readable report succeeds.
+func newDockerScannerRunner(exec scannerExec, baseDir string, lookupEnv func(string) (string, bool)) scannerRunner {
+	return func(ctx context.Context, p *scanner.ScannerPlugin, e corpusEntry) ([]scanner.ScanFinding, error) {
+		scratch, err := os.MkdirTemp(baseDir, "scan-"+p.ID+"-")
+		if err != nil {
+			return nil, fmt.Errorf("scanner %s: create work dir: %w", p.ID, err)
+		}
+		// Drop the whole scratch tree (source + report) when this run returns.
+		defer os.RemoveAll(scratch)
+
+		srcDir, outDir := filepath.Join(scratch, "source"), filepath.Join(scratch, "report")
+		if err := os.MkdirAll(srcDir, 0o750); err != nil {
+			return nil, fmt.Errorf("scanner %s: prepare %s: %w", p.ID, srcDir, err)
+		}
+		if err := os.MkdirAll(outDir, 0o750); err != nil {
+			return nil, fmt.Errorf("scanner %s: prepare %s: %w", p.ID, outDir, err)
+		}
+		if err := writeToolsJSON(srcDir, e); err != nil {
+			return nil, fmt.Errorf("scanner %s: write tools.json: %w", p.ID, err)
+		}
+
+		// Stdout is discarded; stderr and the exit code are kept for diagnostics.
+		_, errOutput, code, err := exec.RunScanner(ctx, scanner.ScannerRunConfig{
+			ContainerName: scanner.GenerateContainerName(p.ID, e.ID),
+			Image:         p.EffectiveImage(),
+			Command:       p.Command,
+			Env:           collectScannerEnv(p, lookupEnv),
+			SourceDir:     srcDir,
+			ReportDir:     outDir,
+			NetworkMode:   "none",
+			Timeout:       parseTimeout(p.Timeout),
+		})
+		if err != nil {
+			return nil, fmt.Errorf("scanner %s exec failed: %w", p.ID, err)
+		}
+
+		raw, err := exec.ReadReportFile(outDir)
+		if err != nil {
+			return nil, fmt.Errorf("scanner %s (exit %d): no readable report: %w; stderr: %s",
+				p.ID, code, err, errOutput)
+		}
+		return findingsFromReport(p.ID, raw), nil
+	}
+}
+
+// collectScannerEnv builds the container environment from the plugin's declared
+// keys only: ConfiguredEnv is copied first, then each RequiredEnv and OptionalEnv
+// key that lookupEnv reports as set is added (overriding a configured default).
+func collectScannerEnv(p *scanner.ScannerPlugin, lookupEnv func(string) (string, bool)) map[string]string {
+	out := make(map[string]string, len(p.ConfiguredEnv))
+	for name, value := range p.ConfiguredEnv {
+		out[name] = value
+	}
+	forward := func(key string) {
+		if value, found := lookupEnv(key); found {
+			out[key] = value
+		}
+	}
+	for _, req := range p.RequiredEnv {
+		forward(req.Key)
+	}
+	for _, opt := range p.OptionalEnv {
+		forward(opt.Key)
+	}
+	return out
+}
+
+// parseTimeout converts a plugin's declared timeout string into a duration.
+// Only a value that time.ParseDuration accepts and that is strictly positive is
+// returned as-is; an empty, malformed, zero, or negative value falls back to
+// defaultScannerTimeout.
+func parseTimeout(s string) time.Duration {
+	if parsed, err := time.ParseDuration(s); err == nil && parsed > 0 {
+		return parsed
+	}
+	return defaultScannerTimeout
+}
diff --git a/cmd/scan-eval/main.go b/cmd/scan-eval/main.go
@@ -7,8 +7,9 @@
 //
 //	scan-eval --corpus datasets/security_corpus_v1.json [--out verdicts.json]
 //
-// The optional --scanners flag is a reserved extension point for the Docker
-// bundled scanner registry; it is not yet implemented (deferred per Gate 2).
+// The optional --scanners flag opts into Docker-isolated bundled security
+// scanners (offline by default; set MCPPROXY_SCAN_EVAL_DOCKER=1 to enable
+// container execution). Each requested scanner appends a per-entry verdict.
 package main
 
 import (
@@ -19,7 +20,10 @@ import (
 	"os"
 	"strings"
 
+	"go.uber.org/zap"
+
 	"github.com/smart-mcp-proxy/mcpproxy-go/internal/security"
+	"github.com/smart-mcp-proxy/mcpproxy-go/internal/security/scanner"
 )
 
 const (
@@ -40,7 +44,7 @@ func run(args []string, stdout, stderr io.Writer) int {
 	corpusPath := fs.String("corpus", "", "path to the D2 security corpus JSON (required)")
 	outPath := fs.String("out", "", "output path for verdict JSON (default: stdout)")
 	detectors := fs.String("detectors", detectorSensitiveData, "comma-separated detectors to run (only 'sensitive-data' is supported)")
-	scanners := fs.String("scanners", "", "opt-in Docker bundled scanner ids (reserved extension point; not yet implemented)")
+	scanners := fs.String("scanners", "", "comma-separated Docker bundled scanner ids to run (offline; set MCPPROXY_SCAN_EVAL_DOCKER=1 to enable)")
 
 	if err := fs.Parse(args); err != nil {
 		return exitConfigError
@@ -53,10 +57,6 @@ func run(args []string, stdout, stderr io.Writer) int {
 		fmt.Fprintf(stderr, "error: %v\n", err)
 		return exitConfigError
 	}
-	if *scanners != "" {
-		fmt.Fprintf(stderr, "warning: --scanners=%q is a reserved extension point and is not yet implemented; ignoring\n", *scanners)
-	}
-
 	c, err := loadCorpus(*corpusPath)
 	if err != nil {
 		fmt.Fprintf(stderr, "error: %v\n", err)
@@ -65,6 +65,20 @@ func run(args []string, stdout, stderr io.Writer) int {
 
 	report := evaluate(c, security.NewDetector(nil))
 
+	// Optional Docker bundled scanners. An unknown id is a hard config error
+	// (exit 4); a skip under current constraints is only a warning so detector
+	// verdicts still emit. Docker is the gate — offline-by-default unless the
+	// operator opts in via MCPPROXY_SCAN_EVAL_DOCKER.
+	if *scanners != "" {
+		dockerEnv := os.Getenv("MCPPROXY_SCAN_EVAL_DOCKER")
+		dockerEnabled := dockerEnv == "1" || strings.EqualFold(dockerEnv, "true")
+		reg := scanner.NewRegistry("", zap.NewNop())
+		if err := applyScanners(report, c, reg, *scanners, dockerEnabled, os.LookupEnv, nil, stderr); err != nil {
+			fmt.Fprintf(stderr, "error: %v\n", err)
+			return exitConfigError
+		}
+	}
+
 	out, err := json.MarshalIndent(report, "", "  ")
 	if err != nil {
 		fmt.Fprintf(stderr, "error: marshaling verdict report: %v\n", err)
diff --git a/cmd/scan-eval/scanners.go b/cmd/scan-eval/scanners.go
@@ -0,0 +1,220 @@
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+
+	"go.uber.org/zap"
+
+	"github.com/smart-mcp-proxy/mcpproxy-go/internal/security/scanner"
+)
+
+// scannerRunner runs a single scanner plugin against a single corpus entry and
+// returns the findings for that pair, or an error when the run failed. It is a
+// function type so tests can pass a mock; applyScanners builds the Docker one.
+type scannerRunner func(ctx context.Context, p *scanner.ScannerPlugin, e corpusEntry) ([]scanner.ScanFinding, error)
+
+// selectScanners resolves a comma-separated list of scanner ids against the
+// registry and partitions them into runnable vs skipped under the current
+// constraints. An unknown id is a hard config error (never a silent skip);
+// the caller maps it to exit 4. Both slices are sorted by id for deterministic
+// output (INV-5). Selection is pure — warnings are emitted by the caller via
+// runnabilityReason so this stays trivially testable.
+func selectScanners(reg *scanner.Registry, csv string, dockerEnabled bool, lookupEnv func(string) (string, bool)) (run, skipped []*scanner.ScannerPlugin, err error) {
+	seen := make(map[string]bool)
+	selected := make([]*scanner.ScannerPlugin, 0)
+	for _, raw := range strings.Split(csv, ",") {
+		id := strings.TrimSpace(raw)
+		if id == "" || seen[id] {
+			continue
+		}
+		seen[id] = true
+		p, gerr := reg.Get(id)
+		if gerr != nil {
+			return nil, nil, fmt.Errorf("unknown scanner %q: %w", id, gerr)
+		}
+		selected = append(selected, p)
+	}
+	sort.Slice(selected, func(i, j int) bool { return selected[i].ID < selected[j].ID })
+
+	for _, p := range selected {
+		if runnabilityReason(p, dockerEnabled, lookupEnv) != "" {
+			skipped = append(skipped, p)
+		} else {
+			run = append(run, p)
+		}
+	}
+	return run, skipped, nil
+}
+
+// runnabilityReason reports why scanner p cannot run under the current
+// constraints, or "" when it can. selectScanners uses the empty/non-empty
+// result to partition plugins and applyScanners prints the text in its skip
+// warning, so the gating rules are written once. Checks run in order: Docker
+// must be enabled, then every RequiredEnv key must be present according to
+// lookupEnv. A key that is set but empty counts as present, because only the
+// boolean from lookupEnv is inspected.
+func runnabilityReason(p *scanner.ScannerPlugin, dockerEnabled bool, lookupEnv func(string) (string, bool)) string {
+	// Gate 1: without Docker nothing runs, whatever the plugin declares.
+	if !dockerEnabled {
+		return "Docker isolation disabled (set MCPPROXY_SCAN_EVAL_DOCKER=1 to enable)"
+	}
+	// Gate 2: every required key must be set; the first unset one is reported.
+	for _, secret := range p.RequiredEnv {
+		if _, present := lookupEnv(secret.Key); present {
+			continue
+		}
+		return "missing required secret " + secret.Key
+	}
+	// No network gate: a network-requiring scanner is still started, and the
+	// Docker runner built by newDockerScannerRunner pins NetworkMode to "none".
+	return ""
+}
+
+// severityRank maps a severity string to a number used to find the highest
+// severity among findings: low=1, medium=2, high=3, critical=4. Anything else
+// (including the empty string) ranks 0, which scanFindingsToVerdict treats as
+// "does not flag" because a zero rank can never exceed the running maximum.
+func severityRank(s string) int {
+	rank := 0
+	switch s {
+	case scanner.SeverityLow:
+		rank = 1
+	case scanner.SeverityMedium:
+		rank = 2
+	case scanner.SeverityHigh:
+		rank = 3
+	case scanner.SeverityCritical:
+		rank = 4
+	}
+	return rank
+}
+
+// scanFindingsToVerdict folds one scanner's findings into a detectorVerdict
+// whose Detector is id. Each finding becomes a detectionView (rule id,
+// category, severity), so Detections is non-nil and empty when findings is nil.
+// MaxSeverity is the first finding severity holding the highest non-zero
+// severityRank, and Flagged is true exactly when such a finding exists;
+// findings that rank 0 are listed in Detections but never flag.
+func scanFindingsToVerdict(id string, findings []scanner.ScanFinding) detectorVerdict {
+	views := make([]detectionView, 0, len(findings))
+	worst, worstRank := "", 0
+	for i := range findings {
+		finding := &findings[i]
+		views = append(views, detectionView{
+			Type:     finding.RuleID,
+			Category: finding.Category,
+			Severity: finding.Severity,
+		})
+		if rank := severityRank(finding.Severity); rank > worstRank {
+			worst, worstRank = finding.Severity, rank
+		}
+	}
+	return detectorVerdict{Detector: id, Detections: views, MaxSeverity: worst, Flagged: worstRank > 0}
+}
+
+// appendScannerVerdicts augments an existing detector report in place: each
+// plugin id is appended to report.Detectors and every entry gains one verdict
+// per plugin. A per-entry runner error is a safe non-flag (an unavailable
+// scanner must never manufacture a finding) plus a one-line stderr warning.
+// Entries are matched to corpus entries by id rather than slice position so a
+// reordered or partial report stays correct.
+func appendScannerVerdicts(report *verdictReport, c *corpus, plugins []*scanner.ScannerPlugin, runner scannerRunner, stderr io.Writer) {
+	if len(plugins) == 0 {
+		return
+	}
+	byID := make(map[string]corpusEntry, len(c.Entries))
+	for _, e := range c.Entries {
+		byID[e.ID] = e
+	}
+	for _, p := range plugins {
+		report.Detectors = append(report.Detectors, p.ID)
+	}
+	for i := range report.Entries {
+		entry := &report.Entries[i]
+		ce, ok := byID[entry.ID]
+		for _, p := range plugins {
+			if !ok {
+				entry.Verdicts = append(entry.Verdicts, scanFindingsToVerdict(p.ID, nil))
+				continue
+			}
+			findings, rerr := runner(context.Background(), p, ce)
+			if rerr != nil {
+				fmt.Fprintf(stderr, "warning: scanner %s failed on entry %s: %v\n", p.ID, entry.ID, rerr)
+				entry.Verdicts = append(entry.Verdicts, scanFindingsToVerdict(p.ID, nil))
+				continue
+			}
+			entry.Verdicts = append(entry.Verdicts, scanFindingsToVerdict(p.ID, findings))
+		}
+	}
+}
+
+// applyScanners resolves scannerIDs via selectScanners, warns on stderr for each
+// skipped scanner, and appends the runnable scanners' verdicts to report in
+// place. Only a selection error (unknown id) or a failure to create the scratch
+// dir is returned. A nil runner means "build the Docker-backed one"; its scratch
+// dir is created only when something will run and is removed on return.
+func applyScanners(report *verdictReport, c *corpus, reg *scanner.Registry, scannerIDs string, dockerEnabled bool, lookupEnv func(string) (string, bool), runner scannerRunner, stderr io.Writer) error {
+	runnable, skipped, err := selectScanners(reg, scannerIDs, dockerEnabled, lookupEnv)
+	if err != nil {
+		return err
+	}
+	for _, plugin := range skipped {
+		why := runnabilityReason(plugin, dockerEnabled, lookupEnv)
+		fmt.Fprintf(stderr, "warning: skipping scanner %s: %s\n", plugin.ID, why)
+	}
+	if len(runnable) == 0 {
+		return nil
+	}
+	if runner != nil {
+		appendScannerVerdicts(report, c, runnable, runner, stderr)
+		return nil
+	}
+	scratchRoot, err := os.MkdirTemp("", "scan-eval-")
+	if err != nil {
+		return fmt.Errorf("scanner work dir: %w", err)
+	}
+	defer os.RemoveAll(scratchRoot)
+	docker := newDockerScannerRunner(scanner.NewDockerRunner(zap.NewNop()), scratchRoot, lookupEnv)
+	appendScannerVerdicts(report, c, runnable, docker, stderr)
+	return nil
+}
+
+// findingsFromReport converts raw report bytes into findings attributed to the
+// scanner id. Only SARIF is understood: data must pass scanner.IsSARIF and then
+// parse with scanner.ParseSARIF before scanner.NormalizeFindings is applied.
+// Any other input returns nil instead of an error, so a broken report can
+// never produce a finding. NOTE(review): the parse error is dropped silently,
+// which makes a malformed report indistinguishable from a clean scan.
+func findingsFromReport(id string, data []byte) []scanner.ScanFinding {
+	if scanner.IsSARIF(data) {
+		if sarif, err := scanner.ParseSARIF(data); err == nil {
+			return scanner.NormalizeFindings(sarif, id)
+		}
+	}
+	// Not SARIF, or SARIF that failed to parse: report nothing, not an error.
+	return nil
+}
+
+// writeToolsJSON writes dir/tools.json describing the corpus entry as a
+// one-tool source tree: {"tools":[{"name":<entry id>,"description":<entry
+// description>}]}, indented with two spaces. The file is created with mode
+// 0o600. A marshal or write error is returned unchanged. NOTE(review): the
+// shape is meant to match what the bundled scanners read
+// (scanner.Service.exportToolDefinitions) — confirm against that code.
+func writeToolsJSON(dir string, e corpusEntry) error {
+	tool := map[string]string{"name": e.ID, "description": e.Description}
+	// encoding/json emits map keys sorted, so "description" precedes "name".
+	payload, err := json.MarshalIndent(map[string]any{"tools": []map[string]string{tool}}, "", "  ")
+	if err != nil {
+		return err
+	}
+	target := filepath.Join(dir, "tools.json")
+	return os.WriteFile(target, payload, 0o600)
+}
diff --git a/cmd/scan-eval/scanners_test.go b/cmd/scan-eval/scanners_test.go