Skip to content

Commit e776e3b

Browse files
authored
feat(scan-eval): add --scanners flag to run Docker-bundled security scanners (#574)
* feat(scan-eval): add --scanners flag to run Docker-bundled security scanners Wire the --scanners flag in cmd/scan-eval to run bundled security scanner plugins against synthetic single-tool MCP-server sources per corpus entry and fold their findings into the scan-verdict report. - newDockerScannerRunner: executes a scanner via DockerRunner, reads the SARIF report, and normalizes findings (security-by-default: network none, read-only source mount). - applyScanners: resolves requested scanner IDs against the registry, skips non-runnable scanners (Docker disabled / missing secret env), and appends per-scanner verdicts; unknown IDs exit 4 (config error). - collectScannerEnv / parseTimeout helpers; offline scanners run by default, network/secret scanners require explicit IDs + env. - main.go: enable Docker via MCPPROXY_SCAN_EVAL_DOCKER, build the registry, and run applyScanners after evaluate. Docker execution is gated behind an env flag so the cheap per-PR gate stays offline. Tests cover SARIF parsing, error/non-zero-exit handling, env collection, timeout parsing, scanner selection, and the exit-4 CLI path. Related #MCP-744 * fix(scan-eval): gate NetworkReq scanner skip on Docker-unavailable only (Codex finding) The runnabilityReason function unconditionally skipped NetworkReq scanners even when Docker was enabled. When the operator explicitly opts in via --scanners + MCPPROXY_SCAN_EVAL_DOCKER=1, network-req scanners should be runnable — the Docker-off case is already covered by the earlier return. Adds TestSelectScanners_NetworkReq_RunnableWhenDockerEnabled to lock in the new behavior. Related #574
1 parent 37d203b commit e776e3b

4 files changed

Lines changed: 845 additions & 7 deletions

File tree

cmd/scan-eval/docker_runner.go

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"os"
7+
"path/filepath"
8+
"time"
9+
10+
"github.com/smart-mcp-proxy/mcpproxy-go/internal/security/scanner"
11+
)
12+
13+
// defaultScannerTimeout is the conservative ceiling used when a plugin declares
14+
// no timeout (or an unparseable one) so a misconfigured value never hangs a run.
15+
const defaultScannerTimeout = 120 * time.Second
16+
17+
// scannerExec is the narrow slice of *scanner.DockerRunner the production runner
18+
// needs. Defining it here lets unit tests inject a deterministic stub while
19+
// production wires the real Docker-backed implementation (dependency inversion,
20+
// constitution: testability + 3-layer upstream client).
21+
type scannerExec interface {
22+
RunScanner(ctx context.Context, cfg scanner.ScannerRunConfig) (stdout, stderr string, exitCode int, err error)
23+
ReadReportFile(reportDir string) ([]byte, error)
24+
}
25+
26+
// newDockerScannerRunner builds the production scannerRunner: per entry it
27+
// materializes the corpus text as tools.json in a freshly mounted source dir,
28+
// runs the scanner container offline (NetworkMode=none — selectScanners is the
29+
// network gate, the runner is offline-by-default, Security-by-Default), and
30+
// parses the SARIF report into findings tagged with the scanner id. A docker
31+
// exec failure or an unreadable report is surfaced as an error so the caller
32+
// records a non-flagging verdict plus a warning (an unavailable scanner must
33+
// never manufacture a finding). Scanners signal hits via a non-zero exit code,
34+
// so a non-zero exit accompanied by a parseable report is NOT a failure.
35+
func newDockerScannerRunner(exec scannerExec, baseDir string, lookupEnv func(string) (string, bool)) scannerRunner {
36+
return func(ctx context.Context, p *scanner.ScannerPlugin, e corpusEntry) ([]scanner.ScanFinding, error) {
37+
workDir, err := os.MkdirTemp(baseDir, fmt.Sprintf("scan-%s-", p.ID))
38+
if err != nil {
39+
return nil, fmt.Errorf("scanner %s: create work dir: %w", p.ID, err)
40+
}
41+
defer os.RemoveAll(workDir)
42+
43+
sourceDir := filepath.Join(workDir, "source")
44+
reportDir := filepath.Join(workDir, "report")
45+
for _, d := range []string{sourceDir, reportDir} {
46+
if mkErr := os.MkdirAll(d, 0o750); mkErr != nil {
47+
return nil, fmt.Errorf("scanner %s: prepare %s: %w", p.ID, d, mkErr)
48+
}
49+
}
50+
if wErr := writeToolsJSON(sourceDir, e); wErr != nil {
51+
return nil, fmt.Errorf("scanner %s: write tools.json: %w", p.ID, wErr)
52+
}
53+
54+
cfg := scanner.ScannerRunConfig{
55+
ContainerName: scanner.GenerateContainerName(p.ID, e.ID),
56+
Image: p.EffectiveImage(),
57+
Command: p.Command,
58+
Env: collectScannerEnv(p, lookupEnv),
59+
SourceDir: sourceDir,
60+
ReportDir: reportDir,
61+
NetworkMode: "none",
62+
Timeout: parseTimeout(p.Timeout),
63+
}
64+
65+
_, stderrOut, exitCode, runErr := exec.RunScanner(ctx, cfg)
66+
if runErr != nil {
67+
return nil, fmt.Errorf("scanner %s exec failed: %w", p.ID, runErr)
68+
}
69+
70+
data, reportErr := exec.ReadReportFile(reportDir)
71+
if reportErr != nil {
72+
return nil, fmt.Errorf("scanner %s (exit %d): no readable report: %w; stderr: %s",
73+
p.ID, exitCode, reportErr, stderrOut)
74+
}
75+
return findingsFromReport(p.ID, data), nil
76+
}
77+
}
78+
79+
// collectScannerEnv assembles the container environment from declared keys only:
80+
// configured defaults first, then any present required/optional lookups (an
81+
// absent key is omitted, never blank). Ambient process secrets never leak into
82+
// the scanner subprocess (Security-by-Default).
83+
func collectScannerEnv(p *scanner.ScannerPlugin, lookupEnv func(string) (string, bool)) map[string]string {
84+
env := make(map[string]string, len(p.ConfiguredEnv))
85+
for k, v := range p.ConfiguredEnv {
86+
env[k] = v
87+
}
88+
for _, req := range p.RequiredEnv {
89+
if v, ok := lookupEnv(req.Key); ok {
90+
env[req.Key] = v
91+
}
92+
}
93+
for _, opt := range p.OptionalEnv {
94+
if v, ok := lookupEnv(opt.Key); ok {
95+
env[opt.Key] = v
96+
}
97+
}
98+
return env
99+
}
100+
101+
// parseTimeout parses a plugin's declared timeout, falling back to the
102+
// conservative default for empty, unparseable, or non-positive values so a
103+
// misconfigured timeout never hangs (or instantly kills) a run.
104+
func parseTimeout(s string) time.Duration {
105+
d, err := time.ParseDuration(s)
106+
if err != nil || d <= 0 {
107+
return defaultScannerTimeout
108+
}
109+
return d
110+
}

cmd/scan-eval/main.go

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@
77
//
88
// scan-eval --corpus datasets/security_corpus_v1.json [--out verdicts.json]
99
//
10-
// The optional --scanners flag is a reserved extension point for the Docker
11-
// bundled scanner registry; it is not yet implemented (deferred per Gate 2).
10+
// The optional --scanners flag opts into Docker-isolated bundled security
11+
// scanners (offline by default; set MCPPROXY_SCAN_EVAL_DOCKER=1 to enable
12+
// container execution). Each requested scanner appends a per-entry verdict.
1213
package main
1314

1415
import (
@@ -19,7 +20,10 @@ import (
1920
"os"
2021
"strings"
2122

23+
"go.uber.org/zap"
24+
2225
"github.com/smart-mcp-proxy/mcpproxy-go/internal/security"
26+
"github.com/smart-mcp-proxy/mcpproxy-go/internal/security/scanner"
2327
)
2428

2529
const (
@@ -40,7 +44,7 @@ func run(args []string, stdout, stderr io.Writer) int {
4044
corpusPath := fs.String("corpus", "", "path to the D2 security corpus JSON (required)")
4145
outPath := fs.String("out", "", "output path for verdict JSON (default: stdout)")
4246
detectors := fs.String("detectors", detectorSensitiveData, "comma-separated detectors to run (only 'sensitive-data' is supported)")
43-
scanners := fs.String("scanners", "", "opt-in Docker bundled scanner ids (reserved extension point; not yet implemented)")
47+
scanners := fs.String("scanners", "", "comma-separated Docker bundled scanner ids to run (offline; set MCPPROXY_SCAN_EVAL_DOCKER=1 to enable)")
4448

4549
if err := fs.Parse(args); err != nil {
4650
return exitConfigError
@@ -53,10 +57,6 @@ func run(args []string, stdout, stderr io.Writer) int {
5357
fmt.Fprintf(stderr, "error: %v\n", err)
5458
return exitConfigError
5559
}
56-
if *scanners != "" {
57-
fmt.Fprintf(stderr, "warning: --scanners=%q is a reserved extension point and is not yet implemented; ignoring\n", *scanners)
58-
}
59-
6060
c, err := loadCorpus(*corpusPath)
6161
if err != nil {
6262
fmt.Fprintf(stderr, "error: %v\n", err)
@@ -65,6 +65,20 @@ func run(args []string, stdout, stderr io.Writer) int {
6565

6666
report := evaluate(c, security.NewDetector(nil))
6767

68+
// Optional Docker bundled scanners. An unknown id is a hard config error
69+
// (exit 4); a skip under current constraints is only a warning so detector
70+
// verdicts still emit. Docker is the gate — offline-by-default unless the
71+
// operator opts in via MCPPROXY_SCAN_EVAL_DOCKER.
72+
if *scanners != "" {
73+
dockerEnv := os.Getenv("MCPPROXY_SCAN_EVAL_DOCKER")
74+
dockerEnabled := dockerEnv == "1" || strings.EqualFold(dockerEnv, "true")
75+
reg := scanner.NewRegistry("", zap.NewNop())
76+
if err := applyScanners(report, c, reg, *scanners, dockerEnabled, os.LookupEnv, nil, stderr); err != nil {
77+
fmt.Fprintf(stderr, "error: %v\n", err)
78+
return exitConfigError
79+
}
80+
}
81+
6882
out, err := json.MarshalIndent(report, "", " ")
6983
if err != nil {
7084
fmt.Fprintf(stderr, "error: marshaling verdict report: %v\n", err)

cmd/scan-eval/scanners.go

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"fmt"
7+
"io"
8+
"os"
9+
"path/filepath"
10+
"sort"
11+
"strings"
12+
13+
"go.uber.org/zap"
14+
15+
"github.com/smart-mcp-proxy/mcpproxy-go/internal/security/scanner"
16+
)
17+
18+
// scannerRunner executes one scanner against one corpus entry and returns its
19+
// normalized findings. It is injected so unit tests can supply a deterministic
20+
// mock while production wires a Docker-backed implementation (--scanners).
21+
type scannerRunner func(ctx context.Context, p *scanner.ScannerPlugin, e corpusEntry) ([]scanner.ScanFinding, error)
22+
23+
// selectScanners resolves a comma-separated list of scanner ids against the
24+
// registry and partitions them into runnable vs skipped under the current
25+
// constraints. An unknown id is a hard config error (never a silent skip);
26+
// the caller maps it to exit 4. Both slices are sorted by id for deterministic
27+
// output (INV-5). Selection is pure — warnings are emitted by the caller via
28+
// runnabilityReason so this stays trivially testable.
29+
func selectScanners(reg *scanner.Registry, csv string, dockerEnabled bool, lookupEnv func(string) (string, bool)) (run, skipped []*scanner.ScannerPlugin, err error) {
30+
seen := make(map[string]bool)
31+
selected := make([]*scanner.ScannerPlugin, 0)
32+
for _, raw := range strings.Split(csv, ",") {
33+
id := strings.TrimSpace(raw)
34+
if id == "" || seen[id] {
35+
continue
36+
}
37+
seen[id] = true
38+
p, gerr := reg.Get(id)
39+
if gerr != nil {
40+
return nil, nil, fmt.Errorf("unknown scanner %q: %w", id, gerr)
41+
}
42+
selected = append(selected, p)
43+
}
44+
sort.Slice(selected, func(i, j int) bool { return selected[i].ID < selected[j].ID })
45+
46+
for _, p := range selected {
47+
if runnabilityReason(p, dockerEnabled, lookupEnv) != "" {
48+
skipped = append(skipped, p)
49+
} else {
50+
run = append(run, p)
51+
}
52+
}
53+
return run, skipped, nil
54+
}
55+
56+
// runnabilityReason returns "" when the scanner can run under the given
57+
// constraints, otherwise a human-readable reason it must be skipped. Used both
58+
// to partition in selectScanners and to explain the skip to the operator, so
59+
// the gating rules live in exactly one place (DRY). Order: Docker is the
60+
// cheapest gate, then secrets. Network-req scanners are NOT gated here — the
61+
// Docker gate above subsumes that (Docker-off → everything skipped).
62+
func runnabilityReason(p *scanner.ScannerPlugin, dockerEnabled bool, lookupEnv func(string) (string, bool)) string {
63+
if !dockerEnabled {
64+
return "Docker isolation disabled (set MCPPROXY_SCAN_EVAL_DOCKER=1 to enable)"
65+
}
66+
for _, req := range p.RequiredEnv {
67+
if _, ok := lookupEnv(req.Key); !ok {
68+
return fmt.Sprintf("missing required secret %s", req.Key)
69+
}
70+
}
71+
// Network-req scanners are NOT skipped here: when Docker is available the
72+
// operator explicitly opted in via --scanners + MCPPROXY_SCAN_EVAL_DOCKER=1,
73+
// and running the scanner (even offline — the runner enforces NetworkMode=none,
74+
// Security-by-Default) is preferred over silently skipping it. The Docker gate
75+
// above already covers the Docker-unavailable case (everything skipped), so
76+
// reaching here means Docker IS enabled.
77+
return ""
78+
}
79+
80+
// severityRank orders severities for max-severity computation. info, the empty
81+
// string, and unknown values all rank 0 so they neither flag nor set
82+
// max_severity — the schema enum is {critical,high,medium,low} only, and info
83+
// findings are kept solely as provenance in detections.
84+
func severityRank(s string) int {
85+
switch s {
86+
case scanner.SeverityCritical:
87+
return 4
88+
case scanner.SeverityHigh:
89+
return 3
90+
case scanner.SeverityMedium:
91+
return 2
92+
case scanner.SeverityLow:
93+
return 1
94+
default:
95+
return 0
96+
}
97+
}
98+
99+
// scanFindingsToVerdict projects a scanner's findings into one detectorVerdict.
100+
// Every finding (including info) is recorded in detections for provenance, but
101+
// only {critical,high,medium,low} contribute to flagged/max_severity, so the
102+
// flagged ⇔ max_severity!="" invariant holds. Detections is always non-nil.
103+
func scanFindingsToVerdict(id string, findings []scanner.ScanFinding) detectorVerdict {
104+
v := detectorVerdict{
105+
Detector: id,
106+
Detections: make([]detectionView, 0, len(findings)),
107+
}
108+
for _, f := range findings {
109+
v.Detections = append(v.Detections, detectionView{
110+
Type: f.RuleID,
111+
Category: f.Category,
112+
Severity: f.Severity,
113+
})
114+
if severityRank(f.Severity) > severityRank(v.MaxSeverity) {
115+
v.MaxSeverity = f.Severity
116+
v.Flagged = true
117+
}
118+
}
119+
return v
120+
}
121+
122+
// appendScannerVerdicts augments an existing detector report in place: each
123+
// plugin id is appended to report.Detectors and every entry gains one verdict
124+
// per plugin. A per-entry runner error is a safe non-flag (an unavailable
125+
// scanner must never manufacture a finding) plus a one-line stderr warning.
126+
// Entries are matched to corpus entries by id rather than slice position so a
127+
// reordered or partial report stays correct.
128+
func appendScannerVerdicts(report *verdictReport, c *corpus, plugins []*scanner.ScannerPlugin, runner scannerRunner, stderr io.Writer) {
129+
if len(plugins) == 0 {
130+
return
131+
}
132+
byID := make(map[string]corpusEntry, len(c.Entries))
133+
for _, e := range c.Entries {
134+
byID[e.ID] = e
135+
}
136+
for _, p := range plugins {
137+
report.Detectors = append(report.Detectors, p.ID)
138+
}
139+
for i := range report.Entries {
140+
entry := &report.Entries[i]
141+
ce, ok := byID[entry.ID]
142+
for _, p := range plugins {
143+
if !ok {
144+
entry.Verdicts = append(entry.Verdicts, scanFindingsToVerdict(p.ID, nil))
145+
continue
146+
}
147+
findings, rerr := runner(context.Background(), p, ce)
148+
if rerr != nil {
149+
fmt.Fprintf(stderr, "warning: scanner %s failed on entry %s: %v\n", p.ID, entry.ID, rerr)
150+
entry.Verdicts = append(entry.Verdicts, scanFindingsToVerdict(p.ID, nil))
151+
continue
152+
}
153+
entry.Verdicts = append(entry.Verdicts, scanFindingsToVerdict(p.ID, findings))
154+
}
155+
}
156+
}
157+
158+
// applyScanners resolves the requested scanner ids against the registry, warns
159+
// about any skipped under the current constraints, and — for the runnable set —
160+
// appends their verdicts to the report in place. An unknown id is a hard config
161+
// error the caller maps to exit 4 (never a silent skip); a skip is a warning,
162+
// never an error, so the detector verdicts still emit. runner is injected for
163+
// tests; when nil a Docker-backed runner is constructed (offline-by-default).
164+
// The scratch base dir is created lazily so the docker-disabled path touches no
165+
// filesystem and emits clean JSON.
166+
func applyScanners(report *verdictReport, c *corpus, reg *scanner.Registry, scannerIDs string, dockerEnabled bool, lookupEnv func(string) (string, bool), runner scannerRunner, stderr io.Writer) error {
167+
run, skipped, err := selectScanners(reg, scannerIDs, dockerEnabled, lookupEnv)
168+
if err != nil {
169+
return err
170+
}
171+
for _, p := range skipped {
172+
fmt.Fprintf(stderr, "warning: skipping scanner %s: %s\n", p.ID, runnabilityReason(p, dockerEnabled, lookupEnv))
173+
}
174+
if len(run) == 0 {
175+
return nil
176+
}
177+
if runner == nil {
178+
baseDir, mkErr := os.MkdirTemp("", "scan-eval-")
179+
if mkErr != nil {
180+
return fmt.Errorf("scanner work dir: %w", mkErr)
181+
}
182+
defer os.RemoveAll(baseDir)
183+
runner = newDockerScannerRunner(scanner.NewDockerRunner(zap.NewNop()), baseDir, lookupEnv)
184+
}
185+
appendScannerVerdicts(report, c, run, runner, stderr)
186+
return nil
187+
}
188+
189+
// findingsFromReport parses a scanner's report bytes into normalized findings,
190+
// tagged with the scanner id. The runner is SARIF-only and safe-by-default: any
191+
// report that is not valid SARIF (empty, non-SARIF JSON, or malformed) yields no
192+
// findings rather than an error, so an unreadable report can never manufacture a
193+
// verdict (security-by-default, constitution).
194+
func findingsFromReport(id string, data []byte) []scanner.ScanFinding {
195+
if !scanner.IsSARIF(data) {
196+
return nil
197+
}
198+
report, err := scanner.ParseSARIF(data)
199+
if err != nil {
200+
return nil
201+
}
202+
return scanner.NormalizeFindings(report, id)
203+
}
204+
205+
// writeToolsJSON materializes a corpus entry as a single-tool source tree in the
206+
// {"tools":[{name,description}]} shape the bundled scanners read (mirrors
207+
// scanner.Service.exportToolDefinitions). The entry id becomes the tool name and
208+
// the corpus description becomes the tool description the scanners inspect.
209+
func writeToolsJSON(dir string, e corpusEntry) error {
210+
doc := map[string]any{
211+
"tools": []map[string]string{
212+
{"name": e.ID, "description": e.Description},
213+
},
214+
}
215+
data, err := json.MarshalIndent(doc, "", " ")
216+
if err != nil {
217+
return err
218+
}
219+
return os.WriteFile(filepath.Join(dir, "tools.json"), data, 0o600)
220+
}

0 commit comments

Comments
 (0)