|
| 1 | +package main |
| 2 | + |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + "encoding/json" |
| 6 | + "fmt" |
| 7 | + "io" |
| 8 | + "os" |
| 9 | + "path/filepath" |
| 10 | + "sort" |
| 11 | + "strings" |
| 12 | + |
| 13 | + "go.uber.org/zap" |
| 14 | + |
| 15 | + "github.com/smart-mcp-proxy/mcpproxy-go/internal/security/scanner" |
| 16 | +) |
| 17 | + |
| 18 | +// scannerRunner executes one scanner against one corpus entry and returns its |
| 19 | +// normalized findings. It is injected so unit tests can supply a deterministic |
| 20 | +// mock while production wires a Docker-backed implementation (--scanners). |
| 21 | +type scannerRunner func(ctx context.Context, p *scanner.ScannerPlugin, e corpusEntry) ([]scanner.ScanFinding, error) |
| 22 | + |
| 23 | +// selectScanners resolves a comma-separated list of scanner ids against the |
| 24 | +// registry and partitions them into runnable vs skipped under the current |
| 25 | +// constraints. An unknown id is a hard config error (never a silent skip); |
| 26 | +// the caller maps it to exit 4. Both slices are sorted by id for deterministic |
| 27 | +// output (INV-5). Selection is pure — warnings are emitted by the caller via |
| 28 | +// runnabilityReason so this stays trivially testable. |
| 29 | +func selectScanners(reg *scanner.Registry, csv string, dockerEnabled bool, lookupEnv func(string) (string, bool)) (run, skipped []*scanner.ScannerPlugin, err error) { |
| 30 | + seen := make(map[string]bool) |
| 31 | + selected := make([]*scanner.ScannerPlugin, 0) |
| 32 | + for _, raw := range strings.Split(csv, ",") { |
| 33 | + id := strings.TrimSpace(raw) |
| 34 | + if id == "" || seen[id] { |
| 35 | + continue |
| 36 | + } |
| 37 | + seen[id] = true |
| 38 | + p, gerr := reg.Get(id) |
| 39 | + if gerr != nil { |
| 40 | + return nil, nil, fmt.Errorf("unknown scanner %q: %w", id, gerr) |
| 41 | + } |
| 42 | + selected = append(selected, p) |
| 43 | + } |
| 44 | + sort.Slice(selected, func(i, j int) bool { return selected[i].ID < selected[j].ID }) |
| 45 | + |
| 46 | + for _, p := range selected { |
| 47 | + if runnabilityReason(p, dockerEnabled, lookupEnv) != "" { |
| 48 | + skipped = append(skipped, p) |
| 49 | + } else { |
| 50 | + run = append(run, p) |
| 51 | + } |
| 52 | + } |
| 53 | + return run, skipped, nil |
| 54 | +} |
| 55 | + |
| 56 | +// runnabilityReason returns "" when the scanner can run under the given |
| 57 | +// constraints, otherwise a human-readable reason it must be skipped. Used both |
| 58 | +// to partition in selectScanners and to explain the skip to the operator, so |
| 59 | +// the gating rules live in exactly one place (DRY). Order: Docker is the |
| 60 | +// cheapest gate, then secrets. Network-req scanners are NOT gated here — the |
| 61 | +// Docker gate above subsumes that (Docker-off → everything skipped). |
| 62 | +func runnabilityReason(p *scanner.ScannerPlugin, dockerEnabled bool, lookupEnv func(string) (string, bool)) string { |
| 63 | + if !dockerEnabled { |
| 64 | + return "Docker isolation disabled (set MCPPROXY_SCAN_EVAL_DOCKER=1 to enable)" |
| 65 | + } |
| 66 | + for _, req := range p.RequiredEnv { |
| 67 | + if _, ok := lookupEnv(req.Key); !ok { |
| 68 | + return fmt.Sprintf("missing required secret %s", req.Key) |
| 69 | + } |
| 70 | + } |
| 71 | + // Network-req scanners are NOT skipped here: when Docker is available the |
| 72 | + // operator explicitly opted in via --scanners + MCPPROXY_SCAN_EVAL_DOCKER=1, |
| 73 | + // and running the scanner (even offline — the runner enforces NetworkMode=none, |
| 74 | + // Security-by-Default) is preferred over silently skipping it. The Docker gate |
| 75 | + // above already covers the Docker-unavailable case (everything skipped), so |
| 76 | + // reaching here means Docker IS enabled. |
| 77 | + return "" |
| 78 | +} |
| 79 | + |
| 80 | +// severityRank orders severities for max-severity computation. info, the empty |
| 81 | +// string, and unknown values all rank 0 so they neither flag nor set |
| 82 | +// max_severity — the schema enum is {critical,high,medium,low} only, and info |
| 83 | +// findings are kept solely as provenance in detections. |
| 84 | +func severityRank(s string) int { |
| 85 | + switch s { |
| 86 | + case scanner.SeverityCritical: |
| 87 | + return 4 |
| 88 | + case scanner.SeverityHigh: |
| 89 | + return 3 |
| 90 | + case scanner.SeverityMedium: |
| 91 | + return 2 |
| 92 | + case scanner.SeverityLow: |
| 93 | + return 1 |
| 94 | + default: |
| 95 | + return 0 |
| 96 | + } |
| 97 | +} |
| 98 | + |
| 99 | +// scanFindingsToVerdict projects a scanner's findings into one detectorVerdict. |
| 100 | +// Every finding (including info) is recorded in detections for provenance, but |
| 101 | +// only {critical,high,medium,low} contribute to flagged/max_severity, so the |
| 102 | +// flagged ⇔ max_severity!="" invariant holds. Detections is always non-nil. |
| 103 | +func scanFindingsToVerdict(id string, findings []scanner.ScanFinding) detectorVerdict { |
| 104 | + v := detectorVerdict{ |
| 105 | + Detector: id, |
| 106 | + Detections: make([]detectionView, 0, len(findings)), |
| 107 | + } |
| 108 | + for _, f := range findings { |
| 109 | + v.Detections = append(v.Detections, detectionView{ |
| 110 | + Type: f.RuleID, |
| 111 | + Category: f.Category, |
| 112 | + Severity: f.Severity, |
| 113 | + }) |
| 114 | + if severityRank(f.Severity) > severityRank(v.MaxSeverity) { |
| 115 | + v.MaxSeverity = f.Severity |
| 116 | + v.Flagged = true |
| 117 | + } |
| 118 | + } |
| 119 | + return v |
| 120 | +} |
| 121 | + |
| 122 | +// appendScannerVerdicts augments an existing detector report in place: each |
| 123 | +// plugin id is appended to report.Detectors and every entry gains one verdict |
| 124 | +// per plugin. A per-entry runner error is a safe non-flag (an unavailable |
| 125 | +// scanner must never manufacture a finding) plus a one-line stderr warning. |
| 126 | +// Entries are matched to corpus entries by id rather than slice position so a |
| 127 | +// reordered or partial report stays correct. |
| 128 | +func appendScannerVerdicts(report *verdictReport, c *corpus, plugins []*scanner.ScannerPlugin, runner scannerRunner, stderr io.Writer) { |
| 129 | + if len(plugins) == 0 { |
| 130 | + return |
| 131 | + } |
| 132 | + byID := make(map[string]corpusEntry, len(c.Entries)) |
| 133 | + for _, e := range c.Entries { |
| 134 | + byID[e.ID] = e |
| 135 | + } |
| 136 | + for _, p := range plugins { |
| 137 | + report.Detectors = append(report.Detectors, p.ID) |
| 138 | + } |
| 139 | + for i := range report.Entries { |
| 140 | + entry := &report.Entries[i] |
| 141 | + ce, ok := byID[entry.ID] |
| 142 | + for _, p := range plugins { |
| 143 | + if !ok { |
| 144 | + entry.Verdicts = append(entry.Verdicts, scanFindingsToVerdict(p.ID, nil)) |
| 145 | + continue |
| 146 | + } |
| 147 | + findings, rerr := runner(context.Background(), p, ce) |
| 148 | + if rerr != nil { |
| 149 | + fmt.Fprintf(stderr, "warning: scanner %s failed on entry %s: %v\n", p.ID, entry.ID, rerr) |
| 150 | + entry.Verdicts = append(entry.Verdicts, scanFindingsToVerdict(p.ID, nil)) |
| 151 | + continue |
| 152 | + } |
| 153 | + entry.Verdicts = append(entry.Verdicts, scanFindingsToVerdict(p.ID, findings)) |
| 154 | + } |
| 155 | + } |
| 156 | +} |
| 157 | + |
| 158 | +// applyScanners resolves the requested scanner ids against the registry, warns |
| 159 | +// about any skipped under the current constraints, and — for the runnable set — |
| 160 | +// appends their verdicts to the report in place. An unknown id is a hard config |
| 161 | +// error the caller maps to exit 4 (never a silent skip); a skip is a warning, |
| 162 | +// never an error, so the detector verdicts still emit. runner is injected for |
| 163 | +// tests; when nil a Docker-backed runner is constructed (offline-by-default). |
| 164 | +// The scratch base dir is created lazily so the docker-disabled path touches no |
| 165 | +// filesystem and emits clean JSON. |
| 166 | +func applyScanners(report *verdictReport, c *corpus, reg *scanner.Registry, scannerIDs string, dockerEnabled bool, lookupEnv func(string) (string, bool), runner scannerRunner, stderr io.Writer) error { |
| 167 | + run, skipped, err := selectScanners(reg, scannerIDs, dockerEnabled, lookupEnv) |
| 168 | + if err != nil { |
| 169 | + return err |
| 170 | + } |
| 171 | + for _, p := range skipped { |
| 172 | + fmt.Fprintf(stderr, "warning: skipping scanner %s: %s\n", p.ID, runnabilityReason(p, dockerEnabled, lookupEnv)) |
| 173 | + } |
| 174 | + if len(run) == 0 { |
| 175 | + return nil |
| 176 | + } |
| 177 | + if runner == nil { |
| 178 | + baseDir, mkErr := os.MkdirTemp("", "scan-eval-") |
| 179 | + if mkErr != nil { |
| 180 | + return fmt.Errorf("scanner work dir: %w", mkErr) |
| 181 | + } |
| 182 | + defer os.RemoveAll(baseDir) |
| 183 | + runner = newDockerScannerRunner(scanner.NewDockerRunner(zap.NewNop()), baseDir, lookupEnv) |
| 184 | + } |
| 185 | + appendScannerVerdicts(report, c, run, runner, stderr) |
| 186 | + return nil |
| 187 | +} |
| 188 | + |
| 189 | +// findingsFromReport parses a scanner's report bytes into normalized findings, |
| 190 | +// tagged with the scanner id. The runner is SARIF-only and safe-by-default: any |
| 191 | +// report that is not valid SARIF (empty, non-SARIF JSON, or malformed) yields no |
| 192 | +// findings rather than an error, so an unreadable report can never manufacture a |
| 193 | +// verdict (security-by-default, constitution). |
| 194 | +func findingsFromReport(id string, data []byte) []scanner.ScanFinding { |
| 195 | + if !scanner.IsSARIF(data) { |
| 196 | + return nil |
| 197 | + } |
| 198 | + report, err := scanner.ParseSARIF(data) |
| 199 | + if err != nil { |
| 200 | + return nil |
| 201 | + } |
| 202 | + return scanner.NormalizeFindings(report, id) |
| 203 | +} |
| 204 | + |
| 205 | +// writeToolsJSON materializes a corpus entry as a single-tool source tree in the |
| 206 | +// {"tools":[{name,description}]} shape the bundled scanners read (mirrors |
| 207 | +// scanner.Service.exportToolDefinitions). The entry id becomes the tool name and |
| 208 | +// the corpus description becomes the tool description the scanners inspect. |
| 209 | +func writeToolsJSON(dir string, e corpusEntry) error { |
| 210 | + doc := map[string]any{ |
| 211 | + "tools": []map[string]string{ |
| 212 | + {"name": e.ID, "description": e.Description}, |
| 213 | + }, |
| 214 | + } |
| 215 | + data, err := json.MarshalIndent(doc, "", " ") |
| 216 | + if err != nil { |
| 217 | + return err |
| 218 | + } |
| 219 | + return os.WriteFile(filepath.Join(dir, "tools.json"), data, 0o600) |
| 220 | +} |
0 commit comments