huntridge-labs
diff --git a/‎.ai/architecture.yaml‎
Lines changed: 2 additions & 2 deletions b/‎.ai/architecture.yaml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎argus.example.yml‎
Lines changed: 9 additions & 1 deletion b/‎argus.example.yml‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎argus/core/schema.py‎
Lines changed: 37 additions & 1 deletion b/‎argus/core/schema.py‎
Lines changed: 37 additions & 1 deletion
diff --git a/‎argus/scanners/container.py‎
Lines changed: 231 additions & 3 deletions b/‎argus/scanners/container.py‎
Lines changed: 231 additions & 3 deletions
@@ -45,7 +45,7 @@ components:
       "viewers/__init__.py": "ViewerUnavailable shared exception"
       "viewers/terminal/": "`argus view --interface=terminal` — Textual TUI ([terminal] extra). Includes DiffPickerScreen + DiffScreen for scan-over-scan diff (``D`` keybind), reusing argus.core.findings_view.diff_scans."
       "viewers/browser/": "`argus view --interface=browser` — FastAPI + Jinja2 web UI, 127.0.0.1 only ([browser] extra). Routes include ``/diff?a=<path>&b=<path>`` powered by argus.core.findings_view.diff_scans, sharing the bucketing logic with the TUI's ``D`` keybind."
-      "scanners/": "Scanner modules implementing Scanner protocol (SCANNER_REGISTRY includes linters via auto-merge)"
+      "scanners/": "Scanner modules implementing Scanner protocol (SCANNER_REGISTRY includes linters via auto-merge). The container scanner orchestrates four sub-scanners on every run: trivy (CVE scan), grype (CVE scan, deduplicated against trivy), syft (SBOM), and exposure (declared-port surface from docker inspect Config.ExposedPorts). The exposure sub-scanner emits one Finding per declared port; INFO severity for ordinary application ports and MEDIUM for ports on the built-in RISKY_PORTS list (SSH 22/tcp, MySQL 3306/tcp, Redis 6379/tcp, PostgreSQL 5432/tcp, MongoDB 27017/tcp, etc.). Config knobs scanners.container.expose_warn_ports / expose_ignore_ports override or suppress."
       "linters/": "Linter modules implementing Scanner protocol (LINTER_REGISTRY auto-merges into SCANNER_REGISTRY)"
       "reporters/": "Output reporters (terminal, markdown, sarif, json, github, gitlab, junit). Discovered via the ``argus.reporters`` Python entry-point group (built-ins declared in pyproject.toml; third-party packages register additional formats without forking — see docs/contributing-reporters.md and ADR-023)."
       "preflight/": "CI preflight: provider detection, living issue reporting (GitHub/GitLab), network deps, scanner tool-readiness checks (tool_check.py)"
@@ -480,7 +480,7 @@ docsite:
       "viewers/__init__.py": "ViewerUnavailable shared exception"
       "viewers/terminal/": "`argus view --interface=terminal` — Textual TUI ([terminal] extra). Includes DiffPickerScreen + DiffScreen for scan-over-scan diff (``D`` keybind), reusing argus.core.findings_view.diff_scans."
       "viewers/browser/": "`argus view --interface=browser` — FastAPI + Jinja2 web UI, 127.0.0.1 only ([browser] extra). Routes include ``/diff?a=<path>&b=<path>`` powered by argus.core.findings_view.diff_scans, sharing the bucketing logic with the TUI's ``D`` keybind."
-      "scanners/": "Scanner modules implementing Scanner protocol (SCANNER_REGISTRY includes linters via auto-merge)"
+      "scanners/": "Scanner modules implementing Scanner protocol (SCANNER_REGISTRY includes linters via auto-merge). The container scanner orchestrates four sub-scanners on every run: trivy (CVE scan), grype (CVE scan, deduplicated against trivy), syft (SBOM), and exposure (declared-port surface from docker inspect Config.ExposedPorts). The exposure sub-scanner emits one Finding per declared port; INFO severity for ordinary application ports and MEDIUM for ports on the built-in RISKY_PORTS list (SSH 22/tcp, MySQL 3306/tcp, Redis 6379/tcp, PostgreSQL 5432/tcp, MongoDB 27017/tcp, etc.). Config knobs scanners.container.expose_warn_ports / expose_ignore_ports override or suppress."
       "linters/": "Linter modules implementing Scanner protocol (LINTER_REGISTRY auto-merges into SCANNER_REGISTRY)"
       "reporters/": "Output reporters (terminal, markdown, sarif, json, github, gitlab, junit). Discovered via the ``argus.reporters`` Python entry-point group (built-ins declared in pyproject.toml; third-party packages register additional formats without forking — see docs/contributing-reporters.md and ADR-023)."
       "tests/": "Co-located pytest tests (180 tests, 83%+ coverage)"
 
@@ -38,7 +38,15 @@ scanners:
   # container:
   #   enabled: false
   #   image_ref: "myapp:latest"
-  #   scanners: "trivy,grype,syft"
+  #   # Default set: trivy + grype CVEs, syft SBOM, exposure declared-ports.
+  #   scanners: "trivy,grype,syft,exposure"
+  #   # Attack-surface knobs for the exposure sub-scanner:
+  #   #   expose_warn_ports:  override the built-in WARN list
+  #   #                       (defaults: 22 SSH, 3306 MySQL, 6379 Redis,
+  #   #                       5432 PostgreSQL, 27017 MongoDB, etc.)
+  #   #   expose_ignore_ports: suppress findings entirely
+  #   # expose_warn_ports: ["22/tcp", "3306/tcp"]
+  #   # expose_ignore_ports: ["443/tcp", "8080/tcp"]
   #   # Private-registry auth: name an env var, never paste a literal.
   #   # The runner / shell exports the value; argus reads it at scan time.
   #   registry_username_env: REGISTRY_USER
 
@@ -38,6 +38,8 @@
     # Credential fields (either form: literal or <field>_env)
     "registry_username", "registry_password",
     "registry_username_env", "registry_password_env",
+    # Container exposure sub-scanner tuning
+    "expose_warn_ports", "expose_ignore_ports",
     # ZAP-specific tuning (decided in ADR-024)
     "api_spec", "rules_file", "cmd_options",
     "max_duration_minutes", "healthcheck_url",
@@ -81,7 +83,7 @@
 _CONTAINER_IMAGE_KEYS = {"image", "dockerfile", "context", "name", "cleanup"}
 
 # Sub-scanners argus scan container can dispatch to
-_CONTAINER_SUB_SCANNERS = {"trivy", "grype", "syft"}
+_CONTAINER_SUB_SCANNERS = {"trivy", "grype", "syft", "exposure"}
 
 
 class ConfigError:
@@ -215,6 +217,40 @@ def _validate_scanner(path: str, data: Any) -> list[ConfigError]:
                 f"Must be a positive integer, got {v!r}",
             ))
 
+    # Container exposure sub-scanner tuning — both lists must be
+    # lists of ``"PORT/PROTO"`` strings (protocol defaults to tcp
+    # when omitted; case-insensitive).
+    if scanner_name == "container":
+        for key in ("expose_warn_ports", "expose_ignore_ports"):
+            if key not in data:
+                continue
+            value = data[key]
+            if not isinstance(value, list):
+                errors.append(ConfigError(
+                    f"{path}.{key}",
+                    f"Must be a list of \"PORT/PROTO\" strings, "
+                    f"got {type(value).__name__}",
+                ))
+                continue
+            for entry in value:
+                if not isinstance(entry, str):
+                    errors.append(ConfigError(
+                        f"{path}.{key}",
+                        f"Entry must be a string \"PORT/PROTO\", "
+                        f"got {type(entry).__name__} ({entry!r})",
+                    ))
+                    continue
+                # Validate via the scanner's parser so the schema and
+                # the runtime agree on what's well-formed.
+                from argus.scanners.container import _parse_port_proto
+                if _parse_port_proto(entry) is None:
+                    errors.append(ConfigError(
+                        f"{path}.{key}",
+                        f"'{entry}' is not a valid PORT/PROTO entry. "
+                        "Expected '<port>/<tcp|udp|sctp>' (e.g. '22/tcp') "
+                        "or bare '<port>' which defaults to tcp.",
+                    ))
+
     # Warn on unknown keys (after credential / nested-block handling so
     # we don't double-warn on the keys we already validated).
     for key in data:
 
@@ -1,4 +1,4 @@
-"""Container scanner orchestrating Trivy, Grype, and Syft."""
+"""Container scanner orchestrating Trivy, Grype, Syft, and exposed-port surface."""
 
 import json
 import logging
@@ -14,6 +14,90 @@
 logger = logging.getLogger("argus")
 
 
+# ── Risky-default ports for the ``exposure`` sub-scanner ─────────────
+#
+# Services on this list are not vulnerabilities per se — the port being
+# *declared* via Dockerfile EXPOSE is itself harmless. The risk is that
+# these services historically ship with weak defaults (no-auth Redis,
+# unauthenticated PostgreSQL trust mode, SMB anonymous binding, etc.)
+# *and* are surprisingly often inherited by application images that
+# never actually intend to expose them — e.g. a base image that
+# EXPOSEs port 22 because openssh-server got pulled in as a transitive
+# dependency. The elevated (MEDIUM) severity prompts a "did you mean
+# to expose this?" review without falsely implying a known CVE.
+#
+# Keys are ``(port, protocol)`` tuples; values are the service name
+# used in the finding title. Operators can replace the list via
+# ``scanners.container.expose_warn_ports`` in argus.yml or suppress
+# any finding entirely via ``scanners.container.expose_ignore_ports``.
+#
+# Sources for each entry:
+#   - 21/tcp, 23/tcp: cleartext protocols (FTP, Telnet) — categorically
+#     unsafe on any public network; CIS Docker Benchmark §5.8.
+#   - 22/tcp: SSH in a container is a recurring image-inheritance leak
+#     (k8s.io/community#kubectl-exec-vs-ssh-in-pod discussion thread).
+#   - 25/tcp, 110/tcp, 143/tcp: legacy mail protocols with cleartext
+#     auth in default configs.
+#   - 161/udp: SNMPv1/v2 default community strings (``public``); CVE-1999-0517.
+#   - 389/tcp: LDAP cleartext bind; 636/tcp (LDAPS) is the encrypted
+#     alternative and is not warned.
+#   - 445/tcp: SMB; never appropriate from a containerized workload
+#     without an explicit reason.
+#   - 3306/tcp (MySQL), 5432/tcp (PostgreSQL), 6379/tcp (Redis),
+#     9200/tcp (Elasticsearch), 11211/tcp (Memcached), 27017/tcp
+#     (MongoDB): default no-auth configurations. The Shodan
+#     "Unauthorized Database Access" reports cite these by name.
+#   - 3389/tcp: RDP — same rationale as SSH plus auth-bypass CVE history.
+#
+# Adding a new entry requires citing a "why" in this comment block;
+# the rule keeps the list from being extended without a rationale.
+RISKY_PORTS: dict[tuple[int, str], str] = {
+    (21, "tcp"): "FTP",
+    (22, "tcp"): "SSH",
+    (23, "tcp"): "Telnet",
+    (25, "tcp"): "SMTP",
+    (110, "tcp"): "POP3",
+    (143, "tcp"): "IMAP",
+    (161, "udp"): "SNMP",
+    (389, "tcp"): "LDAP",
+    (445, "tcp"): "SMB",
+    (3306, "tcp"): "MySQL",
+    (3389, "tcp"): "RDP",
+    (5432, "tcp"): "PostgreSQL",
+    (6379, "tcp"): "Redis",
+    (9200, "tcp"): "Elasticsearch",
+    (11211, "tcp"): "Memcached",
+    (27017, "tcp"): "MongoDB",
+}
+
+
+def _parse_port_proto(raw: str) -> tuple[int, str] | None:
+    """Parse a ``PORT/PROTO`` string into ``(port, protocol)``.
+
+    Accepts ``"22/tcp"`` (canonical), ``"22"`` (defaults to tcp) and
+    ``" 22 / TCP "`` (surrounding whitespace + case tolerated). The port
+    must be plain ASCII digits in 1..65535 and the protocol tcp, udp or
+    sctp. Returns ``None`` (never raises) for anything else.
+    """
+    if not isinstance(raw, str):
+        return None
+    port_part, slash, proto_part = raw.partition("/")
+    port_str = port_part.strip()
+    # A bare port defaults to tcp; "22/" (empty protocol) stays invalid.
+    proto = proto_part.strip().lower() if slash else "tcp"
+    # ``int()`` alone is too lenient: it accepts "+22", "1_0" and
+    # non-ASCII digits, none of which is a well-formed port number.
+    if not (port_str.isascii() and port_str.isdigit()):
+        return None
+    try:
+        port = int(port_str)
+    except ValueError:  # digit string longer than int()'s parse limit
+        return None
+    if proto not in ("tcp", "udp", "sctp") or not 1 <= port <= 65535:
+        return None
+    return (port, proto)
+
+
 class ContainerScanner:
     """Wraps Trivy, Grype, and Syft for container image scanning."""
 
@@ -98,6 +182,13 @@ def scan(self, path: str, config: dict | None = None) -> ScanResult:
                 )
                 metadata["syft"] = syft_meta
 
+            if "exposure" in enabled:
+                exposure_findings, exposure_meta = self._scan_exposed_ports(
+                    image_ref, config,
+                )
+                all_findings.extend(exposure_findings)
+                metadata["exposure"] = exposure_meta
+
             if not metadata:
                 metadata["error"] = (
                     "None of the enabled scanners "
@@ -157,10 +248,147 @@ def parse_grype_results(self, raw_output_path: Path) -> list[Finding]:
     # ------------------------------------------------------------------
 
     def _enabled_scanners(self, config: dict) -> list[str]:
-        """Return list of enabled sub-scanner names from config."""
-        raw = config.get("scanners", "trivy,grype,syft")
+        """Return list of enabled sub-scanner names from config.
+
+        Default set covers vulnerability scanning (trivy, grype),
+        SBOM generation (syft), and attack-surface visibility
+        (exposure — declared Dockerfile EXPOSE ports). To disable one,
+        set the ``scanners`` config key to a comma-separated value
+        that omits it.
+
+        The value is split on commas; each name is stripped and
+        lower-cased, and empty entries are dropped.
+        """
+        raw = config.get("scanners", "trivy,grype,syft,exposure")
         return [s.strip().lower() for s in raw.split(",") if s.strip()]
 
+    def _scan_exposed_ports(
+        self, image_ref: str, config: dict,
+    ) -> tuple[list[Finding], dict]:
+        """Report the ports an image declares via ``Config.ExposedPorts``.
+
+        The image is inspected with the local container runtime
+        (``<runtime> image inspect``) after making sure it is present
+        locally. One ``Finding`` is emitted per declared port:
+          - severity INFO for ordinary application ports;
+          - severity MEDIUM for ports on the built-in ``RISKY_PORTS``
+            list (or the operator's override of it).
+
+        Config knobs (both take lists of ``"PORT/PROTO"`` strings):
+          ``scanners.container.expose_warn_ports``  – replaces the
+              built-in risky list; an empty list downgrades every
+              finding to INFO.
+          ``scanners.container.expose_ignore_ports`` – suppresses
+              findings entirely for these ports (intended for ports
+              the team has explicitly accepted, e.g. an app's 8080/tcp).
+        Entries that do not parse are logged and skipped.
+
+        Args:
+            image_ref: Image reference to inspect.
+            config: The ``scanners.container`` config mapping.
+
+        Returns:
+            ``(findings, metadata)``. On failure ``findings`` is empty and
+            ``metadata`` holds a single ``"skipped"`` or ``"error"`` key;
+            on success it holds the execution mode and port counters.
+        """
+        from argus import container_runtime
+
+        rt = container_runtime.runtime_cmd()
+        if not container_runtime.is_available():
+            return [], {
+                "skipped": "no container runtime available — install Docker, "
+                           "Podman, or nerdctl to enable exposed-port discovery",
+            }
+
+        # Ensure the image is present locally before inspecting.
+        # ``if-not-present`` is a fast cache hit when trivy/grype/syft
+        # already pulled the image in this scan run.
+        if not container_runtime.pull_image(image_ref, policy="if-not-present"):
+            return [], {
+                "error": f"could not pull or locate image {image_ref} for inspection",
+            }
+
+        result = subprocess.run(
+            [rt, "image", "inspect", image_ref],
+            capture_output=True, text=True,
+        )
+        if result.returncode != 0:
+            return [], {
+                "error": (
+                    f"docker inspect failed (rc={result.returncode}): "
+                    f"{result.stderr.strip()[:300]}"
+                ),
+            }
+
+        try:
+            inspected = json.loads(result.stdout)
+        except json.JSONDecodeError as exc:
+            return [], {"error": f"could not parse docker inspect output: {exc}"}
+
+        if not isinstance(inspected, list) or not inspected:
+            return [], {"error": "docker inspect returned no image entries"}
+
+        config_block = inspected[0].get("Config") or {}
+        exposed = config_block.get("ExposedPorts") or {}
+
+        # An operator-provided warn list REPLACES the built-in defaults
+        # (an empty list is honoured); an unset or unusable value falls
+        # back to them. The ignore list simply defaults to empty.
+        warn_override = self._configured_ports(config, "expose_warn_ports")
+        warn_set = set(RISKY_PORTS) if warn_override is None else warn_override
+        ignore_set = self._configured_ports(config, "expose_ignore_ports") or set()
+
+        findings: list[Finding] = []
+        ignored_count = 0
+        for raw_port in sorted(exposed):
+            parsed = _parse_port_proto(raw_port)
+            if parsed is None:
+                logger.warning(
+                    "Skipping unparsable port reference '%s' in %s ExposedPorts",
+                    raw_port, image_ref,
+                )
+                continue
+            port, proto = parsed
+            if parsed in ignore_set:
+                ignored_count += 1
+                continue
+
+            is_risky = parsed in warn_set
+            # The service label always comes from the built-in table, so a
+            # port added only through the override has no label.
+            service = RISKY_PORTS.get(parsed)
+            severity = Severity.MEDIUM if is_risky else Severity.INFO
+            title_service = f" ({service})" if service else ""
+            description = (
+                f"Image declares EXPOSE for port {port}/{proto}{title_service}. "
+                + (
+                    "This is on the risky-defaults watchlist — services on "
+                    "this port have a history of weak default configurations. "
+                    "Confirm the container actually intends to listen here "
+                    "and that authentication/TLS is in front of it."
+                    if is_risky else
+                    "Declared exposed port — informational. No action required "
+                    "unless the port is unexpected for this image."
+                )
+            )
+            findings.append(
+                Finding(
+                    id=f"EXPOSE-{port}-{proto}",
+                    severity=severity,
+                    title=(
+                        f"Port {port}/{proto}{title_service} declared exposed"
+                    ),
+                    description=description,
+                    scanner=self.name,
+                    metadata={
+                        "port": port,
+                        "protocol": proto,
+                        "common_service": service or "",
+                        "risky": is_risky,
+                        "image_ref": image_ref,
+                    },
+                ),
+            )
+
+        return findings, {
+            "execution": "local-inspect",
+            "ports_declared": len(exposed),
+            "ports_reported": len(findings),
+            "ports_ignored": ignored_count,
+        }
+
+    @staticmethod
+    def _configured_ports(
+        config: dict, key: str,
+    ) -> set[tuple[int, str]] | None:
+        """Parse the ``PORT/PROTO`` list stored under *key* in *config*.
+
+        Returns ``None`` when the key is unset or its value is not a
+        list-like collection, otherwise the set of entries that parsed.
+        Unusable values and entries are logged rather than dropped
+        silently, so a typo in argus.yml does not quietly change
+        finding severities.
+        """
+        entries = config.get(key)
+        if entries is None:
+            return None
+        if not isinstance(entries, (list, tuple, set, frozenset)):
+            # Iterating a bare string would treat each character as an entry.
+            logger.warning(
+                "Ignoring scanners.container.%s: expected a list of "
+                "PORT/PROTO strings, got %s",
+                key, type(entries).__name__,
+            )
+            return None
+        ports: set[tuple[int, str]] = set()
+        for entry in entries:
+            parsed = _parse_port_proto(entry)
+            if parsed is None:
+                logger.warning(
+                    "Skipping invalid PORT/PROTO entry %r in "
+                    "scanners.container.%s",
+                    entry, key,
+                )
+                continue
+            ports.add(parsed)
+        return ports
+
     def _build_env(self, config: dict) -> dict[str, str]:
         """Build environment dict with optional registry credentials.