huntridge-labs
diff --git a/.ai/architecture.yaml b/.ai/architecture.yaml
Lines changed: 4 additions & 2 deletions
diff --git a/.ai/errors.yaml b/.ai/errors.yaml
Lines changed: 29 additions & 6 deletions
diff --git a/argus.example.yml b/argus.example.yml
Lines changed: 20 additions & 0 deletions
diff --git a/argus/core/engine.py b/argus/core/engine.py
Lines changed: 42 additions & 0 deletions
diff --git a/argus/core/schema.py b/argus/core/schema.py
Lines changed: 143 additions & 1 deletion
@@ -33,11 +33,12 @@ components:
       "core/models.py": "Severity enum, Finding/ScanResult/ScanSummary dataclasses"
       "core/scanner.py": "Scanner Protocol definition"
       "core/config.py": "ArgusConfig loading from argus.yml"
-      "core/engine.py": "ArgusEngine orchestrating scanners and aggregating results (delegates to core/prewarm.py for background image pre-warm + lazy pulls in parallel mode)"
+      "core/engine.py": "ArgusEngine orchestrating scanners and aggregating results (delegates to core/prewarm.py for background image pre-warm + lazy pulls in parallel mode). Container path honors three optional Scanner hooks: container_args/build_args (CLI argv), container_env(config)->dict (extra -e flags; None values are filtered so unset env-var-name refs don't leak as 'NAME=None'), and container_mounts(config)->[(host,container)] (extra read-only bind mounts; non-existent host paths are skipped with a WARNING rather than aborting the scan)."
       "core/prewarm.py": "ImagePrewarmer — best-effort background container image pulls. Dedup'd by image ref, capped concurrency (execution.prewarm_workers, default 4), opt-out via execution.prewarm_images: false. Skipped when pull_policy=never. Pre-warm failure falls back to inline pull in _run_in_container."
       "core/exclusions.py": "Path exclusion set (builtins + .gitignore + config + CLI), ``**``-glob matcher"
       "core/tool_config.py": "Auto-discovery of per-scanner canonical config files (.bandit, .checkov.yaml, trivy.yaml, osv-scanner.toml, semgrep.yml)"
       "core/sbom.py": "SBOM format detection (CycloneDX JSON/XML, SPDX JSON/tag-value, Syft JSON) for ``argus scan --sbom``"
+      "core/secrets.py": "Credential resolution for any scanner config field. resolve_secret(config, field) accepts either <field> (literal, warned at config-load if vendor-shaped via looks_like_literal_secret) or <field>_env (env-var name reference; reads os.environ at scan time). validate_env_var_name enforces POSIX shell identifier rules. Stdlib-only. Used by scanners.container and scanners.zap; future scanners with credential needs use the same helper. See ADR-024."
       "core/findings_view.py": "Shared UI-free logic for findings display — ViewState, SEVERITY_ORDER, finding_detail_rows, compute_summary, diff_scans (scan-over-scan bucketing keyed off (scanner, id, location)). Consumed by argus view terminal (TUI ``D`` keybind, DiffScreen) and argus view browser (web UI ``/diff`` route)."
       "viewers/": "`argus view` interfaces (optional extras)"
       "viewers/__init__.py": "ViewerUnavailable shared exception"
@@ -465,11 +466,12 @@ docsite:
       "core/models.py": "Severity enum, Finding/ScanResult/ScanSummary dataclasses"
       "core/scanner.py": "Scanner Protocol definition"
       "core/config.py": "ArgusConfig loading from argus.yml"
-      "core/engine.py": "ArgusEngine orchestrating scanners and aggregating results (delegates to core/prewarm.py for background image pre-warm + lazy pulls in parallel mode)"
+      "core/engine.py": "ArgusEngine orchestrating scanners and aggregating results (delegates to core/prewarm.py for background image pre-warm + lazy pulls in parallel mode). Container path honors three optional Scanner hooks: container_args/build_args (CLI argv), container_env(config)->dict (extra -e flags; None values are filtered so unset env-var-name refs don't leak as 'NAME=None'), and container_mounts(config)->[(host,container)] (extra read-only bind mounts; non-existent host paths are skipped with a WARNING rather than aborting the scan)."
       "core/prewarm.py": "ImagePrewarmer — best-effort background container image pulls. Dedup'd by image ref, capped concurrency (execution.prewarm_workers, default 4), opt-out via execution.prewarm_images: false. Skipped when pull_policy=never. Pre-warm failure falls back to inline pull in _run_in_container."
       "core/exclusions.py": "Path exclusion set (builtins + .gitignore + config + CLI), ``**``-glob matcher"
       "core/tool_config.py": "Auto-discovery of per-scanner canonical config files (.bandit, .checkov.yaml, trivy.yaml, osv-scanner.toml, semgrep.yml)"
       "core/sbom.py": "SBOM format detection (CycloneDX JSON/XML, SPDX JSON/tag-value, Syft JSON) for ``argus scan --sbom``"
+      "core/secrets.py": "Credential resolution for any scanner config field. resolve_secret(config, field) accepts either <field> (literal, warned at config-load if vendor-shaped via looks_like_literal_secret) or <field>_env (env-var name reference; reads os.environ at scan time). validate_env_var_name enforces POSIX shell identifier rules. Stdlib-only. Used by scanners.container and scanners.zap; future scanners with credential needs use the same helper. See ADR-024."
       "core/findings_view.py": "Shared UI-free logic for findings display — ViewState, SEVERITY_ORDER, finding_detail_rows, compute_summary, diff_scans (scan-over-scan bucketing keyed off (scanner, id, location)). Consumed by argus view terminal (TUI ``D`` keybind, DiffScreen) and argus view browser (web UI ``/diff`` route)."
       "viewers/": "`argus view` interfaces (optional extras)"
       "viewers/__init__.py": "ViewerUnavailable shared exception"
 
@@ -146,13 +146,36 @@ error_patterns:
 
     solution:
       steps:
-        - "Add registry credentials to GitHub Secrets"
-        - "Pass credentials to workflow"
+        - "Store credentials as environment variables (CI runner env, .envrc, etc.) — never as YAML literals."
+        - "Reference the env-var *name* from argus.yml using the <field>_env shape; argus.core.secrets.resolve_secret reads os.environ at scan time."
+        - "For composite-action consumers on GitHub Actions, the runner-side env can still be wired from `${{ secrets.X }}`."
       code: |
-        with:
-          registry_username: ${{ secrets.REGISTRY_USER }}
-          registry_password: ${{ secrets.REGISTRY_TOKEN }}
-      warning: "Never put credentials in config file"
+        # Preferred — argus.yml
+        scanners:
+          container:
+            registry_username_env: REGISTRY_USER     # name of env var
+            registry_password_env: REGISTRY_TOKEN    # name of env var
+      ci_example: |
+        # GitHub Actions: export the secret into the runner env
+        - name: Run argus scan
+          env:
+            REGISTRY_USER: ${{ secrets.REGISTRY_USER }}
+            REGISTRY_TOKEN: ${{ secrets.REGISTRY_TOKEN }}
+          run: argus scan --config argus.yml
+      legacy_supported: |
+        # Still works (warned at config-load if the value matches
+        # a known vendor secret prefix):
+        scanners:
+          container:
+            registry_username: "user"
+            registry_password: "literal"
+      warning: |
+        Never commit literal credentials to argus.yml. The
+        <field>_env shape keeps secret values out of VCS entirely;
+        argus.core.secrets emits a warning if a literal value
+        matches a known vendor-secret prefix (gh*, AKIA, AIza,
+        glpat-, etc.) at config-load time so the leak is caught
+        before the scan runs.
 
   # ==============================================================================
   # PR COMMENT ERRORS
 
@@ -39,10 +39,30 @@ scanners:
   #   enabled: false
   #   image_ref: "myapp:latest"
   #   scanners: "trivy,grype,syft"
+  #   # Private-registry auth: name an env var, never paste a literal.
+  #   # The runner / shell exports the value; argus reads it at scan time.
+  #   registry_username_env: REGISTRY_USER
+  #   registry_password_env: REGISTRY_TOKEN
 
   # zap:
   #   enabled: false
   #   target_url: "http://localhost:3000"
+  #   #
+  #   # Optional tuning (decided in ADR-024; container backend only):
+  #   #
+  #   # scan_type: baseline           # baseline | full | api
+  #   # api_spec: "http://localhost:3000/openapi.json"   # auto-switches to api scan
+  #   # rules_file: ".zap/rules.tsv"  # ZAP ignore rules; mounted into container
+  #   # max_duration_minutes: 30      # hard cap on scan time
+  #   # cmd_options:                  # appended verbatim after built-in flags
+  #   #   - "-z"
+  #   #   - "-config view.locale=en_GB"
+  #   #
+  #   # Authenticated scan via ZAP context file (user-authored):
+  #   # auth:
+  #   #   context_file: ".zap/context.xml"
+  #   #   username_env: ZAP_APP_USER       # env-var *name*
+  #   #   password_env: ZAP_APP_PASSWORD
 
 reporting:
   # ``argus-results.json`` is the canonical scan artifact and is
 
@@ -832,6 +832,48 @@ def _run_in_container(
                 docker_cmd.extend(["--entrypoint", entrypoint])
                 logger.debug("Overriding entrypoint: %s", entrypoint)
 
+            # Optional per-scanner env vars — e.g. credentials resolved
+            # via ``argus.core.secrets.resolve_secret``. Scanners that
+            # need to pass authentication or runtime parameters to the
+            # tool implement ``container_env(config) -> dict[str, str]``;
+            # the engine forwards each entry as ``-e NAME=VALUE``.
+            # Values are passed by content, not by name-passthrough,
+            # so unset host env vars don't accidentally inherit.
+            if hasattr(scanner, "container_env"):
+                extra_env = scanner.container_env(config or {}) or {}
+                for env_name, env_value in extra_env.items():
+                    if env_value is None:
+                        continue
+                    docker_cmd.extend(["-e", f"{env_name}={env_value}"])
+                if extra_env:
+                    logger.debug(
+                        "Scanner '%s' injected %d env var(s) into container",
+                        scanner.name, len([v for v in extra_env.values() if v is not None]),
+                    )
+
+            # Optional per-scanner read-only bind mounts — e.g. ZAP
+            # context files or ignore-rules files. Scanners return a
+            # list of ``(host_path, container_path)`` tuples from
+            # ``container_mounts(config)``. Host paths are resolved to
+            # absolute paths; entries pointing at non-existent files
+            # are skipped with a warning so a typo'd config doesn't
+            # take down the whole scan.
+            if hasattr(scanner, "container_mounts"):
+                for mount in scanner.container_mounts(config or {}) or []:
+                    host_path, container_path = mount
+                    abs_host = str(Path(host_path).resolve())
+                    if not Path(abs_host).exists():
+                        logger.warning(
+                            "Scanner '%s' requested mount of '%s' but the "
+                            "path does not exist — skipping",
+                            scanner.name, host_path,
+                        )
+                        continue
+                    docker_cmd.extend(["-v", f"{abs_host}:{container_path}:ro"])
+                    logger.debug(
+                        "Scanner mount: %s → %s (ro)", abs_host, container_path,
+                    )
+
             # Prefer the unified ``build_args(ScanPaths)`` shape (single
             # source of truth for both local and container CLI args).
             # Fall back to legacy ``container_args(config)`` for scanners
 
@@ -10,6 +10,11 @@
 import logging
 from typing import Any
 
+from argus.core.secrets import (
+    looks_like_literal_secret,
+    validate_env_var_name,
+)
+
 logger = logging.getLogger("argus")
 
 # ── Schema definition ────────────────────────────────────────────────
@@ -30,7 +35,30 @@
     # Scanner-specific keys that are valid in extra
     "image_ref", "target_url", "scanners", "scan_type",
     "framework", "check", "skip_check", "config",
+    # Credential fields (either form: literal or <field>_env)
     "registry_username", "registry_password",
+    "registry_username_env", "registry_password_env",
+    # ZAP-specific tuning (decided in ADR-024)
+    "api_spec", "rules_file", "cmd_options",
+    "max_duration_minutes", "healthcheck_url",
+    "app_image_ref", "app_ports",
+    "auth",  # nested block; sub-keys validated separately
+}
+
+# ZAP web-app auth sub-block keys (under scanners.zap.auth.*)
+_ZAP_AUTH_KEYS = {
+    "context_file",
+    "username", "username_env",
+    "password", "password_env",
+}
+
+# Credential fields per scanner — drives the _validate_secret_field
+# checks. Maps scanner name -> tuple of credential field names.
+_CREDENTIAL_FIELDS: dict[str, tuple[str, ...]] = {
+    "container": ("registry_username", "registry_password"),
+    "zap": ("registry_username", "registry_password"),
+    # zap.auth.username / zap.auth.password handled separately
+    # because they live in a nested block.
 }
 
 # Known reporting keys
@@ -158,7 +186,36 @@ def _validate_scanner(path: str, data: Any) -> list[ConfigError]:
     if "exclude" in data and not isinstance(data["exclude"], str):
         errors.append(ConfigError(f"{path}.exclude", "Must be a comma-separated string"))
 
-    # Warn on unknown keys
+    # Credential fields — validate either-form contract (literal or *_env).
+    # Scanner name is the last path segment ("scanners.<name>").
+    scanner_name = path.rsplit(".", 1)[-1]
+    for cred_field in _CREDENTIAL_FIELDS.get(scanner_name, ()):
+        errors.extend(_validate_secret_field(data, cred_field, path))
+
+    # ZAP web-app auth sub-block (nested under scanners.zap.auth.*)
+    if scanner_name == "zap" and "auth" in data:
+        errors.extend(_validate_zap_auth(f"{path}.auth", data["auth"]))
+
+    # ZAP cmd_options must be a list of strings
+    if scanner_name == "zap" and "cmd_options" in data:
+        opts = data["cmd_options"]
+        if not isinstance(opts, list) or not all(isinstance(o, str) for o in opts):
+            errors.append(ConfigError(
+                f"{path}.cmd_options",
+                "Must be a list of strings (passed verbatim to the ZAP CLI)",
+            ))
+
+    # ZAP max_duration_minutes must be a positive int
+    if scanner_name == "zap" and "max_duration_minutes" in data:
+        v = data["max_duration_minutes"]
+        if not isinstance(v, int) or isinstance(v, bool) or v <= 0:
+            errors.append(ConfigError(
+                f"{path}.max_duration_minutes",
+                f"Must be a positive integer, got {v!r}",
+            ))
+
+    # Warn on unknown keys (after credential / nested-block handling so
+    # we don't double-warn on the keys we already validated).
     for key in data:
         if key not in _SCANNER_KNOWN_KEYS:
             errors.append(ConfigError(
@@ -170,6 +227,91 @@ def _validate_scanner(path: str, data: Any) -> list[ConfigError]:
     return errors
 
 
+def _validate_secret_field(
+    data: dict, field: str, path: str,
+) -> list[ConfigError]:
+    """Check one credential given as ``<field>`` and/or ``<field>_env``.
+
+    Args:
+        data: Mapping that holds the credential keys.
+        field: Base key of the credential (the literal form).
+        path: Dotted config path of ``data``; prefixes every issue path.
+
+    Returns:
+        Issues in this order: an error when ``<field>_env`` is not a
+        string or is rejected by ``validate_env_var_name``; a warning
+        when a string literal is flagged by ``looks_like_literal_secret``;
+        a warning when both forms are present.
+    """
+    env_field = f"{field}_env"
+    has_literal = field in data
+    has_env_ref = env_field in data
+    issues: list[ConfigError] = []
+
+    if has_env_ref:
+        env_path = f"{path}.{env_field}"
+        env_name = data[env_field]
+        if not isinstance(env_name, str):
+            problem = (
+                "Must be a string environment variable name, "
+                f"got {type(env_name).__name__}"
+            )
+            issues.append(ConfigError(env_path, problem))
+        elif not validate_env_var_name(env_name):
+            problem = (
+                f"'{env_name}' is not a valid environment variable name "
+                "(must match [A-Za-z_][A-Za-z0-9_]*)"
+            )
+            issues.append(ConfigError(env_path, problem))
+
+    if has_literal:
+        literal = data[field]
+        # Only string literals are screened; other types are not checked here.
+        if isinstance(literal, str) and looks_like_literal_secret(literal):
+            hint = (
+                "Looks like a literal vendor secret. Prefer "
+                f"'{env_field}: <ENV_VAR_NAME>' to keep credentials "
+                "out of argus.yml."
+            )
+            issues.append(
+                ConfigError(f"{path}.{field}", hint, level="warning")
+            )
+
+    if has_literal and has_env_ref:
+        # Reported against the literal key, mirroring the message text.
+        notice = (
+            f"Both '{field}' and '{env_field}' are set — only one "
+            f"should be used. '{env_field}' takes precedence at resolution."
+        )
+        issues.append(
+            ConfigError(f"{path}.{field}", notice, level="warning")
+        )
+
+    return issues
+
+
+def _validate_zap_auth(path: str, data: Any) -> list[ConfigError]:
+    """Validate the ``scanners.zap.auth`` mapping.
+
+    Args:
+        path: Dotted config path of the auth block; prefixes issue paths.
+        data: The raw value found under ``auth``.
+
+    Returns:
+        A single error when ``data`` is not a mapping. Otherwise: one
+        warning per key outside ``_ZAP_AUTH_KEYS``, an error when
+        ``context_file`` is present but not a string, then whatever
+        ``_validate_secret_field`` reports for ``username`` and
+        ``password`` (same <field> / <field>_env contract).
+    """
+    if not isinstance(data, dict):
+        return [
+            ConfigError(
+                path, f"Must be a mapping, got {type(data).__name__}",
+            ),
+        ]
+
+    valid_keys = ", ".join(sorted(_ZAP_AUTH_KEYS))
+    issues: list[ConfigError] = [
+        ConfigError(
+            f"{path}.{key}",
+            f"Unknown auth key '{key}'. "
+            f"Valid keys: {valid_keys}",
+            level="warning",
+        )
+        for key in data
+        if key not in _ZAP_AUTH_KEYS
+    ]
+
+    # A missing key defaults to a string, so only a present non-string fails.
+    if not isinstance(data.get("context_file", ""), str):
+        issues.append(ConfigError(
+            f"{path}.context_file", "Must be a string path",
+        ))
+
+    for credential in ("username", "password"):
+        issues.extend(_validate_secret_field(data, credential, path))
+
+    return issues
+
+
 def _validate_reporting(path: str, data: Any) -> list[ConfigError]:
     """Validate the reporting config block."""
     errors: list[ConfigError] = []