diff --git a/argus.example.yml b/argus.example.yml index 5b46d238..a556e4e5 100644 --- a/argus.example.yml +++ b/argus.example.yml @@ -55,6 +55,53 @@ reporting: severity_threshold: high output_dir: "./argus-results" +# Container lifecycle targets (consumed by ``argus scan container``). +# Defining anything under this top-level ``containers:`` key activates +# config-driven container scans — no need to pass --image / --discover +# on the CLI. CLI flags, when supplied, override the matching keys +# below: explicit > implicit. +# +# RECOMMENDED: pin every image to an immutable digest. ``:tag`` +# references are mutable — the same ``myorg/app:1.4.0`` can publish +# different bytes over time, which means CVE attribution drifts and +# scan results aren't reproducible. ``@sha256:...`` references are +# byte-level immutable: the scanner reads exactly what you pinned, +# every run, forever. Renovate and Dependabot can both update +# digest-pinned references automatically. +# +# containers: +# images: +# # PREFERRED: tag + digest (human-readable + immutable) +# - image: ghcr.io/myorg/app:1.4.0@sha256:f1e2d3c4b5a6f7e8d9c0b1a2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2 +# +# # ALSO PREFERRED: digest-only (immutable, no tag noise) +# - image: ghcr.io/myorg/worker@sha256:0a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0a1b +# +# # Build-then-scan: digest only meaningful after first publish. +# # The dockerfile is built and tagged locally, so it's reproducible +# # within one CI run even without a pinned digest. +# - image: myorg/inhouse:dev +# dockerfile: docker/Dockerfile +# context: . +# +# # ACCEPTABLE FALLBACK: tag-only. Easier to read, but mutable — +# # use only for ad-hoc scans or in environments where digest +# # discovery isn't yet wired up. Plan to migrate to a pinned +# # form before relying on the scan output for an audit trail. +# # - image: myorg/legacy:1.0.0 +# +# # OR auto-discovery. 
Walks ``search_paths`` for Dockerfile, +# # Dockerfile.*, *.Dockerfile, then builds + scans each +# # image locally. Builds inherit your local docker daemon, so +# # reproducibility is bounded by your build cache. +# discover: true +# search_paths: +# - . +# - docker/ +# +# # Override which sub-scanners run; default is trivy + grype. +# scanners: [trivy, grype, syft] + # Execution backend configuration execution: # auto (default): containers for immutable, reproducible scans diff --git a/argus/cli.py b/argus/cli.py index 034f7cba..68a854e5 100644 --- a/argus/cli.py +++ b/argus/cli.py @@ -1016,20 +1016,39 @@ def cmd_scan(args: argparse.Namespace) -> int: except ImportError: pass - # Container lifecycle — needs --discover or --image + # Container lifecycle — activated by EITHER CLI flags OR a config + # file with a populated ``containers:`` block. Load config first + # so a config-only invocation (``argus scan container --config + # argus.yml`` with no --image/--discover) reaches the lifecycle + # path; the previous gate looked at CLI flags only and shipped a + # confusing usage error before config was even consulted. 
if args.scanner == "container": - if _is_container_lifecycle(args): - return _cmd_container_scan(args) + try: + container_config = _load_container_config(args) + except ValueError as exc: + print(f"Error: {exc}", file=sys.stderr) + return EXIT_ERROR + + if ( + _is_container_lifecycle(args) + or _container_config_has_targets(container_config) + ): + return _cmd_container_scan(args, container_config=container_config) + print( - "Usage: argus scan container [--discover PATH | --image REF]\n\n" - "Container image scanning requires one of:\n" - " --discover PATH Discover Dockerfiles and scan all images\n" - " --image REF Scan a specific image (can be repeated)\n\n" + "Usage: argus scan container " + "[--config FILE | --discover PATH | --image REF]\n\n" + "Container image scanning needs at least one source of targets:\n" + " --image REF Scan a specific image (CLI, repeatable)\n" + " --discover PATH Discover Dockerfiles in PATH\n" + " --config FILE Load `containers.images` and/or " + "`containers.discover`\n" + " from a YAML config file (e.g. argus.yml).\n\n" "Examples:\n" - " argus scan container --discover ./\n" - " argus scan container --discover docker/\n" " argus scan container --image nginx:latest\n" - " argus scan container --image myapp:v1 --image worker:v1\n", + " argus scan container --discover ./docker/\n" + " argus scan container --config argus.yml\n" + " argus scan container --config argus.yml --image extra:tag\n", file=sys.stderr, ) return EXIT_ERROR @@ -1056,13 +1075,106 @@ def cmd_scan(args: argparse.Namespace) -> int: def _is_container_lifecycle(args: argparse.Namespace) -> bool: - """Check if container lifecycle flags are present.""" + """Check if container lifecycle CLI flags are present. + + Note: this is the CLI-only signal. 
Config-defined targets in + ``argus.yml`` (a ``containers.images`` list or + ``containers.discover`` flag) also activate the container lifecycle — + that path goes through ``_load_container_config`` / + ``_container_config_has_targets``, which the dispatcher consults + alongside this CLI-flag check before deciding whether to fall + back to the usage-error gate. + """ return bool( getattr(args, "discover", None) is not None or getattr(args, "images", None) ) +def _load_container_config(args: argparse.Namespace) -> dict: + """Build the container-scan config from --config + CLI overrides. + + The caller can supply targets one of three ways (or any + combination): an explicit ``--config FILE`` (top-level + ``containers:`` block), repeated ``--image REF`` flags, or + ``--discover PATH``. CLI flags take precedence over config-file + values for the keys they touch — explicit > implicit. + + Raises ``ValueError`` with an actionable message when the config + file is unreadable, isn't a YAML mapping, or has a malformed + ``containers`` section. The dispatcher catches this and prints + the message before exiting EXIT_ERROR — users see one clean + diagnostic instead of an opaque traceback from deep in the + YAML/engine path. + """ + config: dict = {} + config_path = getattr(args, "config", None) + if config_path: + try: + import yaml + with open(config_path, "r", encoding="utf-8") as fh: + file_config = yaml.safe_load(fh) or {} + except FileNotFoundError as exc: + raise ValueError(f"Config file not found: {config_path}") from exc + except yaml.YAMLError as exc: + raise ValueError( + f"Config file YAML parse error in {config_path}: {exc}" + ) from exc + + if not isinstance(file_config, dict): + raise ValueError( + f"{config_path} is not a YAML mapping; expected an object " + "at the top level." 
+ ) + containers_section = file_config.get("containers", {}) + if not isinstance(containers_section, dict): + raise ValueError( + f"{config_path}: 'containers' must be a mapping, got " + f"{type(containers_section).__name__}. Expected: " + "containers:\n images:\n - image: \n discover: true" + ) + config = dict(containers_section) + + # CLI overrides — explicit > implicit. --image and --discover both + # OVERWRITE the corresponding config keys so the user's intent is + # unambiguous (and so we don't accidentally double-scan an image + # the user passed on the CLI to *replace* a stale config entry). + if getattr(args, "images", None): + config["images"] = [ + {"image": img, "name": img.split(":")[0].split("/")[-1]} + for img in args.images + ] + if getattr(args, "discover", None) is not None: + config["discover"] = True + config["search_paths"] = [args.discover] + if getattr(args, "scanners", None): + config["scanners"] = [s.strip() for s in args.scanners.split(",")] + + return config + + +def _container_config_has_targets(config: dict) -> bool: + """Return True if the merged container config has any way to resolve targets. + + Used by the dispatcher to decide whether ``argus scan container`` + can proceed without explicit ``--discover``/``--image`` flags. + Mirrors the semantics of ``parse_container_config``: images list + non-empty, ``discover: true``, or an explicit ``search_paths`` + list — any one is enough. 
+ """ + if not isinstance(config, dict): + return False + images = config.get("images") + if isinstance(images, list) and len(images) > 0: + return True + if config.get("discover"): + return True + search_paths = config.get("search_paths") + if isinstance(search_paths, list) and len(search_paths) > 0: + return True + return False + + def _is_dast_lifecycle(args: argparse.Namespace) -> bool: """Check if DAST lifecycle flags are present.""" return bool( @@ -1578,36 +1690,45 @@ def _print_missing_scanner_nudge(requested: list[str], summary) -> None: ) -def _cmd_container_scan(args: argparse.Namespace) -> int: - """Run container image scanning lifecycle (discover, build, scan, report).""" +def _cmd_container_scan( + args: argparse.Namespace, + container_config: dict | None = None, +) -> int: + """Run container image scanning lifecycle (discover, build, scan, report). + + ``container_config`` is the merged config the dispatcher pre-loaded + via ``_load_container_config``. When ``None`` (e.g. a direct + test-side call), this function falls back to loading it locally — + that path is kept for backward compatibility with any caller that + still bypasses ``cmd_scan``. 
+ """ from argus.container import ContainerEngine from argus.reporters.container_markdown import ContainerMarkdownReporter - # Build container config from args and config file - config = {} - - if args.config: + if container_config is None: try: - import yaml - with open(args.config, "r") as fh: - file_config = yaml.safe_load(fh) or {} - config = file_config.get("containers", {}) - except Exception as exc: - print(f"Error loading config: {exc}", file=sys.stderr) + container_config = _load_container_config(args) + except ValueError as exc: + print(f"Error: {exc}", file=sys.stderr) return EXIT_ERROR - # CLI overrides - if args.images: - config["images"] = [ - {"image": img, "name": img.split(":")[0].split("/")[-1]} - for img in args.images - ] - if args.discover is not None: - config["discover"] = True - config["search_paths"] = [args.discover] - if args.scanners: - config["scanners"] = [s.strip() for s in args.scanners.split(",")] + # Defensive: a config that resolves to zero targets after merging + # CLI overrides should hit a clear error before the engine spins + # up, not deep inside it. The dispatcher gates this in the normal + # flow; this branch covers tests / direct-callers + protects + # against a regression where the gate stops covering a case. + if not _container_config_has_targets(container_config): + print( + "Error: container scan has no targets to run. 
Provide one of:\n" + " --image REF (CLI)\n" + " --discover PATH (CLI)\n" + " containers.images (in --config FILE)\n" + " containers.discover: true + containers.search_paths (in --config FILE)", + file=sys.stderr, + ) + return EXIT_ERROR + config = container_config base_dir = args.output_dir or config.get("output_dir", "./argus-results") output_dir = _make_run_dir(base_dir) formats = args.formats or ["terminal", "markdown"] diff --git a/argus/tests/test_cli.py b/argus/tests/test_cli.py index fa6f1c25..62b4a8c7 100644 --- a/argus/tests/test_cli.py +++ b/argus/tests/test_cli.py @@ -290,6 +290,114 @@ def test_scan_unknown_scanner_returns_error(self, monkeypatch, capsys): captured = capsys.readouterr() assert "unknown scanner 'nonexistent'" in captured.err + def test_container_lifecycle_activates_from_config_only(self, tmp_path, monkeypatch): + """Regression: ``argus scan container --config argus.yml`` should + run end-to-end when the config has containers.images, with no + --discover/--image required. Previously the CLI gate looked at + flags only and exited with usage error before reading config.""" + from argus.cli import _container_config_has_targets, _load_container_config + + config_file = tmp_path / "argus.yml" + config_file.write_text( + "containers:\n" + " images:\n" + " - image: myapp:latest\n" + " dockerfile: Dockerfile\n" + ) + args = _make_scan_args(scanner="container", config=str(config_file)) + + loaded = _load_container_config(args) + # Config-only invocation now resolves real targets without flags. 
+ assert _container_config_has_targets(loaded) is True + assert loaded["images"] == [{"image": "myapp:latest", "dockerfile": "Dockerfile"}] + + def test_container_lifecycle_cli_image_overrides_config(self, tmp_path): + """Explicit --image on the CLI replaces the config's images list, + so a stale config entry doesn't sneak into a one-off scan.""" + from argus.cli import _load_container_config + + config_file = tmp_path / "argus.yml" + config_file.write_text( + "containers:\n images:\n - image: stale:1.0\n" + ) + args = _make_scan_args( + scanner="container", config=str(config_file), images=["fresh:2.0"], + ) + + loaded = _load_container_config(args) + # CLI --image is the source of truth; stale config entry is gone. + assert loaded["images"] == [{"image": "fresh:2.0", "name": "fresh"}] + + def test_container_lifecycle_cli_discover_overrides_search_paths(self, tmp_path): + """``--discover .`` on the CLI replaces the config's search_paths.""" + from argus.cli import _load_container_config + + config_file = tmp_path / "argus.yml" + config_file.write_text( + "containers:\n" + " discover: true\n" + " search_paths:\n - docker/\n" + ) + args = _make_scan_args( + scanner="container", config=str(config_file), discover=".", + ) + + loaded = _load_container_config(args) + assert loaded["search_paths"] == ["."] + assert loaded["discover"] is True + + def test_container_lifecycle_malformed_config_emits_actionable_error(self, tmp_path): + """``containers:`` set to a string (not a mapping) gets a clear + error, not a deep traceback.""" + from argus.cli import _load_container_config + + config_file = tmp_path / "argus.yml" + config_file.write_text("containers: not-a-mapping\n") + args = _make_scan_args(scanner="container", config=str(config_file)) + + with pytest.raises(ValueError) as excinfo: + _load_container_config(args) + msg = str(excinfo.value) + assert "containers" in msg + assert "must be a mapping" in msg + # Hint includes the expected shape so the user knows how to fix 
it. + assert "images:" in msg + + def test_container_lifecycle_no_targets_returns_usage_error(self, tmp_path, monkeypatch, capsys): + """A config with an empty ``containers:`` block AND no CLI flags + hits the usage-error gate with a config-aware help message.""" + from argus.cli import cmd_scan + monkeypatch.setattr( + "argus.scanners.SCANNER_REGISTRY", {"container": object}, + ) + + config_file = tmp_path / "argus.yml" + config_file.write_text("containers: {}\n") + args = _make_scan_args(scanner="container", config=str(config_file)) + + rc = cmd_scan(args) + err = capsys.readouterr().err + + assert rc == EXIT_ERROR + # The new message names config as a valid source of targets, + # so users running config-only flows see they need to populate + # the ``containers.images`` block — not just add a CLI flag. + assert "--config FILE" in err + assert "containers.images" in err + + def test_container_lifecycle_yaml_parse_error_is_caught(self, tmp_path): + """Invalid YAML produces a friendly error, not a yaml.YAMLError + traceback bubbling up from deep in the loader.""" + from argus.cli import _load_container_config + + config_file = tmp_path / "argus.yml" + config_file.write_text("containers: [{") # incomplete + args = _make_scan_args(scanner="container", config=str(config_file)) + + with pytest.raises(ValueError) as excinfo: + _load_container_config(args) + assert "YAML parse error" in str(excinfo.value) + def test_scan_source_always_emits_canonical_json(self, monkeypatch, tmp_path): """Regression for Option C: argus-results.json must be written regardless of the user's ``reporting.formats``. 
Captures the diff --git a/argus/tests/test_cli_container.py b/argus/tests/test_cli_container.py index 017246ce..dccf243e 100644 --- a/argus/tests/test_cli_container.py +++ b/argus/tests/test_cli_container.py @@ -209,7 +209,11 @@ class TestCmdScanRouting: def test_routes_to_container_with_discover(self, monkeypatch): called = {} - def fake_container_scan(args): + def fake_container_scan(args, **_kwargs): + # The dispatcher now passes ``container_config=`` so the + # downstream cmd doesn't have to re-load the YAML. Tolerate + # the kwarg without inspecting it — this test only verifies + # the routing decision, not the config plumbing. called["container"] = True return 0 diff --git a/examples/configs/container-config.example.js b/examples/configs/container-config.example.js index f2bec32a..23306dcb 100644 --- a/examples/configs/container-config.example.js +++ b/examples/configs/container-config.example.js @@ -1,23 +1,39 @@ // Export container registry configuration // This file can be used for dynamic config generation with environment-specific values +// RECOMMENDED: PIN EVERY IMAGE TO AN IMMUTABLE DIGEST. +// Tag-only references like ``alpine:3.23.2`` are mutable — the same +// name can publish different bytes over time, which makes CVE +// attribution drift and scan results unreproducible. +// ``@sha256:...`` references are byte-level immutable: the scanner +// reads exactly what you pinned, every run, forever. +// +// The first entry below is the most ergonomic — simple-string + digest +// pin in one line, Dependabot-updatable. + // DEPENDABOT MAINTENANCE: // For automated image updates with Dependabot, use simple string format for 'image' field. -// Dependabot can update: image: "alpine:3.23.2@sha256:abc123..." +// Dependabot can update: image: "alpine:3.23.2@sha256:865b..." // Dependabot CANNOT update structured format: image: { name: "alpine", tag: "3.23.2" } // See examples/dependabot.example.yml for configuration. 
module.exports = { containers: [ + // PREFERRED: simple string with digest pin. Reproducible AND + // Dependabot-updatable in one line. { - name: "busybox-latest", - image: "busybox:latest", + name: "alpine-pinned-string", + image: "alpine:3.23.2@sha256:865b95f46d98cf867a156fe4a135ad3fe50d2056aa3f25ed31662dff6da4eb62", scanners: ["trivy", "grype", "syft"], allow_failure: true, fail_on_severity: "medium", }, + + // ALSO PREFERRED: structured form when you need registry/auth + // separation. Same digest-pinned posture; Dependabot can't + // auto-update — Renovate or manual updates only. { - name: "alpine-pinned", + name: "alpine-pinned-structured", registry: { host: "docker.io", }, @@ -31,6 +47,21 @@ module.exports = { allow_failure: true, fail_on_severity: "high", }, + + // ACCEPTABLE FALLBACK: tag-only string. Easier to read but + // mutable — CVE attribution drifts every time the registry + // republishes the tag. Use only for ad-hoc scans or in + // environments where digest discovery isn't yet wired up. + { + name: "busybox-latest", + image: "busybox:latest", + scanners: ["trivy", "grype", "syft"], + allow_failure: true, + fail_on_severity: "medium", + }, + + // Private registry example — auth secrets resolved by the + // calling workflow, not stored here. 
{ name: "ghcr-runner", registry: { @@ -47,21 +78,5 @@ module.exports = { allow_failure: false, fail_on_severity: "none", }, - // Minimal image with pinned digest example - // { - // name: "alpine-app-pinned", - // registry: { - // host: "docker.io", - // }, - // image: { - // repository: "library", - // name: "alpine", - // tag: "3.18", - // digest: "sha256:ACTUAL_ALPINE_DIGEST_HERE", - // }, - // scanners: ["trivy", "grype"], - // allow_failure: true, - // fail_on_severity: "critical", - // }, ], }; diff --git a/examples/configs/container-config.example.json b/examples/configs/container-config.example.json index df2c0381..676ba784 100644 --- a/examples/configs/container-config.example.json +++ b/examples/configs/container-config.example.json @@ -1,10 +1,12 @@ { "$schema": "https://raw.githubusercontent.com/huntridge-labs/argus/0.7.2/.github/actions/parse-container-config/schemas/container-config.schema.json", + "_recommendation": "Pin every image to an immutable digest (@sha256:...). Tag-only references like 'busybox:latest' or 'alpine:3' are mutable \u2014 same name, different bytes over time, drifting CVE attribution. Pin form (1) below is the most ergonomic: simple-string + digest pin in one line, Dependabot-updatable.", "_comment": "For Dependabot automated updates, use simple string format for 'image' field. Dependabot cannot parse structured object format. See examples/dependabot.example.yml", "containers": [ { - "name": "busybox-latest", - "image": "busybox:latest", + "_note": "PREFERRED: simple string with digest pin. Reproducible AND Dependabot-updatable in one line.", + "name": "alpine-pinned-string", + "image": "alpine:3.23.2@sha256:865b95f46d98cf867a156fe4a135ad3fe50d2056aa3f25ed31662dff6da4eb62", "scanners": [ "trivy", "grype", @@ -14,14 +16,15 @@ "fail_on_severity": "medium" }, { - "name": "alpine-pinned", + "_note": "ALSO PREFERRED: structured form when you need registry/auth separation. 
Same digest-pinned posture; Dependabot can't auto-update \u2014 Renovate or manual.", + "name": "alpine-pinned-structured", "registry": { "host": "docker.io" }, "image": { "repository": "library", "name": "alpine", - "tag": "3.263.2", + "tag": "3.23.2", "digest": "sha256:865b95f46d98cf867a156fe4a135ad3fe50d2056aa3f25ed31662dff6da4eb62" }, "scanners": [ @@ -32,6 +35,19 @@ "fail_on_severity": "high" }, { + "_note": "ACCEPTABLE FALLBACK: tag-only. Mutable \u2014 CVE attribution drifts whenever the registry republishes the tag. Use only for ad-hoc scans or until you can wire up digest discovery.", + "name": "busybox-latest", + "image": "busybox:latest", + "scanners": [ + "trivy", + "grype", + "syft" + ], + "allow_failure": true, + "fail_on_severity": "medium" + }, + { + "_note": "Private registry example \u2014 auth secrets resolved by the calling workflow, not stored here.", "name": "ghcr-runner", "registry": { "host": "ghcr.io", diff --git a/examples/configs/container-config.example.yml b/examples/configs/container-config.example.yml index 3d481677..bc3ca13e 100644 --- a/examples/configs/container-config.example.yml +++ b/examples/configs/container-config.example.yml @@ -3,26 +3,42 @@ # This file defines containers to scan across multiple registries # Use real, publicly available images for testing +# RECOMMENDED: PIN EVERY IMAGE TO AN IMMUTABLE DIGEST. +# ``:tag`` references are mutable — the same ``alpine:3.23.2`` can +# publish different bytes over time, which makes CVE attribution drift +# and scan results unreproducible. ``@sha256:...`` references are +# byte-level immutable: the scanner reads exactly what you pinned, +# every run, forever. +# +# All three formats below are digest-pinnable: +# 1. Simple string with embedded digest: alpine:3.23.2@sha256:865b... +# 2. Simple string, digest only: alpine@sha256:865b... +# 3. Structured form with `digest:` key: image: { name: alpine, ... 
} +# +# Format (1) is the most ergonomic — Dependabot and Renovate can both +# update it automatically (image *and* digest in one line). Use that +# unless you need the structured form for registry/auth separation. + # DEPENDABOT MAINTENANCE: # For automated image updates with Dependabot, use simple string format for 'image' field. -# Dependabot can update: image: "alpine:3.23.2@sha256:abc123..." +# Dependabot can update: image: "alpine:3.23.2@sha256:865b..." # Dependabot CANNOT update structured format: image: { name: alpine, tag: "3.23.2" } # See examples/dependabot.example.yml for configuration. containers: - # Public images - no authentication needed - # Simple string format (backward compatible + Dependabot compatible) - - name: busybox-latest - image: busybox:latest + # PREFERRED: simple string with digest pin. Reproducible AND + # Dependabot-updatable in one line. + - name: alpine-pinned-string + image: alpine:3.23.2@sha256:865b95f46d98cf867a156fe4a135ad3fe50d2056aa3f25ed31662dff6da4eb62 scanners: [trivy, grype, syft] allow_failure: true fail_on_severity: critical enable_code_security: false - # Structured format with breakdown fields - # Useful when you need to pin digests or control individual components - # NOTE: Dependabot cannot update this format - use simple string if you need Dependabot - - name: alpine-pinned + # ALSO PREFERRED: structured form when you need registry/auth + # separation. Same security posture (digest-pinned) but Dependabot + # can't auto-update — manual or Renovate-driven updates only. + - name: alpine-pinned-structured registry: host: docker.io image: @@ -35,6 +51,18 @@ containers: fail_on_severity: none enable_code_security: false + # ACCEPTABLE FALLBACK: tag-only string. Easier to read but mutable; + # CVE attribution drifts every time the registry republishes the + # tag. Use only for ad-hoc scans or in environments where digest + # discovery isn't yet wired up. 
Migrate to a pinned form before + # relying on the scan output for an audit trail. + - name: busybox-latest + image: busybox:latest + scanners: [trivy, grype, syft] + allow_failure: true + fail_on_severity: critical + enable_code_security: false + # Private registry examples (uncomment to use) # Use ${VAR_NAME} syntax to reference environment variables/secrets