From 38bc5814b1c34b8d60bc10c01bb4383f3acb153a Mon Sep 17 00:00:00 2001 From: eFAILution Date: Wed, 6 May 2026 10:57:44 -0400 Subject: [PATCH 1/2] fix(container): surface dockerfile path in artifacts and write argus-audit.json MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related gaps in container scan artifacts that block security review and audit archiving: 1. ContainerScanResult dropped the originating Dockerfile path. The target carries it; scan_image() and the engine error paths threw it away. So argus-results.json, the per-image markdown, the SARIF, and the raw outputs only ever surfaced the auto-derived tag like ``scanner-bandit:argus-scan`` — useless to a reviewer asking "which Dockerfile produced this finding?". Fix: add ``dockerfile`` and ``context`` fields to ContainerScanResult and populate them from the target in every construction site (scan_image happy path + the three error paths in the engine). Empty strings for remote-pull entries. The canonical ScanResult metadata in _cmd_container_scan now surfaces ``dockerfile_path`` and ``context_path`` for build-mode targets and omits them for remote-pull entries (so downstream readers don't have to special-case empty strings). Extracted the metadata-building logic into a small helper ``_canonical_container_metadata(result)`` so the dict shape is unit-testable without spinning up the full container engine. 2. Container scans skipped the audit manifest entirely. Source scans have always called ``create_manifest()`` at start and ``finalize_manifest()`` before exit, writing argus-audit.json alongside argus-results.json. _cmd_container_scan never did. Fix: add the same create+finalize bookend, with scan_targets listing the Dockerfile path for build-mode entries (or the image ref for remote-pull entries) so the manifest answers "which container produced this run's artifacts?". 
The unified exit-code computation also moved from inline early-returns to a single finalize-and-return at the bottom of the function so every success path goes through the same finalization. Tests: 1585 passing (was 1580; +5 new — ContainerScanResult dockerfile/context defaults, the canonical-metadata helper for remote-pull, build-mode, and scanner-error cases). Verified end-to-end: argus scan container now writes both argus-results.json AND argus-audit.json under the run dir, plus container-scan.md and the raw/ subdir. --- argus/cli.py | 97 +++++++++++++++++++++------ argus/container/engine.py | 6 ++ argus/container/scanner.py | 16 ++++- argus/tests/test_container_scanner.py | 60 +++++++++++++++++ 4 files changed, 159 insertions(+), 20 deletions(-) diff --git a/argus/cli.py b/argus/cli.py index c94d2685..47c9e096 100644 --- a/argus/cli.py +++ b/argus/cli.py @@ -103,6 +103,35 @@ def update_message(self, message: str) -> None: self._stream.flush() +def _canonical_container_metadata(result) -> dict: + """Build the metadata dict for a container ScanResult row. + + Surfaces the originating Dockerfile + context for build-mode + targets so security reviewers and audit archives can trace any + finding back to its source. Empty dockerfile/context fields + (remote-pull entries) are *omitted* rather than written as empty + strings so downstream readers don't have to special-case them. + + Extracted from the inline list comprehension that builds + ``canonical_results`` in ``_cmd_container_scan`` so the dict + shape is unit-testable without spinning up the full container + engine. 
+ """ + metadata: dict = { + "image_ref": result.image_ref, + "build_success": result.build_success, + } + if getattr(result, "dockerfile", ""): + metadata["dockerfile_path"] = result.dockerfile + if getattr(result, "context", ""): + metadata["context_path"] = result.context + if getattr(result, "scanner_errors", None): + metadata["scanner_errors"] = dict(result.scanner_errors) + if getattr(result, "scan_error", None): + metadata["scan_error"] = result.scan_error + return metadata + + def _configure_logger(args: argparse.Namespace, output_dir: str | None = None): """Set up the ``argus`` logger at the level the user's flags imply. @@ -1909,6 +1938,33 @@ def _cmd_container_scan( output_dir = _make_run_dir(base_dir) formats = args.formats or ["terminal", "markdown"] + # Now that we know the output dir, re-attach the logger's file + # handler so engine logs land in /argus.log alongside + # the rest of the audit trail — same shape as the source-scan + # path. ``_configure_logger`` is idempotent on the stream handler; + # passing output_dir adds the file handler when it's missing. + log = _configure_logger(args, output_dir=output_dir) + + # Audit manifest — captures the exact targets, config path, and + # outcome so a security reviewer (or CI archive) can trace any + # finding back to its inputs without cross-referencing the + # workflow. Source scans have always done this; container scans + # used to skip it entirely. ``scan_targets`` lists the originating + # source per target — Dockerfile path for build-mode entries, + # image ref for remote-pull entries — so the audit manifest + # answers "which container produced this artifact?" by name. 
+ from argus.audit import create_manifest, finalize_manifest + from argus.container.discovery import parse_container_config + _audit_targets = [ + str(t.dockerfile) if t.dockerfile else t.image_ref + for t in parse_container_config(config) + ] + manifest = create_manifest( + config_path=getattr(args, "config", None), + scan_targets=_audit_targets, + ) + manifest.execution_backend = config.get("backend", "auto") + # Decide whether to persist raw per-scanner outputs alongside the # canonical argus-results.json. Default is ON — the user just ran # a scan and would expect those artifacts to be available for @@ -1944,6 +2000,7 @@ def _cmd_container_scan( summary = engine.run() except Exception as exc: print(f"Error: container scan failed: {exc}", file=sys.stderr) + finalize_manifest(manifest, exit_code=EXIT_ERROR, output_dir=output_dir) return EXIT_ERROR # Build a canonical ScanSummary view of the container results so @@ -1958,18 +2015,7 @@ def _cmd_container_scan( ScanResult( scanner=f"container/{r.name}", findings=list(r.combined_findings), - metadata={ - "image_ref": r.image_ref, - "build_success": r.build_success, - **( - {"scanner_errors": dict(r.scanner_errors)} - if r.scanner_errors else {} - ), - **( - {"scan_error": r.scan_error} - if getattr(r, "scan_error", None) else {} - ), - }, + metadata=_canonical_container_metadata(r), ) for r in summary.results ] @@ -2009,16 +2055,29 @@ def _cmd_container_scan( f"\n{scan_failures} scanner failure(s) — results are incomplete", file=sys.stderr, ) - return EXIT_ERROR - - if args.severity_threshold and args.severity_threshold != "none": + exit_code = EXIT_ERROR + elif args.severity_threshold and args.severity_threshold != "none": from argus.core.models import Severity threshold = Severity.from_string(args.severity_threshold) + exit_code = EXIT_SUCCESS for r in summary.results: - for f in r.combined_findings: - if f.severity >= threshold: - return EXIT_FINDINGS - return EXIT_SUCCESS + if any(f.severity >= threshold for f in 
r.combined_findings): + exit_code = EXIT_FINDINGS + break + else: + exit_code = EXIT_SUCCESS + + # Finalize the audit manifest with the canonical summary so the + # archived ``argus-audit.json`` reflects what actually ran. Same + # shape as the source-scan flow. + finalize_manifest( + manifest, + summary=canonical_summary, + exit_code=exit_code, + output_dir=output_dir, + ) + log.info("Audit manifest written to %s/argus-audit.json", output_dir) + return exit_code def _cmd_dast_scan(args: argparse.Namespace) -> int: diff --git a/argus/container/engine.py b/argus/container/engine.py index eb3a5ab4..12a459bc 100644 --- a/argus/container/engine.py +++ b/argus/container/engine.py @@ -173,6 +173,8 @@ def _elapsed() -> int: return ContainerScanResult( name=target.name, image_ref=target.image_ref, + dockerfile=str(target.dockerfile) if target.dockerfile else "", + context=str(target.context) if target.context else "", build_success=False, scan_error=error_msg, ) @@ -206,6 +208,8 @@ def _elapsed() -> int: return ContainerScanResult( name=target.name, image_ref=target.image_ref, + dockerfile=str(target.dockerfile) if target.dockerfile else "", + context=str(target.context) if target.context else "", scan_error=f"OS error: {exc}", ) except Exception: @@ -213,6 +217,8 @@ def _elapsed() -> int: return ContainerScanResult( name=target.name, image_ref=target.image_ref, + dockerfile=str(target.dockerfile) if target.dockerfile else "", + context=str(target.context) if target.context else "", scan_error=f"Scan failed for {target.image_ref}", ) diff --git a/argus/container/scanner.py b/argus/container/scanner.py index 342e5035..cad55381 100644 --- a/argus/container/scanner.py +++ b/argus/container/scanner.py @@ -20,11 +20,23 @@ @dataclass class ContainerScanResult: - """Results for a single container image scan.""" + """Results for a single container image scan. 
+ + ``dockerfile`` and ``context`` capture the source the image was + built from — empty strings for remote-pull entries, populated for + local builds. Without these, downstream artifacts + (argus-results.json, per-image markdown, SARIF, audit manifest) + only carry the auto-derived tag like ``scanner-bandit:argus-scan``, + which is meaningless to a security reviewer asking "which Dockerfile + produced this finding?". Plumbing them through here lets every + consumer surface a real source path alongside the image. + """ name: str image_ref: str digest: str = "" + dockerfile: str = "" + context: str = "" trivy_findings: list[Finding] = field(default_factory=list) grype_findings: list[Finding] = field(default_factory=list) combined_findings: list[Finding] = field(default_factory=list) @@ -205,6 +217,8 @@ def scan_image( return ContainerScanResult( name=target.name, image_ref=target.image_ref, + dockerfile=str(target.dockerfile) if target.dockerfile else "", + context=str(target.context) if target.context else "", trivy_findings=trivy_findings, grype_findings=grype_findings, combined_findings=combined, diff --git a/argus/tests/test_container_scanner.py b/argus/tests/test_container_scanner.py index 51b08912..72028012 100644 --- a/argus/tests/test_container_scanner.py +++ b/argus/tests/test_container_scanner.py @@ -87,6 +87,66 @@ def test_build_failure_defaults(self): assert result.total_count == 0 +class TestContainerScanResultDockerfileFields: + """``dockerfile`` / ``context`` should flow with the result so a security + reviewer can trace any artifact back to its source without cross- + referencing the workflow.""" + + def test_remote_pull_entry_leaves_dockerfile_empty(self): + # Default (remote pull) — both empty strings. 
+ result = ContainerScanResult(name="x", image_ref="x:1") + assert result.dockerfile == "" + assert result.context == "" + + def test_build_entry_carries_dockerfile_and_context(self): + result = ContainerScanResult( + name="myapp", + image_ref="myapp:argus-scan", + dockerfile="docker/Dockerfile.app", + context=".", + ) + assert result.dockerfile == "docker/Dockerfile.app" + assert result.context == "." + + +class TestCanonicalContainerMetadata: + """The cli helper that maps ContainerScanResult → ScanResult metadata. + + Locks in the dict shape so security reviewers and the audit-archive + layer always see ``dockerfile_path`` for build-mode targets. + """ + + def test_remote_pull_omits_dockerfile_keys(self): + from argus.cli import _canonical_container_metadata + result = ContainerScanResult(name="x", image_ref="x:1") + meta = _canonical_container_metadata(result) + assert meta["image_ref"] == "x:1" + assert meta["build_success"] is True + assert "dockerfile_path" not in meta + assert "context_path" not in meta + + def test_build_entry_includes_dockerfile_path(self): + from argus.cli import _canonical_container_metadata + result = ContainerScanResult( + name="myapp", + image_ref="myapp:argus-scan", + dockerfile="docker/Dockerfile.app", + context=".", + ) + meta = _canonical_container_metadata(result) + assert meta["dockerfile_path"] == "docker/Dockerfile.app" + assert meta["context_path"] == "." 
+ + def test_scanner_errors_surfaced(self): + from argus.cli import _canonical_container_metadata + result = ContainerScanResult( + name="x", image_ref="x:1", + scanner_errors={"trivy": "DB pull failed"}, + ) + meta = _canonical_container_metadata(result) + assert meta["scanner_errors"] == {"trivy": "DB pull failed"} + + class TestDeduplicateFindings: """Test deduplicate_findings merging logic.""" From 6f3452248171a06b66d58f646911c1aa2a8590b3 Mon Sep 17 00:00:00 2001 From: eFAILution Date: Wed, 6 May 2026 11:13:02 -0400 Subject: [PATCH 2/2] test(container): cover engine error paths and scan_image dockerfile threading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Brings codecov/patch above the 80% threshold by exercising the new dockerfile/context plumbing through every engine path that builds a ContainerScanResult — build failure, OSError during scan, generic Exception during scan — plus the happy-path threading from scan_image. Each test asserts dockerfile and context are populated from the target (build entries) or empty (remote-pull entries). Five new tests in TestEngineErrorPathsCarryDockerfile and TestScanImageThreadsDockerfile. --- argus/tests/test_container_scanner_runners.py | 132 ++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/argus/tests/test_container_scanner_runners.py b/argus/tests/test_container_scanner_runners.py index 93fcf3b3..efafb97e 100644 --- a/argus/tests/test_container_scanner_runners.py +++ b/argus/tests/test_container_scanner_runners.py @@ -549,3 +549,135 @@ def fake_grype(image_ref, tmp_path, local=False): # Combined view doesn't claim grype's missing data is "no # vulnerabilities" — it's simply trivy's findings. 
assert len(result.combined_findings) == 1 + + +# ─────────────────────────────────────────────── +# Engine error-path dockerfile propagation +# ─────────────────────────────────────────────── + + +class TestEngineErrorPathsCarryDockerfile: + """Every engine path that builds a ContainerScanResult must + carry the originating Dockerfile + context, including the failure + paths. Otherwise a build error or an OS error would lose the + dockerfile reference and a security reviewer couldn't trace which + container the error belongs to. + """ + + def _engine(self): + from argus.container.engine import ContainerEngine + return ContainerEngine({}) + + def _build_target(self, tmp_path): + from argus.container.discovery import ContainerTarget + return ContainerTarget( + name="myapp", + image_ref="myapp:argus-scan", + dockerfile=tmp_path / "Dockerfile", + context=tmp_path, + ) + + def test_build_failure_preserves_dockerfile(self, tmp_path, monkeypatch): + target = self._build_target(tmp_path) + # Pretend build failed. + monkeypatch.setattr( + "argus.container.engine.build_image", lambda t: False, + ) + # Disk-space probe — return enough to avoid the OOD message branch. 
+ monkeypatch.setattr( + "argus.container.engine.check_disk_space", lambda: 10 * 1024**3, + ) + result = self._engine()._process_target(target) + assert result.build_success is False + assert result.dockerfile == str(tmp_path / "Dockerfile") + assert result.context == str(tmp_path) + + def test_oserror_during_scan_preserves_dockerfile(self, tmp_path, monkeypatch): + target = self._build_target(tmp_path) + monkeypatch.setattr( + "argus.container.engine.build_image", lambda t: True, + ) + monkeypatch.setattr( + "argus.container.engine.scan_image", + lambda *a, **kw: (_ for _ in ()).throw(OSError("disk full")), + ) + result = self._engine()._process_target(target) + assert "OS error" in result.scan_error + assert result.dockerfile == str(tmp_path / "Dockerfile") + assert result.context == str(tmp_path) + + def test_generic_exception_during_scan_preserves_dockerfile( + self, tmp_path, monkeypatch, + ): + target = self._build_target(tmp_path) + monkeypatch.setattr( + "argus.container.engine.build_image", lambda t: True, + ) + monkeypatch.setattr( + "argus.container.engine.scan_image", + lambda *a, **kw: (_ for _ in ()).throw(RuntimeError("oops")), + ) + result = self._engine()._process_target(target) + assert "Scan failed" in result.scan_error + assert result.dockerfile == str(tmp_path / "Dockerfile") + assert result.context == str(tmp_path) + + +# ─────────────────────────────────────────────── +# scan_image happy-path threading +# ─────────────────────────────────────────────── + + +class TestScanImageThreadsDockerfile: + """The happy-path ContainerScanResult from ``scan_image`` must + also carry dockerfile/context from the target.""" + + def test_scan_image_populates_dockerfile_from_target( + self, tmp_path, monkeypatch, + ): + from argus.container.discovery import ContainerTarget + from argus.container.scanner import scan_image + + target = ContainerTarget( + name="myapp", + image_ref="myapp:argus-scan", + dockerfile=tmp_path / "Dockerfile.x", + context=tmp_path, + 
) + + # Stub out the actual scanners — we only need scan_image to + # construct the result and return. + monkeypatch.setattr( + "argus.container.scanner._run_trivy", + lambda *a, **kw: [], + ) + monkeypatch.setattr( + "argus.container.scanner._run_grype", + lambda *a, **kw: [], + ) + + result = scan_image(target, scanners=("trivy", "grype")) + assert result.dockerfile == str(tmp_path / "Dockerfile.x") + assert result.context == str(tmp_path) + + def test_scan_image_remote_pull_leaves_dockerfile_empty( + self, tmp_path, monkeypatch, + ): + from argus.container.discovery import ContainerTarget + from argus.container.scanner import scan_image + + # Remote-pull entry — no dockerfile, no context. + target = ContainerTarget(name="webapp", image_ref="myorg/webapp:1.0") + + monkeypatch.setattr( + "argus.container.scanner._run_trivy", + lambda *a, **kw: [], + ) + monkeypatch.setattr( + "argus.container.scanner._run_grype", + lambda *a, **kw: [], + ) + + result = scan_image(target, scanners=("trivy", "grype")) + assert result.dockerfile == "" + assert result.context == ""