huntridge-labs · eFAILution · May 5, 2026 · May 5, 2026 · May 5, 2026
diff --git a/argus.example.yml b/argus.example.yml
@@ -55,6 +55,15 @@ reporting:
   severity_threshold: high
   output_dir: "./argus-results"
 
+  # Persist raw scanner output alongside the canonical
+  # argus-results.json. Source scans (``argus scan``) keep results.json,
+  # *.sarif, stdout.txt under ``<output_dir>/raw/<scanner>/``; container
+  # scans (``argus scan container``) keep trivy/grype/syft output under
+  # ``<output_dir>/raw/<target-name>/``. Default true — useful for
+  # forensics, audit trails, and manual triage. Set false (or pass
+  # --no-keep-raw on the CLI) to skip; saves a few MB per scan in tight CI.
+  keep_raw: true
+
 # Container lifecycle targets (consumed by ``argus scan container``).
 # Defining anything under this top-level ``containers:`` key activates
 # config-driven container scans — no need to pass --image / --discover
@@ -101,6 +110,11 @@ reporting:
 #
 #   # Override which sub-scanners run; default is trivy + grype.
 #   scanners: [trivy, grype, syft]
+#
+# Note: raw scanner-output preservation is configured via the
+# unified ``reporting.keep_raw`` knob above — the same flag covers
+# trivy/grype/syft container outputs and source-scan scanner
+# outputs. No separate container-only setting is needed.
 
 # Execution backend configuration
 execution:

diff --git a/argus/cli.py b/argus/cli.py
@@ -590,6 +590,19 @@ def _build_scan_parser(subparsers: argparse._SubParsersAction) -> None:
         help="Disable DB cache volume mounts. Forces scanners to re-download "
              "vulnerability databases on every container run.",
     )
+    scan_parser.add_argument(
+        "--no-keep-raw",
+        action="store_true",
+        dest="no_keep_raw",
+        help="Do not persist raw per-scanner output files alongside the "
+             "canonical argus-results.json. Source scans normally drop "
+             "each scanner's results.json / *.sarif / stdout.txt under "
+             "<output_dir>/raw/<scanner>/; container scans drop "
+             "trivy-results.json / grype-results.json / syft-sbom.json "
+             "under <output_dir>/raw/<image>/. Pass --no-keep-raw to "
+             "skip that step in tight CI environments. The same effect "
+             "is available via 'reporting.keep_raw: false' in argus.yml.",
+    )
 
     # Container-specific flags (used with: argus scan container)
     container_group = scan_parser.add_argument_group(
@@ -1135,6 +1148,15 @@ def _load_container_config(args: argparse.Namespace) -> dict:
             )
         config = dict(containers_section)
 
+        # Pull ``reporting.keep_raw`` from the same file so the
+        # container handler honors the unified config knob — same
+        # default-True semantics as ``_cmd_source_scan``. Stashed
+        # under a synthetic underscore key so it doesn't collide
+        # with any future ``containers:`` field a user might add.
+        reporting_section = file_config.get("reporting", {})
+        if isinstance(reporting_section, dict) and "keep_raw" in reporting_section:
+            config["_reporting_keep_raw"] = bool(reporting_section["keep_raw"])
+
     # CLI overrides — explicit > implicit. --image and --discover both
     # OVERWRITE the corresponding config keys so the user's intent is
     # unambiguous (and so we don't accidentally double-scan an image
@@ -1248,6 +1270,18 @@ def _cmd_source_scan(args: argparse.Namespace) -> int:
 
     log.info("Argus scan starting")
 
+    # Decide whether to persist raw per-scanner outputs alongside the
+    # canonical argus-results.json. Default ON — users running
+    # ``argus scan`` reasonably expect each scanner's raw results
+    # (results.json / *.sarif / stdout.txt) to be available for
+    # forensics or manual triage. Opt out via ``--no-keep-raw``
+    # (CLI) or ``reporting.keep_raw: false`` (argus.yml). CLI flag
+    # wins on conflict, matching the dispatcher's
+    # explicit-over-implicit posture used throughout.
+    keep_raw_config = getattr(config.reporting, "keep_raw", True)
+    keep_raw = bool(keep_raw_config) and not getattr(args, "no_keep_raw", False)
+    raw_output_root = str(Path(output_dir) / "raw") if keep_raw else None
+
     # Build engine and register scanners
     engine = ArgusEngine(config)
 
@@ -1352,6 +1386,7 @@ def _cmd_source_scan(args: argparse.Namespace) -> int:
                             use_default_excludes=not getattr(args, "no_default_excludes", False),
                             sbom_path=str(info.path),
                             sbom_format=info.format,
+                            raw_output_dir=raw_output_root,
                         )
                     except Exception as exc:
                         log.error(
@@ -1392,6 +1427,7 @@ def _cmd_source_scan(args: argparse.Namespace) -> int:
                     allow_local_versions=getattr(args, "allow_local_versions", False),
                     no_cache=getattr(args, "no_cache", False),
                     use_default_excludes=not getattr(args, "no_default_excludes", False),
+                    raw_output_dir=raw_output_root,
                 )
         if args.verbose and getattr(engine, "_last_resolutions", None):
             from argus.core.tool_config import format_resolutions_for_display
@@ -1733,6 +1769,24 @@ def _cmd_container_scan(
     output_dir = _make_run_dir(base_dir)
     formats = args.formats or ["terminal", "markdown"]
 
+    # Decide whether to persist raw per-scanner outputs alongside the
+    # canonical argus-results.json. Default is ON — the user just ran
+    # a scan and would expect those artifacts to be available for
+    # manual triage. Opt out via ``--no-keep-raw`` (CLI) or
+    # ``reporting.keep_raw: false`` (argus.yml). CLI flag wins on
+    # conflict, matching the rest of the dispatcher's
+    # explicit-over-implicit posture.
+    # ``reporting.keep_raw`` is the unified config home for raw-output
+    # preservation; the legacy ``containers.keep_raw`` is read only
+    # when ``reporting.keep_raw`` is absent, so configs from earlier
+    # in this PR's lifecycle don't break. The CLI flag beats both.
+    keep_raw_config = config.get(
+        "_reporting_keep_raw", config.get("keep_raw", True),
+    )
+    keep_raw = bool(keep_raw_config) and not getattr(args, "no_keep_raw", False)
+    if keep_raw:
+        config["_raw_output_root"] = str(Path(output_dir) / "raw")
+
     # Run
     try:
         engine = ContainerEngine(config)
@@ -1745,6 +1799,42 @@ def _cmd_container_scan(
         print(f"Error: container scan failed: {exc}", file=sys.stderr)
         return EXIT_ERROR
 
+    # Build a canonical ScanSummary view of the container results so
+    # the standard reporters (json → argus-results.json, sarif) and
+    # ``argus view`` can consume container scans the same way they
+    # consume source scans. Each container target becomes a
+    # ScanResult; the per-image domain metadata (image_ref, build
+    # status, scanner_errors) lifts onto ScanResult.metadata so the
+    # browser dashboard and exporters surface it.
+    from argus.core.models import ScanResult, ScanSummary
+    canonical_results = [
+        ScanResult(
+            scanner=f"container/{r.name}",
+            findings=list(r.combined_findings),
+            metadata={
+                "image_ref": r.image_ref,
+                "build_success": r.build_success,
+                **(
+                    {"scanner_errors": dict(r.scanner_errors)}
+                    if r.scanner_errors else {}
+                ),
+                **(
+                    {"scan_error": r.scan_error}
+                    if getattr(r, "scan_error", None) else {}
+                ),
+            },
+        )
+        for r in summary.results
+    ]
+    canonical_summary = ScanSummary(results=canonical_results)
+
+    # Always emit argus-results.json — same canonical-artifact
+    # contract the source-scan flow established. ``argus view`` and
+    # the audit manifest both consume this regardless of what the
+    # user listed in ``formats``.
+    from argus.reporters import get_reporter
+    get_reporter("json").report(canonical_summary, output_dir)
+
     # Reports
     for fmt in formats:
         if fmt == "markdown":
@@ -1755,16 +1845,15 @@ def _cmd_container_scan(
         elif fmt == "terminal":
             _print_container_terminal(summary)
         elif fmt == "json":
+            # Domain-shaped per-image summary (container_count etc.)
+            # lives at container-scan.json. The canonical
+            # argus-results.json was already written above; this
+            # is the supplementary domain artifact for tooling that
+            # wants per-image stats without parsing findings.
             _write_container_json(summary, output_dir)
         elif fmt == "sarif":
-            from argus.core.models import ScanResult, ScanSummary
-            from argus.reporters import get_reporter
-            results = [
-                ScanResult(scanner=f"container/{r.name}", findings=r.combined_findings)
-                for r in summary.results
-            ]
             sarif_reporter = get_reporter("sarif")
-            sarif_reporter.report(ScanSummary(results=results), output_dir)
+            sarif_reporter.report(canonical_summary, output_dir)
 
     # Exit code — scanner failures are always non-zero
     scan_failures = getattr(summary, "scan_failures", 0)

diff --git a/argus/container/engine.py b/argus/container/engine.py
@@ -6,6 +6,7 @@
 """
 
 import logging
+from pathlib import Path
 
 from .builder import build_image
 from .discovery import (
@@ -130,10 +131,20 @@ def _process_target(self, target: ContainerTarget) -> ContainerScanResult:
             self._built_images.append(target.image_ref)
 
         try:
+            # If the dispatcher set ``_raw_output_root`` in the
+            # config dict, persist this target's raw scanner outputs
+            # under ``<root>/<target.name>/``. Caller controls
+            # whether this is set (CLI flag + config opt-out); the
+            # engine just threads it through.
+            raw_root = self.config.get("_raw_output_root")
+            target_raw_dir = (
+                Path(raw_root) / target.name if raw_root else None
+            )
             return scan_image(
                 target,
                 scanners=self._scanners(),
                 sbom=self._sbom_enabled(),
+                raw_output_dir=target_raw_dir,
             )
         except OSError as exc:
             # Disk full, permission denied, etc.

diff --git a/argus/container/scanner.py b/argus/container/scanner.py
@@ -123,6 +123,7 @@ def scan_image(
     target: ContainerTarget,
     scanners: tuple[str, ...] = ("trivy", "grype"),
     sbom: bool = True,
+    raw_output_dir: Path | None = None,
 ) -> ContainerScanResult:
     """Scan a single container image with trivy and/or grype.
 
@@ -132,7 +133,19 @@ def scan_image(
 
     For locally-built images, scanners reference the local Docker daemon.
     Per-scanner errors are caught and recorded, not swallowed.
+
+    ``raw_output_dir``: when supplied, the raw scanner output files
+    (``trivy-results.json``, ``grype-results.json``, ``syft-sbom.json``)
+    are copied into this directory before the temp dir is cleaned up.
+    Lets users preserve full per-scanner artifacts for forensics,
+    audit, or manual triage workflows alongside the canonical
+    ``argus-results.json``. ``None`` (the default) means transient
+    output — historic behavior.
     """
+    import shutil as _shutil  # local import to avoid shadowing the
+                              # module-level ``shutil`` reference used
+                              # by ``shutil.which`` checks below.
+
     trivy_findings: list[Finding] = []
     grype_findings: list[Finding] = []
     scanner_errors: dict[str, str] = {}
@@ -164,6 +177,29 @@ def scan_image(
         if sbom and "syft" not in scanners:
             _run_syft(target.image_ref, tmp_path)
 
+        # Persist raw scanner artifacts (best-effort) before the
+        # tempdir is wiped. We copy whatever files exist; missing
+        # files (e.g. grype failed before writing) just don't get
+        # copied — the structured ``scanner_errors`` already records
+        # why. Errors during copy are non-fatal: the scan succeeded,
+        # the canonical JSON is still emitted upstream.
+        if raw_output_dir is not None:
+            try:
+                raw_output_dir.mkdir(parents=True, exist_ok=True)
+                for fname in (
+                    "trivy-results.json",
+                    "grype-results.json",
+                    "syft-sbom.json",
+                ):
+                    src = tmp_path / fname
+                    if src.exists() and src.stat().st_size > 0:
+                        _shutil.copy2(src, raw_output_dir / fname)
+            except OSError as exc:
+                logger.warning(
+                    "Failed to persist raw scanner outputs to %s: %s",
+                    raw_output_dir, exc,
+                )
+
     combined = deduplicate_findings(trivy_findings, grype_findings)
 
     return ContainerScanResult(

diff --git a/argus/core/config.py b/argus/core/config.py
@@ -44,6 +44,14 @@ class ReportingConfig:
     formats: list[str] = field(default_factory=lambda: ["terminal"])
     severity_threshold: Optional[Severity] = None
     output_dir: str = "./argus-results"
+    # When True, the engine persists each scanner's raw output files
+    # (results.json / *.sarif / stdout.txt) under
+    # ``<output_dir>/raw/<scanner>/`` alongside the canonical
+    # ``argus-results.json``. Default ON since most users running
+    # ``argus scan`` would expect the artifacts to be available for
+    # forensics or manual triage; opt out via ``--no-keep-raw`` (CLI)
+    # or ``reporting.keep_raw: false`` for tight CI environments.
+    keep_raw: bool = True
 
 
 @dataclass
@@ -208,6 +216,7 @@ def _parse_reporting_config(raw: dict | None) -> ReportingConfig:
         formats=raw.get("formats", ["terminal"]),
         severity_threshold=_parse_severity(raw.get("severity_threshold")),
         output_dir=raw.get("output_dir", "./argus-results"),
+        keep_raw=bool(raw.get("keep_raw", True)),
     )
 
 

diff --git a/argus/core/engine.py b/argus/core/engine.py
@@ -35,6 +35,7 @@ def __init__(self, config: ArgusConfig):
         self._no_cache: bool = False
         self._sbom_path: str | None = None
         self._sbom_format: str | None = None
+        self._raw_output_root: str | None = None
 
     def register_scanner(self, scanner: Scanner) -> None:
         """Register a scanner instance for use by the engine."""
@@ -59,6 +60,7 @@ def run(
         use_default_excludes: bool = True,
         sbom_path: str | None = None,
         sbom_format: str | None = None,
+        raw_output_dir: str | None = None,
     ) -> ScanSummary:
         """Run scanners and return an aggregated ScanSummary.
 
@@ -79,6 +81,14 @@ def run(
                 attribute is True, auto-enables them regardless of
                 argus.yml, and threads the SBOM path through
                 ``config_dict['sbom_path']``.
+            raw_output_dir: when set, ``_run_in_container`` copies each
+                scanner's raw output files (``results.json``,
+                ``stdout.txt``, ``*.sarif``) into
+                ``<raw_output_dir>/<scanner_name>/`` before the
+                per-scanner tempdir is cleaned up. Mirrors the
+                container-scan flow's ``raw/`` artifact preservation
+                so users can drill into individual scanner output
+                regardless of which scan flow produced it.
         """
         from .exclusions import build_exclusion_set, log_exclusion_set
 
@@ -87,6 +97,7 @@ def run(
         self._use_default_excludes = use_default_excludes
         self._sbom_path = sbom_path
         self._sbom_format = sbom_format
+        self._raw_output_root = raw_output_dir
 
         # Validate sbom_format if provided
         if sbom_format is not None and sbom_format not in SBOM_FORMAT_EXTENSIONS:
@@ -710,6 +721,28 @@ def _run_in_container(
                 result_files = [stdout_file]
                 logger.debug("No output files — captured stdout (%d bytes)", len(proc.stdout))
 
+            # Persist raw scanner output (best-effort) before the
+            # tempdir is wiped. Mirrors the container-scan flow's
+            # ``raw/`` artifact preservation: every scanner gets its
+            # own subdir under ``<raw_output_root>/<scanner.name>/``
+            # so ``argus-results.json`` (the canonical artifact)
+            # lives next to the per-scanner files (results.json,
+            # *.sarif, stdout.txt) for forensics or manual triage.
+            # Errors during copy are non-fatal — the scan succeeded,
+            # the canonical JSON is still emitted upstream.
+            if self._raw_output_root and result_files:
+                try:
+                    target_dir = Path(self._raw_output_root) / scanner.name
+                    target_dir.mkdir(parents=True, exist_ok=True)
+                    for src in result_files:
+                        if src.exists() and src.stat().st_size > 0:
+                            shutil.copy2(src, target_dir / src.name)
+                except OSError as exc:
+                    logger.warning(
+                        "Failed to persist raw output for '%s' under %s: %s",
+                        scanner.name, self._raw_output_root, exc,
+                    )
+
             if result_files:
                 logger.debug(
                     "Output files: %s",