Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions argus.example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,15 @@ reporting:
severity_threshold: high
output_dir: "./argus-results"

# Persist each scanner's raw output alongside the canonical
# argus-results.json. Source scans (``argus scan``) write
# results.json, *.sarif, and stdout.txt under
# ``<output_dir>/raw/<scanner>/``; container scans (``argus scan
# container``) write trivy-results.json, grype-results.json, and
# syft-sbom.json under ``<output_dir>/raw/<target name>/``.
# Default true — useful for forensics, audit trails, and manual
# triage. Set false (or pass --no-keep-raw on the CLI) to skip;
# saves a few MB per scan in tight CI environments.
keep_raw: true

# Container lifecycle targets (consumed by ``argus scan container``).
# Defining anything under this top-level ``containers:`` key activates
# config-driven container scans — no need to pass --image / --discover
Expand Down Expand Up @@ -101,6 +110,11 @@ reporting:
#
# # Override which sub-scanners run; default is trivy + grype.
# scanners: [trivy, grype, syft]
#
# Note: raw scanner-output preservation is configured via the
# unified ``reporting.keep_raw`` knob above — the same flag covers
# trivy/grype/syft container outputs and source-scan scanner
# outputs. No separate container-only setting is needed.

# Execution backend configuration
execution:
Expand Down
103 changes: 96 additions & 7 deletions argus/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,19 @@ def _build_scan_parser(subparsers: argparse._SubParsersAction) -> None:
help="Disable DB cache volume mounts. Forces scanners to re-download "
"vulnerability databases on every container run.",
)
scan_parser.add_argument(
"--no-keep-raw",
action="store_true",
dest="no_keep_raw",
help="Do not persist raw per-scanner output files alongside the "
"canonical argus-results.json. Source scans normally drop "
"each scanner's results.json / *.sarif / stdout.txt under "
"<output_dir>/raw/<scanner>/; container scans drop "
"trivy-results.json / grype-results.json / syft-sbom.json "
"under <output_dir>/raw/<image>/. Pass --no-keep-raw to "
"skip that step in tight CI environments. The same effect "
"is available via 'reporting.keep_raw: false' in argus.yml.",
)

# Container-specific flags (used with: argus scan container)
container_group = scan_parser.add_argument_group(
Expand Down Expand Up @@ -1135,6 +1148,15 @@ def _load_container_config(args: argparse.Namespace) -> dict:
)
config = dict(containers_section)

# Pull ``reporting.keep_raw`` from the same file so the
# container handler honors the unified config knob — same
# default-True semantics as ``_cmd_source_scan``. Stashed
# under a synthetic underscore key so it doesn't collide
# with any future ``containers:`` field a user might add.
reporting_section = file_config.get("reporting", {})
if isinstance(reporting_section, dict) and "keep_raw" in reporting_section:
config["_reporting_keep_raw"] = bool(reporting_section["keep_raw"])

# CLI overrides — explicit > implicit. --image and --discover both
# OVERWRITE the corresponding config keys so the user's intent is
# unambiguous (and so we don't accidentally double-scan an image
Expand Down Expand Up @@ -1248,6 +1270,18 @@ def _cmd_source_scan(args: argparse.Namespace) -> int:

log.info("Argus scan starting")

# Decide whether to persist raw per-scanner outputs alongside the
# canonical argus-results.json. Default ON — users running
# ``argus scan`` reasonably expect each scanner's raw results
# (results.json / *.sarif / stdout.txt) to be available for
# forensics or manual triage. Opt out via ``--no-keep-raw``
# (CLI) or ``reporting.keep_raw: false`` (argus.yml). CLI flag
# wins on conflict, matching the dispatcher's
# explicit-over-implicit posture used throughout.
keep_raw_config = getattr(config.reporting, "keep_raw", True)
keep_raw = bool(keep_raw_config) and not getattr(args, "no_keep_raw", False)
raw_output_root = str(Path(output_dir) / "raw") if keep_raw else None

# Build engine and register scanners
engine = ArgusEngine(config)

Expand Down Expand Up @@ -1352,6 +1386,7 @@ def _cmd_source_scan(args: argparse.Namespace) -> int:
use_default_excludes=not getattr(args, "no_default_excludes", False),
sbom_path=str(info.path),
sbom_format=info.format,
raw_output_dir=raw_output_root,
)
except Exception as exc:
log.error(
Expand Down Expand Up @@ -1392,6 +1427,7 @@ def _cmd_source_scan(args: argparse.Namespace) -> int:
allow_local_versions=getattr(args, "allow_local_versions", False),
no_cache=getattr(args, "no_cache", False),
use_default_excludes=not getattr(args, "no_default_excludes", False),
raw_output_dir=raw_output_root,
)
if args.verbose and getattr(engine, "_last_resolutions", None):
from argus.core.tool_config import format_resolutions_for_display
Expand Down Expand Up @@ -1733,6 +1769,24 @@ def _cmd_container_scan(
output_dir = _make_run_dir(base_dir)
formats = args.formats or ["terminal", "markdown"]

# Decide whether to persist raw per-scanner outputs alongside the
# canonical argus-results.json. Default is ON — the user just ran
# a scan and would expect those artifacts to be available for
# manual triage. Opt out via ``--no-keep-raw`` (CLI) or
# ``reporting.keep_raw: false`` (argus.yml), which is the unified
# config home for raw-output preservation. The legacy
# ``containers.keep_raw`` key is still read as a fallback when
# ``reporting.keep_raw`` is not set, so configs from earlier in
# this PR's lifecycle don't break. CLI ``--no-keep-raw`` wins
# over both, matching the rest of the dispatcher's
# explicit-over-implicit posture.
keep_raw_config = config.get(
"_reporting_keep_raw", config.get("keep_raw", True),
)
keep_raw = bool(keep_raw_config) and not getattr(args, "no_keep_raw", False)
if keep_raw:
config["_raw_output_root"] = str(Path(output_dir) / "raw")

# Run
try:
engine = ContainerEngine(config)
Expand All @@ -1745,6 +1799,42 @@ def _cmd_container_scan(
print(f"Error: container scan failed: {exc}", file=sys.stderr)
return EXIT_ERROR

# Build a canonical ScanSummary view of the container results so
# the standard reporters (json → argus-results.json, sarif) and
# ``argus view`` can consume container scans the same way they
# consume source scans. Each container target becomes a
# ScanResult; the per-image domain metadata (image_ref, build
# status, scanner_errors) lifts onto ScanResult.metadata so the
# browser dashboard and exporters surface it.
from argus.core.models import ScanResult, ScanSummary
canonical_results = [
ScanResult(
scanner=f"container/{r.name}",
findings=list(r.combined_findings),
metadata={
"image_ref": r.image_ref,
"build_success": r.build_success,
**(
{"scanner_errors": dict(r.scanner_errors)}
if r.scanner_errors else {}
),
**(
{"scan_error": r.scan_error}
if getattr(r, "scan_error", None) else {}
),
},
)
for r in summary.results
]
canonical_summary = ScanSummary(results=canonical_results)

# Always emit argus-results.json — same canonical-artifact
# contract the source-scan flow established. ``argus view`` and
# the audit manifest both consume this regardless of what the
# user listed in ``formats``.
from argus.reporters import get_reporter
get_reporter("json").report(canonical_summary, output_dir)

# Reports
for fmt in formats:
if fmt == "markdown":
Expand All @@ -1755,16 +1845,15 @@ def _cmd_container_scan(
elif fmt == "terminal":
_print_container_terminal(summary)
elif fmt == "json":
# Domain-shaped per-image summary (container_count etc.)
# lives at container-scan.json. The canonical
# argus-results.json was already written above; this
# is the supplementary domain artifact for tooling that
# wants per-image stats without parsing findings.
_write_container_json(summary, output_dir)
elif fmt == "sarif":
from argus.core.models import ScanResult, ScanSummary
from argus.reporters import get_reporter
results = [
ScanResult(scanner=f"container/{r.name}", findings=r.combined_findings)
for r in summary.results
]
sarif_reporter = get_reporter("sarif")
sarif_reporter.report(ScanSummary(results=results), output_dir)
sarif_reporter.report(canonical_summary, output_dir)

# Exit code — scanner failures are always non-zero
scan_failures = getattr(summary, "scan_failures", 0)
Expand Down
11 changes: 11 additions & 0 deletions argus/container/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""

import logging
from pathlib import Path

from .builder import build_image
from .discovery import (
Expand Down Expand Up @@ -130,10 +131,20 @@ def _process_target(self, target: ContainerTarget) -> ContainerScanResult:
self._built_images.append(target.image_ref)

try:
# If the dispatcher set ``_raw_output_root`` in the
# config dict, persist this target's raw scanner outputs
# under ``<root>/<target.name>/``. Caller controls
# whether this is set (CLI flag + config opt-out); the
# engine just threads it through.
raw_root = self.config.get("_raw_output_root")
target_raw_dir = (
Path(raw_root) / target.name if raw_root else None
)
return scan_image(
target,
scanners=self._scanners(),
sbom=self._sbom_enabled(),
raw_output_dir=target_raw_dir,
)
except OSError as exc:
# Disk full, permission denied, etc.
Expand Down
36 changes: 36 additions & 0 deletions argus/container/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ def scan_image(
target: ContainerTarget,
scanners: tuple[str, ...] = ("trivy", "grype"),
sbom: bool = True,
raw_output_dir: Path | None = None,
) -> ContainerScanResult:
"""Scan a single container image with trivy and/or grype.

Expand All @@ -132,7 +133,19 @@ def scan_image(

For locally-built images, scanners reference the local Docker daemon.
Per-scanner errors are caught and recorded, not swallowed.

``raw_output_dir``: when supplied, the raw scanner output files
(``trivy-results.json``, ``grype-results.json``, ``syft-sbom.json``)
are copied into this directory before the temp dir is cleaned up.
Lets users preserve full per-scanner artifacts for forensics,
audit, or manual triage workflows alongside the canonical
``argus-results.json``. ``None`` (the default) means transient
output — historic behavior.
"""
import shutil as _shutil # local import to avoid shadowing the
# module-level ``shutil`` reference used
# by ``shutil.which`` checks below.

trivy_findings: list[Finding] = []
grype_findings: list[Finding] = []
scanner_errors: dict[str, str] = {}
Expand Down Expand Up @@ -164,6 +177,29 @@ def scan_image(
if sbom and "syft" not in scanners:
_run_syft(target.image_ref, tmp_path)

# Persist raw scanner artifacts (best-effort) before the
# tempdir is wiped. We copy whatever files exist; missing
# files (e.g. grype failed before writing) just don't get
# copied — the structured ``scanner_errors`` already records
# why. Errors during copy are non-fatal: the scan succeeded,
# the canonical JSON is still emitted upstream.
if raw_output_dir is not None:
try:
raw_output_dir.mkdir(parents=True, exist_ok=True)
for fname in (
"trivy-results.json",
"grype-results.json",
"syft-sbom.json",
):
src = tmp_path / fname
if src.exists() and src.stat().st_size > 0:
_shutil.copy2(src, raw_output_dir / fname)
except OSError as exc:
logger.warning(
"Failed to persist raw scanner outputs to %s: %s",
raw_output_dir, exc,
)

combined = deduplicate_findings(trivy_findings, grype_findings)

return ContainerScanResult(
Expand Down
9 changes: 9 additions & 0 deletions argus/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ class ReportingConfig:
formats: list[str] = field(default_factory=lambda: ["terminal"])
severity_threshold: Optional[Severity] = None
output_dir: str = "./argus-results"
# When True, the engine persists each scanner's raw output files
# (results.json / *.sarif / stdout.txt) under
# ``<output_dir>/raw/<scanner>/`` alongside the canonical
# ``argus-results.json``. Default ON since most users running
# ``argus scan`` would expect the artifacts to be available for
# forensics or manual triage; opt out via ``--no-keep-raw`` (CLI)
# or ``reporting.keep_raw: false`` for tight CI environments.
keep_raw: bool = True


@dataclass
Expand Down Expand Up @@ -208,6 +216,7 @@ def _parse_reporting_config(raw: dict | None) -> ReportingConfig:
formats=raw.get("formats", ["terminal"]),
severity_threshold=_parse_severity(raw.get("severity_threshold")),
output_dir=raw.get("output_dir", "./argus-results"),
keep_raw=bool(raw.get("keep_raw", True)),
)


Expand Down
33 changes: 33 additions & 0 deletions argus/core/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def __init__(self, config: ArgusConfig):
self._no_cache: bool = False
self._sbom_path: str | None = None
self._sbom_format: str | None = None
self._raw_output_root: str | None = None

def register_scanner(self, scanner: Scanner) -> None:
"""Register a scanner instance for use by the engine."""
Expand All @@ -59,6 +60,7 @@ def run(
use_default_excludes: bool = True,
sbom_path: str | None = None,
sbom_format: str | None = None,
raw_output_dir: str | None = None,
) -> ScanSummary:
"""Run scanners and return an aggregated ScanSummary.

Expand All @@ -79,6 +81,14 @@ def run(
attribute is True, auto-enables them regardless of
argus.yml, and threads the SBOM path through
``config_dict['sbom_path']``.
raw_output_dir: when set, ``_run_in_container`` copies each
scanner's raw output files (``results.json``,
``stdout.txt``, ``*.sarif``) into
``<raw_output_dir>/<scanner_name>/`` before the
per-scanner tempdir is cleaned up. Mirrors the
container-scan flow's ``raw/`` artifact preservation
so users can drill into individual scanner output
regardless of which scan flow produced it.
"""
from .exclusions import build_exclusion_set, log_exclusion_set

Expand All @@ -87,6 +97,7 @@ def run(
self._use_default_excludes = use_default_excludes
self._sbom_path = sbom_path
self._sbom_format = sbom_format
self._raw_output_root = raw_output_dir

# Validate sbom_format if provided
if sbom_format is not None and sbom_format not in SBOM_FORMAT_EXTENSIONS:
Expand Down Expand Up @@ -710,6 +721,28 @@ def _run_in_container(
result_files = [stdout_file]
logger.debug("No output files — captured stdout (%d bytes)", len(proc.stdout))

# Persist raw scanner output (best-effort) before the
# tempdir is wiped. Mirrors the container-scan flow's
# ``raw/`` artifact preservation: every scanner gets its
# own subdir under ``<raw_output_root>/<scanner.name>/``
# so ``argus-results.json`` (the canonical artifact)
# lives next to the per-scanner files (results.json,
# *.sarif, stdout.txt) for forensics or manual triage.
# Errors during copy are non-fatal — the scan succeeded,
# the canonical JSON is still emitted upstream.
if self._raw_output_root and result_files:
try:
target_dir = Path(self._raw_output_root) / scanner.name
target_dir.mkdir(parents=True, exist_ok=True)
for src in result_files:
if src.exists() and src.stat().st_size > 0:
shutil.copy2(src, target_dir / src.name)
except OSError as exc:
logger.warning(
"Failed to persist raw output for '%s' under %s: %s",
scanner.name, self._raw_output_root, exc,
)

if result_files:
logger.debug(
"Output files: %s",
Expand Down
Loading
Loading