feat(reporters): always emit canonical argus-results.json regardless of config

eFAILution · eFAILution · commit c9e4ca9869fb · 2026-05-05T14:18:01.000-04:00
Continues the work in this PR by addressing the *root* of the
missing-results pitfall: the canonical scan artifact, not just the
error message.

Before this commit, ``reporting.formats`` controlled whether
``argus-results.json`` got written. A config like
``formats: [terminal, sarif]`` would silently break ``argus view``,
the audit manifest's lossless dump, and ``argus report`` because
the JSON file simply wasn't there.

This commit shifts the contract:
- ``argus-results.json`` is always emitted by the source-scan flow.
  It's the canonical artifact every other Argus surface depends on.
- ``reporting.formats`` now means "which *additional* human-readable
  reports to emit alongside the canonical JSON," not "which
  artifacts exist at all."

Implementation
- New ``argus.reporters.ensure_canonical_json(formats)`` helper —
  idempotent, preserves user-configured ordering, appends ``json``
  when absent. Lives next to ``REPORTER_REGISTRY`` and
  ``CANONICAL_FORMAT`` so the canonical artifact's identity is one
  module-level constant away.
- ``argus/cli.py`` source-scan dispatch loop iterates the
  helper-augmented format list. The diagnoser remains as a
  belt-and-suspenders safeguard for legacy result dirs produced
  before this contract was in place.
- ``argus.example.yml`` comment block updated so users see the new
  contract: "argus-results.json is always written; this list is for
  additional reports." Available formats listed inline.

Scope notes
- Container and DAST flows have their own JSON helpers
  (``_write_container_json`` / ``_write_dast_json``) that produce
  domain-shaped summaries, not ``ScanSummary.to_dict()``. They're
  consumed by their own viewers/handling, separate from
  ``argus view``. Adding "always emit argus-results.json" to those
  flows would conflate two different artifacts; left for follow-up
  if a clear use case arises.

Tests (+8)
- ``argus/tests/reporters/test_registry.py`` (7 cases): appends json
  when absent (single- and multi-format lists); idempotent on input
  that already lists json; preserves user order;
  defensive-no-mutation; empty-formats edge case; constant-value
  sanity. Decoupled from the cli.py dispatch loop so the helper's
  invariants are testable without spinning up the engine.
- ``argus/tests/test_cli.py`` (1 case): integration regression for
  the source-scan flow — captures the format names the dispatch
  loop requests from ``get_reporter`` when configured with
  ``formats=[terminal]``, asserts ``json`` is in the captured list.

Validation
- Full SDK suite green: 1428 passed (+8 from this commit), 8
  skipped.
- The diagnoser PR's failure-path tests still pass (the failure
  mode is now extremely rare in practice but the messages remain
  in case a user loads an older results dir or hits an unrelated
  path issue).
diff --git a/argus.example.yml b/argus.example.yml
@@ -44,6 +44,11 @@ scanners:
   #   target_url: "http://localhost:3000"
 
 reporting:
+  # ``argus-results.json`` is the canonical scan artifact and is
+  # always written regardless of this list — the viewers, the audit
+  # manifest, and the ``argus report`` subcommand all consume it.
+  # This list selects which *additional* human-readable reports to
+  # emit alongside the JSON. Available: terminal, markdown, sarif.
   formats:
     - terminal
     - sarif
diff --git a/argus/cli.py b/argus/cli.py
@@ -1297,10 +1297,19 @@ def _cmd_source_scan(args: argparse.Namespace) -> int:
         finalize_manifest(manifest, exit_code=EXIT_ERROR, output_dir=output_dir)
         return EXIT_ERROR
 
-    # Generate reports
+    # Generate reports.
+    #
+    # ``ensure_canonical_json`` guarantees the source-of-truth artifact
+    # (``argus-results.json``) is always written, regardless of what
+    # the user listed in ``reporting.formats``. The viewers, the audit
+    # manifest, and the ``argus report`` subcommand all consume that
+    # file — keeping it implicitly mandatory means a config like
+    # ``formats: [terminal, sarif]`` no longer silently breaks
+    # ``argus view`` (the diagnoser still helps for legacy result dirs
+    # produced before this contract was in place).
     try:
-        from argus.reporters import get_reporter
-        for fmt in config.reporting.formats:
+        from argus.reporters import ensure_canonical_json, get_reporter
+        for fmt in ensure_canonical_json(config.reporting.formats):
             reporter = get_reporter(fmt)
             reporter.report(summary, output_dir)
             log.debug("Generated %s report", fmt)
diff --git a/argus/reporters/__init__.py b/argus/reporters/__init__.py
@@ -27,3 +27,30 @@ def get_reporter(name: str):
 def available_reporters() -> list[str]:
     """Return list of registered reporter names."""
     return list(REPORTER_REGISTRY.keys())
+
+
+# The single canonical scan artifact. ``argus-results.json`` is consumed
+# by the audit manifest, both viewers (terminal + browser), the
+# ``argus report`` subcommand, and any downstream tooling built on the
+# SDK. Treating it as always-emitted decouples its existence from
+# user-configured ``reporting.formats``: that list now means "which
+# *additional* human-readable reports to emit alongside the canonical
+# JSON," not "which artifacts exist at all." Eliminates the failure
+# mode where a config like ``formats: [terminal, sarif]`` silently
+# breaks ``argus view``.
+CANONICAL_FORMAT = "json"
+
+
+def ensure_canonical_json(formats: list[str]) -> list[str]:
+    """Return a new format list that is guaranteed to contain ``json``.
+
+    The caller's list is never mutated. When ``json`` is absent it is
+    appended, so it runs after every reporter the user configured.
+    When the user already lists ``json`` the copy is returned as-is:
+    no duplicate is added (which would write the file twice) and
+    ``json`` keeps the position the user gave it, so it is the last
+    reporter to run only in the appended case.
+    """
+    if CANONICAL_FORMAT not in formats:
+        return [*formats, CANONICAL_FORMAT]
+    return list(formats)
diff --git a/argus/tests/reporters/test_registry.py b/argus/tests/reporters/test_registry.py
@@ -0,0 +1,51 @@
+"""Tests for argus.reporters registry helpers — ``ensure_canonical_json``.
+
+The helper guarantees ``argus-results.json`` is always emitted by the
+source-scan flow, regardless of how the user configures
+``reporting.formats``. That decouples the canonical scan artifact (the
+audit manifest, both viewers, and ``argus report`` all consume it) from
+user choice of additional human-readable reports.
+"""
+
+from __future__ import annotations
+
+from argus.reporters import CANONICAL_FORMAT, ensure_canonical_json
+
+
+class TestEnsureCanonicalJson:
+    def test_appends_json_when_absent(self):
+        assert ensure_canonical_json(["terminal"]) == ["terminal", "json"]
+
+    def test_idempotent_when_json_already_present(self):
+        # User explicitly listed json — don't double-write the file.
+        assert ensure_canonical_json(["json"]) == ["json"]
+
+    def test_preserves_user_order_when_json_already_present(self):
+        # The user's preferred ordering of human reports stays intact.
+        assert ensure_canonical_json(["json", "terminal", "sarif"]) == [
+            "json", "terminal", "sarif",
+        ]
+
+    def test_appends_json_to_multi_format_list(self):
+        # Common production case: user wants terminal + SARIF + audit JSON.
+        assert ensure_canonical_json(["terminal", "sarif"]) == [
+            "terminal", "sarif", "json",
+        ]
+
+    def test_handles_empty_input(self):
+        # Edge case: ``formats: []`` still produces the canonical artifact;
+        # without it the viewers would silently fail downstream.
+        assert ensure_canonical_json([]) == ["json"]
+
+    def test_does_not_mutate_input(self):
+        # The same list lives on the user's ArgusConfig: both branches
+        # must return a fresh list and leave the caller's list as-is.
+        for formats in (["terminal"], ["json", "terminal"]):
+            expected = list(formats)
+            assert ensure_canonical_json(formats) is not formats
+            assert formats == expected
+
+    def test_canonical_format_constant_is_json(self):
+        # Sanity-check the constant's value. If we ever rename the
+        # canonical artifact, every consumer downstream needs to know.
+        assert CANONICAL_FORMAT == "json"
diff --git a/argus/tests/test_cli.py b/argus/tests/test_cli.py
@@ -290,6 +290,69 @@ def test_scan_unknown_scanner_returns_error(self, monkeypatch, capsys):
         captured = capsys.readouterr()
         assert "unknown scanner 'nonexistent'" in captured.err
 
+    def test_scan_source_always_emits_canonical_json(self, monkeypatch, tmp_path):
+        """Regression ("Option C"): the source-scan dispatch loop must
+        request the ``json`` reporter regardless of the user's
+        ``reporting.formats``. Reporters are mocked, so this captures
+        the names passed to ``get_reporter`` rather than checking the
+        file on disk; the config here lists formats=[terminal] only."""
+        from argus.core.config import ArgusConfig, ReportingConfig, ExecutionConfig
+        from argus.core.models import ScanSummary
+
+        config = ArgusConfig(
+            reporting=ReportingConfig(
+                output_dir=str(tmp_path),
+                formats=["terminal"],   # deliberately omits json
+                severity_threshold=None,
+            ),
+            execution=ExecutionConfig(),
+        )
+        monkeypatch.setattr(
+            "argus.core.config.ArgusConfig.load",
+            lambda _path: config,
+        )
+
+        summary = ScanSummary(results=[], severity_threshold=None)
+        monkeypatch.setattr(
+            "argus.core.engine.ArgusEngine.__init__",
+            lambda self, _cfg: setattr(self, "config", config)
+            or setattr(self, "_scanners", {}),
+        )
+        monkeypatch.setattr(
+            "argus.core.engine.ArgusEngine.run",
+            lambda self, **kwargs: summary,
+        )
+        monkeypatch.setattr(
+            "argus.core.engine.ArgusEngine.register_scanner",
+            lambda self, s: None,
+        )
+        monkeypatch.setattr("argus.scanners.get_available_scanners", lambda: [])
+
+        # Capture every format name the dispatch loop requests so we
+        # can assert canonical JSON was demanded alongside the user's
+        # configured formats.
+        requested: list[str] = []
+
+        def capture_reporter(fmt):
+            requested.append(fmt)
+            return MagicMock()
+
+        monkeypatch.setattr("argus.reporters.get_reporter", capture_reporter)
+        monkeypatch.setattr("argus.audit.get_logger", lambda *a, **kw: MagicMock())
+        monkeypatch.setattr(
+            "argus.audit.create_manifest",
+            lambda **kw: MagicMock(execution_backend=None),
+        )
+        monkeypatch.setattr("argus.audit.finalize_manifest", lambda *a, **kw: None)
+
+        args = _make_scan_args(output_dir=str(tmp_path))
+        cmd_scan(args)
+
+        # Canonical JSON is requested even though config didn't list it.
+        # User's terminal report is still emitted.
+        assert "json" in requested
+        assert "terminal" in requested
+
     def test_scan_source_runs_engine(self, monkeypatch, tmp_path):
         """A valid scan with no findings should call engine.run and return EXIT_SUCCESS."""
         from argus.core.config import ArgusConfig, ReportingConfig, ExecutionConfig