huntridge-labs · eFAILution · May 6, 2026 · May 6, 2026 · May 6, 2026 · May 6, 2026
diff --git a/.ai/errors.yaml b/.ai/errors.yaml
@@ -374,3 +374,138 @@ scn_detector_errors:
           how: "Set enable_ai_fallback: true and provide ANTHROPIC_API_KEY"
         - description: "Review manually"
           how: "MANUAL_REVIEW is the expected safe default for unrecognized changes"
+
+  # ==============================================================================
+  # SCANNER EXECUTION FAILURES
+  # ==============================================================================
+
+  - pattern: "exit 127|exec.*not found"
+    category: scanner-execution
+    context: "Container scanner exits 127 immediately (e.g. osv-scanner)"
+
+    root_cause: |
+      A bare binary name passed to Docker's --entrypoint override is
+      resolved only via the container's $PATH. Some images keep their
+      binary outside $PATH and declare an absolute entrypoint
+      (e.g. ghcr.io/google/osv-scanner uses ENTRYPOINT ["/osv-scanner"]),
+      so the override must use the absolute path too. A bare
+      ``osv-scanner`` is not found on $PATH and exits 127.
+
+    solution:
+      steps:
+        - "Run ``docker image inspect <image>`` and read ``Config.Entrypoint``"
+        - "Set ``container_entrypoint`` on the scanner class to the absolute path from that field"
+      verification: "Engine strips argv[0] for ENTRYPOINT-based images, so build_args may keep the bare name"
+
+    reference: "argus/scanners/osv.py — see container_entrypoint = '/osv-scanner'"
+
+  - pattern: "yamllint.*PASS.*0 findings|lint-yaml.*passes despite errors"
+    category: scanner-execution
+    context: "Linter exits non-zero but reports zero findings and Status: PASS"
+
+    root_cause: |
+      Tools like yamllint use exit codes to distinguish ordinary lint
+      findings (exit 1, with parseable findings on stdout) from
+      runtime errors (exit ≥ 2, empty stdout). Naive callers that map
+      empty stdout to ``[]`` lose the runtime-error signal entirely
+      and surface ``Status: PASS`` even when the tool failed to run.
+
+    solution:
+      steps:
+        - "Distinguish exit codes: 0 = clean, 1 = findings, ≥ 2 = real failure"
+        - "When exit ≥ 2 with empty findings, set metadata['execution_failed']=True and metadata['execution_failure_reason']=<stderr summary>"
+        - "Use the same shape the engine container path emits — the terminal reporter and --fail-on-scanner-error key off these exact field names"
+
+    reference: "argus/linters/yamllint.py — scan() exit-code branching"
+
+  - pattern: "scanner produced no output|execution_failed"
+    category: scanner-execution
+    context: "Reporter shows Warning row but Status: PASS contradicts it"
+
+    root_cause: |
+      Threshold compliance (``ScanSummary.passed``) and execution
+      success are independent signals. A scanner that fails to run
+      produces zero findings, which alone passes any threshold. The
+      reporter must label PASS as ``PASS (degraded — some scanners
+      did not run)`` whenever any scanner has
+      metadata['execution_failed']=True, otherwise the Warning above
+      and the Status below contradict each other.
+
+    solution:
+      steps:
+        - "Reporter checks for any result.metadata.get('execution_failed')"
+        - "If passed and any failed: print 'Status: PASS (degraded — N did not run, M unparsable)'"
+        - "Add --fail-on-scanner-error in CI for hard-fail behavior"
+
+    reference: "argus/reporters/terminal.py::_print_status"
+
+  - pattern: "0 findings.*known to be vulnerable|JSONDecodeError.*results.json"
+    category: scanner-execution
+    context: "Scanner produced output but parser couldn't interpret it (third state)"
+
+    root_cause: |
+      Distinct from execution-failure: the scanner ran, exited, and
+      wrote a results file — we just couldn't parse what came out
+      (schema drift, truncated JSON, mixed text+JSON). Previously
+      this surfaced as a stack trace from the engine's exception
+      handler and got rolled up as a generic "scanner failed".
+      Reporters and CI gates now have a third state for it:
+      ``parse_failed`` + ``parse_failure_reason`` (carries the
+      exception type and a clipped output head).
+
+    solution:
+      steps:
+        - "Engine's container path wraps scanner.parse_results in try/except"
+        - "On any Exception, set metadata['parse_failed']=True and metadata['parse_failure_reason']"
+        - "scanner_template.run_subprocess_scan does the same for local execution"
+        - "TerminalReporter renders parse_failed in its own warning block"
+        - "--fail-on-scanner-error fires on parse_failed too"
+
+    reference: "argus/core/engine.py::_run_in_container — try/except around parse_results"
+
+  # ==============================================================================
+  # WINDOWS-SPECIFIC SCANNER ERRORS
+  # ==============================================================================
+
+  - pattern: "PermissionError.*WinError 5.*Access is denied|yamllint.*Access is denied"
+    category: scanner-execution
+    context: "yamllint launches with PermissionError on Windows hosts with AppLocker / SRP"
+
+    root_cause: |
+      AppLocker or Software Restriction Policy on Windows blocks
+      executable launches from user AppData paths (where pip --user
+      and virtualenv typically install scripts). The Python
+      interpreter itself is allowed, so loading the same
+      package via ``python -m yamllint`` works on the same
+      machine.
+
+    solution:
+      steps:
+        - "YamllintLinter._run_with_windows_fallback wraps subprocess.run"
+        - "On sys.platform == 'win32', PermissionError (or any OSError other than FileNotFoundError) triggers a retry with [sys.executable, '-m', 'yamllint'] + cmd[1:]"
+        - "FileNotFoundError still propagates so 'yamllint not installed' renders cleanly"
+        - "Linux/macOS bypass the fallback — Linux PermissionError is a genuine bug, not a policy case"
+
+    reference: "argus/linters/yamllint.py::_run_with_windows_fallback"
+
+  - pattern: "UnicodeDecodeError.*charmap codec.*can't decode byte"
+    category: scanner-execution
+    context: "Scanner result decode fails on Windows with cp1252 default encoding"
+
+    root_cause: |
+      Docker container output (and most CLI tool output) is UTF-8.
+      ``subprocess.run(text=True)`` and ``Path.read_text()`` fall
+      back to the platform default encoding when ``encoding=`` is
+      omitted — cp1252 on Windows. Any non-ASCII byte (CVE
+      descriptions, accented file paths, scanner banners) raises
+      UnicodeDecodeError mid-scan.
+
+    solution:
+      steps:
+        - "Engine docker subprocess: encoding='utf-8', errors='replace'"
+        - "scanner_template subprocess: same"
+        - "All scanner.parse_results read_text() calls: same"
+        - "Yamllint subprocess + Windows fallback: same"
+      note: "errors='replace' is preferred over 'strict' — a security tool showing � is better than crashing on otherwise-usable output"
+
+    reference: "argus/core/engine.py and every argus/scanners/*.py read_text()"
diff --git a/argus/cli.py b/argus/cli.py
@@ -1693,20 +1693,37 @@ def _cmd_source_scan(args: argparse.Namespace) -> int:
         r.scanner for r in summary.results
         if r.metadata.get("execution_failed")
     ]
+    scanner_parse_failures = [
+        r.scanner for r in summary.results
+        if r.metadata.get("parse_failed")
+    ]
     if not summary.passed:
         exit_code = EXIT_FINDINGS
     elif sbom_batch_failures:
         exit_code = EXIT_ERROR
     elif (
         getattr(args, "fail_on_scanner_error", False)
-        and scanner_execution_failures
+        and (scanner_execution_failures or scanner_parse_failures)
     ):
-        log.error(
-            "Exiting non-zero: %d scanner(s) produced no output (%s) and "
-            "--fail-on-scanner-error is set.",
-            len(scanner_execution_failures),
-            ", ".join(scanner_execution_failures),
-        )
+        # Both states represent "the scan didn't fully succeed":
+        # execution_failed = couldn't run; parse_failed = ran but
+        # output unintelligible. From a CI gating perspective they're
+        # equivalent — the user asked for a hard fail when scanners
+        # don't deliver clean results.
+        if scanner_execution_failures:
+            log.error(
+                "Exiting non-zero: %d scanner(s) did not run cleanly "
+                "(%s) and --fail-on-scanner-error is set.",
+                len(scanner_execution_failures),
+                ", ".join(scanner_execution_failures),
+            )
+        if scanner_parse_failures:
+            log.error(
+                "Exiting non-zero: %d scanner(s) produced unparsable "
+                "output (%s) and --fail-on-scanner-error is set.",
+                len(scanner_parse_failures),
+                ", ".join(scanner_parse_failures),
+            )
         exit_code = EXIT_ERROR
     else:
         exit_code = EXIT_SUCCESS

diff --git a/argus/core/engine.py b/argus/core/engine.py
@@ -2,6 +2,7 @@
 
 import logging
 import os
+import platform
 import shutil
 import subprocess
 import tempfile
@@ -674,7 +675,15 @@ def _run_in_container(
             #  - holds only one scan's transient output (no secrets;
             #    findings travel through ``parse_results`` and end up
             #    in the user-specified output_dir, never here).
-            os.chmod(output_dir, 0o777)
+            #
+            # Skip on Windows: NTFS doesn't honor POSIX bits, ``os.chmod``
+            # only flips the read-only attribute, and Docker Desktop on
+            # Windows handles uid mapping for bind mounts differently
+            # (it doesn't suffer from the macOS uid-mismatch failure mode
+            # this guard exists for). Calling ``chmod 0o777`` there is
+            # at best a no-op and at worst confusing in stack traces.
+            if platform.system() != "Windows":
+                os.chmod(output_dir, 0o777)
 
             docker_cmd = [
                 self._runtime, "run", "--rm",
@@ -738,10 +747,21 @@ def _run_in_container(
             )
 
             start = time.monotonic()
+            # Docker container output is always UTF-8. Without
+            # ``encoding='utf-8'``, ``text=True`` falls back to the
+            # platform default — cp1252 on Windows — which raises
+            # ``UnicodeDecodeError`` on any non-ASCII byte the
+            # scanner emits (CVE descriptions, file paths with
+            # non-ASCII characters, etc.). ``errors='replace'`` is
+            # a safe fallback over ``strict``: a security tool
+            # showing ``�`` is better than crashing the whole
+            # scan on output we'd otherwise be able to use.
             proc = subprocess.run(
                 docker_cmd,
                 capture_output=True,
                 text=True,
+                encoding="utf-8",
+                errors="replace",
             )
             elapsed = int((time.monotonic() - start) * 1000)
 
@@ -854,35 +874,65 @@ def _run_in_container(
                         f"no output files and no stdout (exit={proc.returncode})"
                     )
             if result_files and hasattr(scanner, "parse_results"):
-                parsed = scanner.parse_results(result_files[0])
-                # parse_results may return either a list of Findings,
-                # a ``(list, int)`` tuple (legacy passed_count channel,
-                # used by linters), or a ``(list, dict)`` tuple (extra
-                # metadata merged into ScanResult.metadata — used by
-                # Grype to flag "source.target=unknown" which means
-                # "couldn't identify packages" rather than "nothing
-                # vulnerable").
-                if isinstance(parsed, tuple):
-                    findings, extra = parsed
-                    if isinstance(extra, int):
-                        metadata_extra["passed_count"] = extra
-                    elif isinstance(extra, dict):
-                        metadata_extra.update(extra)
-                        # Warn at the engine layer too so the signal is
-                        # visible even when a reporter doesn't render
-                        # per-scanner metadata.
-                        if "warning" in extra:
-                            logger.warning(
-                                "Scanner '%s': %s",
-                                scanner.name, extra["warning"],
-                            )
+                try:
+                    parsed = scanner.parse_results(result_files[0])
+                except Exception as exc:
+                    # Scanner produced output but the parser couldn't
+                    # interpret it (e.g. osv-scanner v2 rev'd its
+                    # schema, truncated output, mixed text+JSON). This
+                    # is a third state distinct from "execution failed"
+                    # and "ran clean" — we surface it as
+                    # ``parse_failed`` so the reporter can show "OSV
+                    # produced 12KB of output we couldn't parse" rather
+                    # than the misleading "no output produced". The
+                    # parser bug doesn't crash the rest of the scan;
+                    # other scanners' results are still useful.
+                    head = ""
+                    try:
+                        head = result_files[0].read_text(
+                            encoding="utf-8", errors="replace",
+                        )[:200]
+                    except OSError:
+                        head = "<unreadable>"
+                    metadata_extra["parse_failed"] = True
+                    metadata_extra["parse_failure_reason"] = (
+                        f"{type(exc).__name__}: {exc}. "
+                        f"output head: {head!r}"
+                    )
+                    logger.warning(
+                        "Scanner '%s' produced output but parse failed: %s",
+                        scanner.name, exc,
+                    )
+                    findings = []
                 else:
-                    findings = parsed
-                logger.debug(
-                    "Parsed %d finding(s) from %s",
-                    len(findings),
-                    result_files[0].name,
-                )
+                    # parse_results may return either a list of Findings,
+                    # a ``(list, int)`` tuple (legacy passed_count channel,
+                    # used by linters), or a ``(list, dict)`` tuple (extra
+                    # metadata merged into ScanResult.metadata — used by
+                    # Grype to flag "source.target=unknown" which means
+                    # "couldn't identify packages" rather than "nothing
+                    # vulnerable").
+                    if isinstance(parsed, tuple):
+                        findings, extra = parsed
+                        if isinstance(extra, int):
+                            metadata_extra["passed_count"] = extra
+                        elif isinstance(extra, dict):
+                            metadata_extra.update(extra)
+                            # Warn at the engine layer too so the signal is
+                            # visible even when a reporter doesn't render
+                            # per-scanner metadata.
+                            if "warning" in extra:
+                                logger.warning(
+                                    "Scanner '%s': %s",
+                                    scanner.name, extra["warning"],
+                                )
+                    else:
+                        findings = parsed
+                    logger.debug(
+                        "Parsed %d finding(s) from %s",
+                        len(findings),
+                        result_files[0].name,
+                    )
 
             return ScanResult(
                 scanner=scanner.name,