docs(errors): reframe messaging around clarity and solutions

uittenbroekrobbert · anneschuth · commit 3edc4a4d8e84 · 2026-06-19T14:08:03.000+02:00
Drop the defensive "not your request" / "not you" asides from error
headlines and docs. The neutral Source: line already says where to look,
so headlines lead with what's wrong and what to do — not with deflecting
blame. Only the wording changes (user-facing headline strings and docs);
fault classification, next steps and exit codes are untouched.
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -158,9 +158,8 @@ def verb(
 
 ### Error reporting (the diagnosis layer)
 
-Errors must be **honest about where the fault lives** — never make a user-app or
-user-input failure look like the platform is broken. The machinery lives in
-`api/errors.py`:
+Errors must give **clarity and a next step**: say what's wrong, point neutrally at
+where to look, and suggest the fix. The machinery lives in `api/errors.py`:
 
 - **`Fault`** (StrEnum): `USER_INPUT`, `USER_APP`, `USER_CONFIG`, `AUTH`, `PLATFORM`,
   `NETWORK`, `UNKNOWN`. Drives a neutral source label (`FAULT_SOURCE`), color
diff --git a/README.md b/README.md
@@ -74,10 +74,10 @@ zad metrics overview --output json | jq '.cpu_usage'
 
 ## Errors & exit codes
 
-Errors tell you **where the fault lives** — your request, your application, your
-configuration, your credentials, or the ZAD platform — instead of a bare HTTP code.
-A failed image pull is labelled `Source: your application (cluster runtime)`, not
-"the backend is down".
+Errors tell you **what's wrong and what to do next**, with a neutral label for where
+to look — your request, your application, your configuration, your credentials, or the
+ZAD platform — instead of a bare HTTP code. A failed image pull, for example, is labelled
+`Source: your application (cluster runtime)` and points you straight at the image and registry, with a suggested fix.
 
 Each error carries a structured diagnosis. In `--output json` it's a single object
 on stdout you can branch on in CI/CD:
diff --git a/src/zad_cli/api/client.py b/src/zad_cli/api/client.py
@@ -61,7 +61,7 @@ def _parse_v2_response(model_cls: type, payload: Any) -> dict:
             f"Unexpected API response shape for {model_cls.__name__}: {e}",
             diagnosis=Diagnosis(
                 fault=Fault.PLATFORM,
-                headline="ZAD returned a response this CLI couldn't read — a platform/version mismatch, not you.",
+                headline="ZAD returned a response this CLI couldn't read — likely a CLI/API version mismatch.",
                 summary=f"Schema {model_cls.__name__} failed to validate.",
                 next_steps=[
                     "Retry shortly (exit code 2 = transient).",
diff --git a/src/zad_cli/api/errors.py b/src/zad_cli/api/errors.py
@@ -1,21 +1,20 @@
-"""Honest, source-labelled diagnosis of API and task failures.
-
-The upstream API already attributes failures accurately: ``ErrorCategory`` on
-cluster errors, ``ComponentFailureInfo`` (with log tails) on failed deployment
-tasks, ``HTTPValidationError`` on bad input, and ``error_type`` on task results.
-The CLI used to collapse all of that into a bare ``HTTP 500`` / ``Task failed``
-string, which made every failure look like the platform was broken.
-
-This module turns those raw signals into a :class:`Diagnosis`: a clear,
-**source-labelled** headline ("Source: your application"), the concrete message,
-the backend's own explanation, and a next step. The fault vocabulary is kept in
-lockstep with the OpenAPI spec by ``tests/test_spec_conformance.py`` (strict
-coupling: drift fails CI) while runtime parsing degrades gracefully on unknown
-values (loose coupling).
-
-Honesty rule: never claim more certainty than the data supports. When the API
-gives no category, the fault is ``UNKNOWN`` and we point at the logs rather than
-guessing whose fault it is.
+"""Clear, actionable diagnosis of API and task failures.
+
+The goal is simple: tell the user *what went wrong and what to do next*. The
+upstream API already carries the signal for that — ``ErrorCategory`` on cluster
+errors, ``ComponentFailureInfo`` (with log tails) on failed deployment tasks,
+``HTTPValidationError`` on bad input, ``error_type`` on task results — but a bare
+``HTTP 500`` / ``Task failed`` string throws it away.
+
+This module turns those raw signals into a :class:`Diagnosis`: a plain-language
+headline, a neutral source label so you know where to look ("Source: your
+application"), the concrete message, the backend's own explanation, and a next
+step. The fault vocabulary is kept in lockstep with the OpenAPI spec by
+``tests/test_spec_conformance.py`` (strict coupling: drift fails CI) while runtime
+parsing degrades gracefully on unknown values (loose coupling).
+
+We never claim more certainty than the data supports: when the API gives no
+category, the fault is ``UNKNOWN`` and we point at the logs rather than guessing.
 """
 
 from __future__ import annotations
@@ -288,7 +287,7 @@ def _http_headline(status_code: int, fault: Fault) -> tuple[str, list[str]]:
         )
     if fault is Fault.PLATFORM:
         return (
-            f"ZAD had an internal error (HTTP {status_code}) — this is the platform, not your request.",
+            f"ZAD platform error (HTTP {status_code}) — usually transient.",
             ["Retry shortly (exit code 2 = transient). If it persists, report it with the time of the call."],
         )
     return (f"Request rejected (HTTP {status_code}).", [])
@@ -335,13 +334,13 @@ def diagnose_task_failure(error_message: str | None, result: object) -> Diagnosi
     )
 
     if fault is Fault.USER_APP:
-        headline = "Your application failed to run on the cluster — ZAD applied your config, the workload didn't start."
+        headline = "Your application didn't start on the cluster (the deploy reached the cluster; the workload failed)."
         next_steps.append("Inspect `zad logs -d <deployment>` and `zad deployment describe <deployment>`.")
     elif fault is Fault.USER_CONFIG:
-        headline = "ZAD could not apply your configuration."
+        headline = "Your configuration couldn't be applied."
         next_steps.append("Fix your git repo/manifests, then `zad deployment refresh`.")
     else:
-        headline = "The operation failed, and ZAD did not report a category."
+        headline = "The operation failed. Check the details below for the cause."
         next_steps.append("Run `zad task status <id>` and `zad logs` for the full output.")
 
     return Diagnosis(fault=fault, headline=headline, summary=summary, details=details, next_steps=next_steps)