Require complete perf cache threshold evidence

durable-workflow-ops · durable-workflow-ops · commit 0df216fe23a5 · 2026-04-22T16:28:45.000Z
diff --git a/docs/bounded-growth.md b/docs/bounded-growth.md
@@ -132,10 +132,12 @@ value, include GitHub Actions provenance (`GITHUB_REPOSITORY`, `GITHUB_REF`,
 come from the `Server Perf` workflow in `durable-workflow/server` on
 `refs/heads/main`, use a scheduled or manual dispatch event, have a clean
 tracked working tree, have `GITHUB_SHA` match the checked-out source commit,
-meet sample coverage, and have no bounded-growth assertion failures. A local
-run, pull-request smoke, unrelated workflow, or feature-branch workflow can
-still produce useful artifacts, but it cannot satisfy the trusted long-soak
-evidence profile just by setting `RUNNER_ENVIRONMENT=self-hosted`.
+meet sample coverage, include complete per-policy maximum and final cache
+threshold maps for every declared cache policy, and have no bounded-growth
+assertion failures. A local run, pull-request smoke, unrelated workflow, or
+feature-branch workflow can still produce useful artifacts, but it cannot
+satisfy the trusted long-soak evidence profile just by setting
+`RUNNER_ENVIRONMENT=self-hosted`.
 The CI smoke workflow sets `RUNNER_ENVIRONMENT=github-hosted` so those artifacts
 are traceable without being eligible for the trusted long-soak profile.
 
@@ -144,4 +146,6 @@ Per-policy limits can be enforced with JSON maps keyed by policy ID:
 `DW_PERF_MAX_FINAL_SERVER_CACHE_KEYS_BY_POLICY` for post-drain keys. Unknown
 policy IDs, non-integer values, and negative limits fail before the soak starts
 so evidence cannot silently drift away from the inventory in
-`config/dw-bounded-growth.php`.
+`config/dw-bounded-growth.php`. Trusted long-soak evidence is also marked
+ineligible when either per-policy threshold map is omitted or incomplete, even
+if the aggregate cache-key ceilings pass.
diff --git a/docs/perf-runner.md b/docs/perf-runner.md
@@ -101,9 +101,10 @@ limits in addition to the aggregate `server:*` cache ceiling. Each value must
 be a JSON object keyed by a `config/dw-bounded-growth.php` cache policy ID with
 non-negative integer limits. The map must include every declared cache policy;
 unknown policy IDs, missing policy IDs, and non-integer limits fail before load
-starts so a typo or partial map cannot silently weaken the evidence. The
-workflow file contains the canonical smoke and long-soak threshold maps, for
-example:
+starts so a typo or partial map cannot silently weaken the evidence. Separately
+from that startup check, a trusted long-soak artifact is marked ineligible if
+either per-policy threshold map is omitted or incomplete. The workflow file
+contains the canonical smoke and long-soak threshold maps, for example:
 
 ```bash
 DW_PERF_MAX_FINAL_SERVER_CACHE_KEYS_BY_POLICY='{"workflow_task_poll_requests":0,"long_poll_signals":0,"workflow_query_tasks":0,"task_queue_admission_locks":0,"task_queue_dispatch_counters":0,"workflow_task_expired_lease_recovery":0,"history_retention_inline":0,"readiness_probe":0}'
diff --git a/scripts/perf/server_soak.py b/scripts/perf/server_soak.py
@@ -692,6 +692,8 @@ def evidence_trust_profile(
     periodic_sample_count: int,
     minimum_trusted_samples: int,
     sampling_health: dict[str, Any],
+    max_server_cache_keys_by_policy: dict[str, int],
+    max_final_server_cache_keys_by_policy: dict[str, int],
     failures: list[str],
 ) -> dict[str, Any]:
     minimum_duration_seconds = 3600
@@ -723,6 +725,12 @@ def evidence_trust_profile(
         reasons.append("periodic sample coverage below trusted minimum")
     if int(sampling_health.get("unhealthy_samples") or 0) > 0:
         reasons.append("compose-backed resource sampling has unhealthy samples")
+    reasons.extend(
+        per_policy_threshold_reasons(
+            max_server_cache_keys_by_policy=max_server_cache_keys_by_policy,
+            max_final_server_cache_keys_by_policy=max_final_server_cache_keys_by_policy,
+        )
+    )
     if failures:
         reasons.append("bounded-growth assertions failed")
 
@@ -739,10 +747,38 @@ def evidence_trust_profile(
         "requires_github_sha_match": True,
         "requires_compose_resource_sampling": True,
         "requires_clean_tracked_working_tree": True,
+        "requires_per_policy_cache_thresholds": True,
         "reasons": reasons,
     }
 
 
+def per_policy_threshold_reasons(
+    *,
+    max_server_cache_keys_by_policy: dict[str, int],
+    max_final_server_cache_keys_by_policy: dict[str, int],
+) -> list[str]:
+    policy_ids = set(SERVER_CACHE_KEY_PATTERNS)
+    reasons = []
+
+    missing_max_policy_ids = sorted(policy_ids - set(max_server_cache_keys_by_policy))
+    if missing_max_policy_ids:
+        reasons.append(
+            "per-policy max cache thresholds missing for: "
+            + ", ".join(missing_max_policy_ids)
+        )
+
+    missing_final_policy_ids = sorted(
+        policy_ids - set(max_final_server_cache_keys_by_policy)
+    )
+    if missing_final_policy_ids:
+        reasons.append(
+            "per-policy final cache thresholds missing for: "
+            + ", ".join(missing_final_policy_ids)
+        )
+
+    return reasons
+
+
 def github_actions_provenance_present(provenance: dict[str, Any]) -> bool:
     required_fields = (
         "repository",
@@ -975,6 +1011,8 @@ def main() -> int:
             periodic_sample_count=periodic_sample_count,
             minimum_trusted_samples=min_samples,
             sampling_health=sampling_health,
+            max_server_cache_keys_by_policy=args.max_server_cache_keys_by_policy,
+            max_final_server_cache_keys_by_policy=args.max_final_server_cache_keys_by_policy,
             failures=failures,
         )
 
diff --git a/tests/Unit/ServerPerfHarnessContractTest.php b/tests/Unit/ServerPerfHarnessContractTest.php
@@ -73,6 +73,12 @@ public function test_soak_summary_records_trusted_evidence_fields(): void
             'requires_github_sha_match',
             'GitHub Actions SHA does not match checked-out source',
             'tracked working tree has uncommitted changes',
+            'requires_per_policy_cache_thresholds',
+            'per-policy max cache thresholds missing for:',
+            'per-policy final cache thresholds missing for:',
+            'per_policy_threshold_reasons',
+            'max_server_cache_keys_by_policy=args.max_server_cache_keys_by_policy',
+            'max_final_server_cache_keys_by_policy=args.max_final_server_cache_keys_by_policy',
             'duration below trusted long-soak minimum',
             'bounded-growth assertions failed',
         ] as $needle) {
@@ -147,6 +153,18 @@ public function test_per_policy_cache_threshold_parser_rejects_partial_maps(): v
         $this->assertStringContainsString('is missing cache policy thresholds for:', $source);
     }
 
+    public function test_trusted_perf_evidence_requires_per_policy_cache_thresholds(): void
+    {
+        $source = file_get_contents(dirname(__DIR__, 2).'/scripts/perf/server_soak.py');
+        $this->assertNotFalse($source, 'scripts/perf/server_soak.py must be readable');
+
+        $this->assertStringContainsString('def per_policy_threshold_reasons(', $source);
+        $this->assertStringContainsString('missing_max_policy_ids = sorted(policy_ids - set(max_server_cache_keys_by_policy))', $source);
+        $this->assertStringContainsString('missing_final_policy_ids = sorted(', $source);
+        $this->assertStringContainsString('policy_ids - set(max_final_server_cache_keys_by_policy)', $source);
+        $this->assertStringContainsString('"requires_per_policy_cache_thresholds": True', $source);
+    }
+
     public function test_ci_perf_jobs_set_runner_environment_provenance(): void
     {
         $workflow = file_get_contents(dirname(__DIR__, 2).'/.github/workflows/server-perf.yml');