Skip to content

Commit 0df216f

Browse files
Require complete perf cache threshold evidence
Require complete perf cache threshold evidence
1 parent 50d67f7 commit 0df216f

4 files changed

Lines changed: 69 additions & 8 deletions

File tree

docs/bounded-growth.md

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,10 +132,12 @@ value, include GitHub Actions provenance (`GITHUB_REPOSITORY`, `GITHUB_REF`,
132132
come from the `Server Perf` workflow in `durable-workflow/server` on
133133
`refs/heads/main`, use a scheduled or manual dispatch event, have a clean
134134
tracked working tree, have `GITHUB_SHA` match the checked-out source commit,
135-
meet sample coverage, and have no bounded-growth assertion failures. A local
136-
run, pull-request smoke, unrelated workflow, or feature-branch workflow can
137-
still produce useful artifacts, but it cannot satisfy the trusted long-soak
138-
evidence profile just by setting `RUNNER_ENVIRONMENT=self-hosted`.
135+
meet sample coverage, include complete per-policy maximum and final cache
136+
threshold maps for every declared cache policy, and have no bounded-growth
137+
assertion failures. A local run, pull-request smoke, unrelated workflow, or
138+
feature-branch workflow can still produce useful artifacts, but it cannot
139+
satisfy the trusted long-soak evidence profile just by setting
140+
`RUNNER_ENVIRONMENT=self-hosted`.
139141
The CI smoke workflow sets `RUNNER_ENVIRONMENT=github-hosted` so those artifacts
140142
are traceable without being eligible for the trusted long-soak profile.
141143

@@ -144,4 +146,6 @@ Per-policy limits can be enforced with JSON maps keyed by policy ID:
144146
`DW_PERF_MAX_FINAL_SERVER_CACHE_KEYS_BY_POLICY` for post-drain keys. Unknown
145147
policy IDs, non-integer values, and negative limits fail before the soak starts
146148
so evidence cannot silently drift away from the inventory in
147-
`config/dw-bounded-growth.php`.
149+
`config/dw-bounded-growth.php`. Trusted long-soak evidence is also marked
150+
ineligible when either per-policy threshold map is omitted or incomplete, even
151+
if the aggregate cache-key ceilings pass.

docs/perf-runner.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,10 @@ limits in addition to the aggregate `server:*` cache ceiling. Each value must
101101
be a JSON object keyed by a `config/dw-bounded-growth.php` cache policy ID with
102102
non-negative integer limits. The map must include every declared cache policy;
103103
unknown policy IDs, missing policy IDs, and non-integer limits fail before load
104-
starts so a typo or partial map cannot silently weaken the evidence. The
105-
workflow file contains the canonical smoke and long-soak threshold maps, for
106-
example:
104+
starts so a typo or partial map cannot silently weaken the evidence. A trusted
105+
long-soak artifact is marked ineligible if either per-policy threshold map is
106+
omitted or incomplete. The workflow file contains the canonical smoke and
107+
long-soak threshold maps, for example:
107108

108109
```bash
109110
DW_PERF_MAX_FINAL_SERVER_CACHE_KEYS_BY_POLICY='{"workflow_task_poll_requests":0,"long_poll_signals":0,"workflow_query_tasks":0,"task_queue_admission_locks":0,"task_queue_dispatch_counters":0,"workflow_task_expired_lease_recovery":0,"history_retention_inline":0,"readiness_probe":0}'

scripts/perf/server_soak.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,8 @@ def evidence_trust_profile(
692692
periodic_sample_count: int,
693693
minimum_trusted_samples: int,
694694
sampling_health: dict[str, Any],
695+
max_server_cache_keys_by_policy: dict[str, int],
696+
max_final_server_cache_keys_by_policy: dict[str, int],
695697
failures: list[str],
696698
) -> dict[str, Any]:
697699
minimum_duration_seconds = 3600
@@ -723,6 +725,12 @@ def evidence_trust_profile(
723725
reasons.append("periodic sample coverage below trusted minimum")
724726
if int(sampling_health.get("unhealthy_samples") or 0) > 0:
725727
reasons.append("compose-backed resource sampling has unhealthy samples")
728+
reasons.extend(
729+
per_policy_threshold_reasons(
730+
max_server_cache_keys_by_policy=max_server_cache_keys_by_policy,
731+
max_final_server_cache_keys_by_policy=max_final_server_cache_keys_by_policy,
732+
)
733+
)
726734
if failures:
727735
reasons.append("bounded-growth assertions failed")
728736

@@ -739,10 +747,38 @@ def evidence_trust_profile(
739747
"requires_github_sha_match": True,
740748
"requires_compose_resource_sampling": True,
741749
"requires_clean_tracked_working_tree": True,
750+
"requires_per_policy_cache_thresholds": True,
742751
"reasons": reasons,
743752
}
744753

745754

755+
def per_policy_threshold_reasons(
756+
*,
757+
max_server_cache_keys_by_policy: dict[str, int],
758+
max_final_server_cache_keys_by_policy: dict[str, int],
759+
) -> list[str]:
760+
policy_ids = set(SERVER_CACHE_KEY_PATTERNS)
761+
reasons = []
762+
763+
missing_max_policy_ids = sorted(policy_ids - set(max_server_cache_keys_by_policy))
764+
if missing_max_policy_ids:
765+
reasons.append(
766+
"per-policy max cache thresholds missing for: "
767+
+ ", ".join(missing_max_policy_ids)
768+
)
769+
770+
missing_final_policy_ids = sorted(
771+
policy_ids - set(max_final_server_cache_keys_by_policy)
772+
)
773+
if missing_final_policy_ids:
774+
reasons.append(
775+
"per-policy final cache thresholds missing for: "
776+
+ ", ".join(missing_final_policy_ids)
777+
)
778+
779+
return reasons
780+
781+
746782
def github_actions_provenance_present(provenance: dict[str, Any]) -> bool:
747783
required_fields = (
748784
"repository",
@@ -975,6 +1011,8 @@ def main() -> int:
9751011
periodic_sample_count=periodic_sample_count,
9761012
minimum_trusted_samples=min_samples,
9771013
sampling_health=sampling_health,
1014+
max_server_cache_keys_by_policy=args.max_server_cache_keys_by_policy,
1015+
max_final_server_cache_keys_by_policy=args.max_final_server_cache_keys_by_policy,
9781016
failures=failures,
9791017
)
9801018

tests/Unit/ServerPerfHarnessContractTest.php

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,12 @@ public function test_soak_summary_records_trusted_evidence_fields(): void
7373
'requires_github_sha_match',
7474
'GitHub Actions SHA does not match checked-out source',
7575
'tracked working tree has uncommitted changes',
76+
'requires_per_policy_cache_thresholds',
77+
'per-policy max cache thresholds missing for:',
78+
'per-policy final cache thresholds missing for:',
79+
'per_policy_threshold_reasons',
80+
'max_server_cache_keys_by_policy=args.max_server_cache_keys_by_policy',
81+
'max_final_server_cache_keys_by_policy=args.max_final_server_cache_keys_by_policy',
7682
'duration below trusted long-soak minimum',
7783
'bounded-growth assertions failed',
7884
] as $needle) {
@@ -147,6 +153,18 @@ public function test_per_policy_cache_threshold_parser_rejects_partial_maps(): v
147153
$this->assertStringContainsString('is missing cache policy thresholds for:', $source);
148154
}
149155

156+
/**
 * Source-level contract check: the soak harness must define
 * per_policy_threshold_reasons(), compute the missing policy IDs for both
 * the max and final threshold maps, and advertise the requirement in the
 * trust profile it emits.
 */
public function test_trusted_perf_evidence_requires_per_policy_cache_thresholds(): void
{
    $harnessSource = file_get_contents(dirname(__DIR__, 2).'/scripts/perf/server_soak.py');
    $this->assertNotFalse($harnessSource, 'scripts/perf/server_soak.py must be readable');

    // Literal snippets that must appear verbatim in the Python harness.
    $requiredSnippets = [
        'def per_policy_threshold_reasons(',
        'missing_max_policy_ids = sorted(policy_ids - set(max_server_cache_keys_by_policy))',
        'missing_final_policy_ids = sorted(',
        'policy_ids - set(max_final_server_cache_keys_by_policy)',
        '"requires_per_policy_cache_thresholds": True',
    ];

    foreach ($requiredSnippets as $snippet) {
        $this->assertStringContainsString($snippet, $harnessSource);
    }
}
167+
150168
public function test_ci_perf_jobs_set_runner_environment_provenance(): void
151169
{
152170
$workflow = file_get_contents(dirname(__DIR__, 2).'/.github/workflows/server-perf.yml');

0 commit comments

Comments
 (0)