Skip to content

Commit 8b76ac5

Browse files
Classify trusted perf soak evidence
Classify trusted perf soak evidence
1 parent 37fa87a commit 8b76ac5

3 files changed

Lines changed: 68 additions & 2 deletions

File tree

docs/bounded-growth.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,8 @@ service logs under `build/perf/`. A trusted bounded-growth run must include:
9494
sample was collected successfully; missing resource samples fail the run
9595
instead of being recorded as zero-count evidence;
9696
- GitHub/runner provenance in `summary.json` (`GITHUB_SHA`, `GITHUB_RUN_ID`,
97-
runner name/OS/arch, Compose project, and the tested base URL when present);
97+
runner name/OS/arch/environment, Compose project, and the tested base URL
98+
when present);
9899
- the SHA-256 digest of `config/dw-bounded-growth.php` so the artifact can be
99100
tied back to the policy that was active for the run.
100101

@@ -110,6 +111,13 @@ bounded cache family produced growth instead of only reporting a total
110111
`dw_perf_redis_server_keys_by_policy{policy="..."}` for optional remote-write
111112
alerting.
112113

114+
`summary.json` also includes `evidence.trust` with the
115+
`trusted_long_soak_v1` profile. Short CI smokes can still pass, but they are
116+
classified as ineligible for the issue-closing trusted evidence unless they run
117+
for at least one hour, use compose-backed resource sampling, run on a
118+
self-hosted runner when GitHub exposes the runner environment, meet sample
119+
coverage, have no unhealthy compose-backed resource samples, and have no bounded-growth assertion failures.
120+
113121
Per-policy limits can be enforced with JSON maps keyed by policy ID:
114122
`DW_PERF_MAX_SERVER_CACHE_KEYS_BY_POLICY` for maximum observed keys and
115123
`DW_PERF_MAX_FINAL_SERVER_CACHE_KEYS_BY_POLICY` for post-drain keys. Unknown

scripts/perf/server_soak.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -654,12 +654,50 @@ def evidence_provenance(base_url: str, compose_project: str) -> dict[str, Any]:
654654
"runner_name": os.environ.get("RUNNER_NAME", ""),
655655
"runner_os": os.environ.get("RUNNER_OS", ""),
656656
"runner_arch": os.environ.get("RUNNER_ARCH", ""),
657+
"runner_environment": os.environ.get("RUNNER_ENVIRONMENT", ""),
657658
"compose_project": compose_project,
658659
"base_url": base_url,
659660
"bounded_growth_policy_sha256": file_sha256(policy_path),
660661
}
661662

662663

664+
def evidence_trust_profile(
    *,
    duration_seconds: int,
    compose_project: str,
    runner_environment: str,
    periodic_sample_count: int,
    minimum_trusted_samples: int,
    sampling_health: dict[str, Any],
    failures: list[str],
    minimum_duration_seconds: int = 3600,
) -> dict[str, Any]:
    """Classify a soak run against the ``trusted_long_soak_v1`` profile.

    A run is eligible only when none of the checks below records a reason.
    The run itself may still pass; this classification only states whether
    its artifact qualifies under the profile.

    Args:
        duration_seconds: Soak duration compared against the minimum.
        compose_project: Compose project name; an empty value means
            compose-backed resource sampling was not configured.
        runner_environment: Runner environment label. An empty value is
            treated as "not exposed" and does not disqualify the run; any
            non-empty value other than ``"self-hosted"`` does.
        periodic_sample_count: Number of periodic samples collected.
        minimum_trusted_samples: Minimum sample count required.
        sampling_health: Mapping whose ``"unhealthy_samples"`` entry (missing
            or falsy counts as 0) must not be greater than zero.
        failures: Bounded-growth assertion failures; any entry disqualifies.
        minimum_duration_seconds: Minimum duration for eligibility. Defaults
            to 3600 (one hour).

    Returns:
        A dict with the profile name, the ``eligible`` flag, the thresholds
        and requirements that were applied, and the list of ``reasons`` the
        run is ineligible (empty when eligible).
    """
    reasons: list[str] = []

    if duration_seconds < minimum_duration_seconds:
        reasons.append(f"duration below trusted long-soak minimum {minimum_duration_seconds}s")
    if not compose_project:
        reasons.append("compose-backed resource sampling was not configured")
    # An empty runner environment is deliberately tolerated: only a known,
    # non-self-hosted environment disqualifies the run.
    if runner_environment and runner_environment != "self-hosted":
        reasons.append(f"runner environment is {runner_environment}, not self-hosted")
    if periodic_sample_count < minimum_trusted_samples:
        reasons.append("periodic sample coverage below trusted minimum")
    # ``or 0`` maps a missing or None counter to zero before the int() cast.
    if int(sampling_health.get("unhealthy_samples") or 0) > 0:
        reasons.append("compose-backed resource sampling has unhealthy samples")
    if failures:
        reasons.append("bounded-growth assertions failed")

    return {
        "profile": "trusted_long_soak_v1",
        "eligible": not reasons,
        "minimum_duration_seconds": minimum_duration_seconds,
        "runner_environment": runner_environment,
        "requires_self_hosted_runner": True,
        "requires_compose_resource_sampling": True,
        "reasons": reasons,
    }
699+
700+
663701
def main() -> int:
664702
args = parse_args()
665703

@@ -758,6 +796,8 @@ def main() -> int:
758796
observed_sample_coverage = periodic_sample_count / expected_samples
759797
sampling_health = sample_health(samples, args.compose_project)
760798

799+
provenance = evidence_provenance(base_url, args.compose_project)
800+
761801
summary = {
762802
"duration_seconds": args.duration_seconds,
763803
"elapsed_seconds": round(elapsed_seconds, 2),
@@ -799,7 +839,7 @@ def main() -> int:
799839
"evidence": {
800840
"started_at": started_at.isoformat().replace("+00:00", "Z"),
801841
"finished_at": finished_at.isoformat().replace("+00:00", "Z"),
802-
"provenance": evidence_provenance(base_url, args.compose_project),
842+
"provenance": provenance,
803843
},
804844
}
805845

@@ -867,6 +907,16 @@ def main() -> int:
867907
metrics.mark_assertion_failed()
868908
summary["failures"] = failures
869909

910+
summary["evidence"]["trust"] = evidence_trust_profile(
911+
duration_seconds=args.duration_seconds,
912+
compose_project=args.compose_project,
913+
runner_environment=str(provenance.get("runner_environment") or ""),
914+
periodic_sample_count=periodic_sample_count,
915+
minimum_trusted_samples=min_samples,
916+
sampling_health=sampling_health,
917+
failures=failures,
918+
)
919+
870920
metrics_path.write_text(metrics.prometheus(), encoding="utf-8")
871921
summary_path.write_text(json.dumps(summary, indent=2, sort_keys=True) + "\n", encoding="utf-8")
872922
print(json.dumps(summary, indent=2, sort_keys=True))

tests/Unit/ServerPerfHarnessContractTest.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,14 @@ public function test_soak_summary_records_trusted_evidence_fields(): void
4444
'bounded_growth_policy_sha256',
4545
'GITHUB_RUN_ID',
4646
'RUNNER_NAME',
47+
'RUNNER_ENVIRONMENT',
48+
'evidence_trust_profile',
49+
'trusted_long_soak_v1',
50+
'minimum_duration_seconds',
51+
'requires_self_hosted_runner',
52+
'requires_compose_resource_sampling',
53+
'duration below trusted long-soak minimum',
54+
'bounded-growth assertions failed',
4755
] as $needle) {
4856
$this->assertStringContainsString($needle, $source, "Perf soak summary must retain {$needle}");
4957
}

0 commit comments

Comments
 (0)