Skip to content

Commit 34da30b

Browse files
Require trusted server perf evidence
1 parent 0df216f commit 34da30b

5 files changed

Lines changed: 62 additions & 2 deletions

File tree

.github/workflows/server-perf.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ jobs:
166166
DW_PERF_MAX_SERVER_CACHE_KEYS_BY_POLICY: '{"workflow_task_poll_requests":2048,"long_poll_signals":2048,"workflow_query_tasks":128,"task_queue_admission_locks":256,"task_queue_dispatch_counters":256,"workflow_task_expired_lease_recovery":256,"history_retention_inline":128,"readiness_probe":32}'
167167
DW_PERF_MAX_FINAL_SERVER_CACHE_KEYS_BY_POLICY: '{"workflow_task_poll_requests":0,"long_poll_signals":0,"workflow_query_tasks":0,"task_queue_admission_locks":0,"task_queue_dispatch_counters":0,"workflow_task_expired_lease_recovery":0,"history_retention_inline":0,"readiness_probe":0}'
168168
DW_PERF_MAX_SERVER_MEMORY_SLOPE_MB_HOUR: "128"
169+
DW_PERF_REQUIRE_TRUSTED_EVIDENCE: "true"
169170
DW_PERF_REMOTE_WRITE_ENABLED: ${{ github.event_name != 'workflow_dispatch' || inputs.remote_write }}
170171
DW_PERF_REMOTE_WRITE_URL: ${{ vars.DW_PERF_REMOTE_WRITE_URL }}
171172
DW_PERF_REMOTE_WRITE_USERNAME: ${{ vars.DW_PERF_REMOTE_WRITE_USERNAME }}

docs/bounded-growth.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,11 @@ feature-branch workflow can still produce useful artifacts, but it cannot
139139
satisfy the trusted long-soak evidence profile just by setting
140140
`RUNNER_ENVIRONMENT=self-hosted`.
141141
The CI smoke workflow sets `RUNNER_ENVIRONMENT=github-hosted` so those artifacts
142-
are traceable without being eligible for the trusted long-soak profile.
142+
are traceable without being eligible for the trusted long-soak profile. The
143+
self-hosted soak workflow sets `DW_PERF_REQUIRE_TRUSTED_EVIDENCE=true`, so a
144+
scheduled or manual soak fails if the artifact cannot satisfy
145+
`trusted_long_soak_v1` even when the load generator itself stayed inside the
146+
bounded-growth budgets.
143147

144148
Per-policy limits can be enforced with JSON maps keyed by policy ID:
145149
`DW_PERF_MAX_SERVER_CACHE_KEYS_BY_POLICY` for maximum observed keys and

docs/perf-runner.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,9 @@ signal meaningful.
6363
Both workflow modes pass explicit runner provenance into the artifact. Short
6464
smokes set `RUNNER_ENVIRONMENT=github-hosted`; long soaks set
6565
`RUNNER_ENVIRONMENT=self-hosted`, which is required before `summary.json` can be
66-
classified as trusted long-soak evidence.
66+
classified as trusted long-soak evidence. The self-hosted job also sets
67+
`DW_PERF_REQUIRE_TRUSTED_EVIDENCE=true`, so it fails if the run completes but the
68+
artifact is ineligible for `trusted_long_soak_v1`.
6769

6870
## Local Run
6971

@@ -90,6 +92,8 @@ and requires a checked-out source commit matching `GITHUB_SHA`, so artifacts
9092
from uncommitted source, policy edits, feature branches, forks, unrelated
9193
workflows, pull-request smokes, misconfigured checkouts, or ad hoc local runs
9294
are marked ineligible for the trusted profile.
95+
When `DW_PERF_REQUIRE_TRUSTED_EVIDENCE=true`, the harness turns that ineligible
96+
profile into a failed run and records the profile reasons in `summary.json`.
9397
The harness fails when it cannot collect at least `DW_PERF_MIN_SAMPLE_COVERAGE`
9498
of the expected periodic samples, which defaults to 80%. The final post-drain
9599
sample is included in the artifact but does not count toward the periodic sample

scripts/perf/server_soak.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,14 @@
3939
}
4040

4141

42+
def env_bool(name: str, default: bool = False) -> bool:
    """Read a boolean flag from the environment variable ``name``.

    Matching ignores case and surrounding whitespace.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is not set at all.

    Returns:
        True for "1", "true", "yes" or "on"; False for "0", "false", "no",
        "off" or an empty value; ``default`` when the variable is unset.

    Raises:
        ValueError: If the variable is set to any other value. A typo such as
            "ture" must not silently read as False, because that would quietly
            switch off whatever gate the flag controls.
    """
    value = os.environ.get(name)
    if value is None:
        return default

    normalized = value.strip().lower()
    if normalized in ("1", "true", "yes", "on"):
        return True
    # An empty value keeps reading as False, matching the previous behaviour
    # for variables that are exported but left blank.
    if normalized in ("", "0", "false", "no", "off"):
        return False
    raise ValueError(
        f"{name} must be one of 1/true/yes/on or 0/false/no/off, got {value!r}"
    )
48+
49+
4250
class Metrics:
4351
def __init__(self) -> None:
4452
self.lock = threading.Lock()
@@ -204,6 +212,12 @@ def parse_args() -> argparse.Namespace:
204212
default=float(os.environ.get("DW_PERF_MAX_SERVER_MEMORY_SLOPE_MB_HOUR", "0")),
205213
help="If positive and duration is at least 10 minutes, fail when post-warmup server memory slope exceeds this value.",
206214
)
215+
parser.add_argument(
216+
"--require-trusted-evidence",
217+
action="store_true",
218+
default=env_bool("DW_PERF_REQUIRE_TRUSTED_EVIDENCE"),
219+
help="Fail if the generated summary is not eligible for trusted_long_soak_v1 evidence.",
220+
)
207221
args = parser.parse_args()
208222
policy_ids = set(SERVER_CACHE_KEY_PATTERNS)
209223
args.max_server_cache_keys_by_policy = parse_policy_limit_map(
@@ -930,6 +944,7 @@ def main() -> int:
930944
"max_final_server_cache_keys_by_policy": args.max_final_server_cache_keys_by_policy,
931945
"max_server_memory_slope_mb_hour": args.max_server_memory_slope_mb_hour,
932946
"min_sample_coverage": args.min_sample_coverage,
947+
"require_trusted_evidence": args.require_trusted_evidence,
933948
},
934949
"evidence": {
935950
"started_at": started_at.isoformat().replace("+00:00", "Z"),
@@ -1015,6 +1030,14 @@ def main() -> int:
10151030
max_final_server_cache_keys_by_policy=args.max_final_server_cache_keys_by_policy,
10161031
failures=failures,
10171032
)
1033+
trust_reasons = summary["evidence"]["trust"].get("reasons") or []
1034+
if args.require_trusted_evidence and not summary["evidence"]["trust"].get("eligible"):
1035+
failures.append(
1036+
"trusted evidence profile is ineligible"
1037+
+ (f": {trust_reasons}" if trust_reasons else "")
1038+
)
1039+
metrics.mark_assertion_failed()
1040+
summary["failures"] = failures
10181041

10191042
metrics_path.write_text(metrics.prometheus(), encoding="utf-8")
10201043
summary_path.write_text(json.dumps(summary, indent=2, sort_keys=True) + "\n", encoding="utf-8")

tests/Unit/ServerPerfHarnessContractTest.php

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@ public function test_soak_summary_records_trusted_evidence_fields(): void
7979
'per_policy_threshold_reasons',
8080
'max_server_cache_keys_by_policy=args.max_server_cache_keys_by_policy',
8181
'max_final_server_cache_keys_by_policy=args.max_final_server_cache_keys_by_policy',
82+
'DW_PERF_REQUIRE_TRUSTED_EVIDENCE',
83+
'--require-trusted-evidence',
84+
'require_trusted_evidence',
85+
'trusted evidence profile is ineligible',
8286
'duration below trusted long-soak minimum',
8387
'bounded-growth assertions failed',
8488
] as $needle) {
@@ -183,6 +187,30 @@ public function test_ci_perf_jobs_set_runner_environment_provenance(): void
183187
);
184188
}
185189

190+
/**
 * The self-hosted soak job must opt in to DW_PERF_REQUIRE_TRUSTED_EVIDENCE,
 * and the short smoke job that precedes it in the workflow must not.
 */
public function test_self_hosted_perf_soak_requires_trusted_evidence_eligibility(): void
{
    $workflow = file_get_contents(dirname(__DIR__, 2).'/.github/workflows/server-perf.yml');
    $this->assertNotFalse($workflow, '.github/workflows/server-perf.yml must be readable');

    $this->assertMatchesRegularExpression(
        '/name:\s+Self-hosted polling cache soak.*?DW_PERF_REQUIRE_TRUSTED_EVIDENCE:\s+"true"/s',
        $workflow,
        'Self-hosted long soaks must fail instead of producing green ineligible trusted evidence.',
    );

    // Single pattern, matched once: the same match both proves the smoke job
    // exists ahead of the soak job and captures its body for the check below.
    $smokeJobPattern = '/name:\s+Polling cache bounded-growth smoke(?P<block>.*?)\n\s+soak:/s';
    $this->assertSame(
        1,
        preg_match($smokeJobPattern, $workflow, $smokeMatch),
        'Server Perf workflow must keep a distinct short smoke job before the long soak job.',
    );

    // Reject every spelling the harness reads as enabled (1/true/yes/on,
    // any case, quoted or bare), not only the exact `"true"` form.
    $this->assertDoesNotMatchRegularExpression(
        '/DW_PERF_REQUIRE_TRUSTED_EVIDENCE:\s*["\']?(?:1|true|yes|on)["\']?\s*(?:#.*)?$/mi',
        $smokeMatch['block'],
        'Short perf smokes should remain useful but ineligible artifacts.',
    );
}
213+
186214
public function test_ci_perf_trigger_paths_cover_bounded_growth_runtime_surfaces(): void
187215
{
188216
$repoRoot = dirname(__DIR__, 2);

0 commit comments

Comments
 (0)