-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcalibration.rs
More file actions
335 lines (308 loc) · 11.8 KB
/
Copy pathcalibration.rs
File metadata and controls
335 lines (308 loc) · 11.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
//! FALSIFY-CCPA-019: calibration-required-before-verdict gate.
//!
//! Codifies the lesson from M196-M224: Phase 5 machinery shipped without
//! end-to-end calibration → 4-bug stack (apr serve leak, claude permission
//! denial, missing cwd, prose-vs-JSON parse mismatch) survived to M224 and
//! produced an invalid "0/5 for both systems" verdict that triggered a
//! spec-rewrite cascade (M226-M232).
//!
//! Root cause: gate machinery was unit-tested with `MockDriver`; live-evidence
//! tests were `#[ignore]`'d until M224 actually pointed the harness at real
//! binaries — which was the FIRST end-to-end real-binary dispatch and
//! where the bug-stack collided.
//!
//! Fix: any final outcome-parity verdict (CCPA-016/017/018) — when promoted
//! `PROPOSED` → `ACTIVE_RUNTIME`, OR when an evidence file is treated as
//! discharging the gate — MUST be preceded by a successful calibration
//! run. A calibration run = the meter producing the expected output on
//! TWO known-good fixtures: a synthetic identity (both systems should
//! pass, baseline ≈ 1.0) and a synthetic regression (both systems should
//! fail, baseline ≈ 0.0). If the meter can't produce the expected output
//! on these fixtures, it can't be trusted to produce VALID output on
//! real fixtures regardless of the per-fixture numbers.
//!
//! This module provides:
//! - [`CalibrationRecord`] — a single calibration-run entry
//! - [`CalibrationLog`] — the operator-facing artifact at
//! `evidence/calibration/calibration-runs.json`
//! - [`CalibrationLog::passes_freshness_window`] — predicate enforced by
//! the FALSIFY-CCPA-019 gate test
//!
//! The artifact is operator-curated (the M234 retraction process is
//! manual; future calibration runs land here at operator dispatch time).
use serde::{Deserialize, Serialize};
/// Maximum age, in whole calendar days, that the most-recent calibration
/// record may have before the FALSIFY-CCPA-019 gate fires.
///
/// The bound is inclusive: [`CalibrationLog::passes_freshness_window`]
/// accepts an age of exactly this many days and rejects anything older.
/// Ages are computed from the date part of the timestamps only; the
/// time-of-day component is ignored.
///
/// 30 days is the operator-tier freshness window — short enough to catch
/// infrastructure drift (rustc upgrades, apr CLI version bumps, claude
/// CLI version bumps) but long enough to not require weekly operator
/// dispatch.
pub const FRESHNESS_WINDOW_DAYS: i64 = 30;
/// A single calibration-run entry. Records whether the meter passed an
/// identity fixture (sanity: known-good produces pass) AND failed a
/// regression fixture (sanity: known-broken produces fail) on the
/// stated harness version, at the stated timestamp.
///
/// Both boolean fields must be `true` for the record to discharge the
/// gate; see [`CalibrationLog::passes_freshness_window`].
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
pub struct CalibrationRecord {
/// ISO 8601 UTC timestamp of the run, e.g. `"2026-05-17T10:30:00Z"`.
/// The freshness check reads only the leading `YYYY-MM-DD` part; a
/// value without that prefix makes the gate fail.
pub passed_at: String,
/// Git commit SHA of the bench harness at run time. Intended to scope
/// freshness to a specific harness version — a calibration record
/// from a previous harness version should not discharge the gate after
/// a non-trivial harness change.
///
/// NOTE(review): the freshness predicate in this file does not read
/// this field; any version scoping has to be enforced by the caller.
pub harness_version: String,
/// Identity-fixture outcome: `true` when the meter reported a pass on
/// the synthetic-equivalent-trace fixture (the expected result).
pub identity_pass: bool,
/// Regression-fixture outcome: `true` when the meter reported a
/// failure on the synthetic-divergent-trace fixture (the expected
/// result; demonstrates bidirectional sensitivity). `false` means the
/// known-broken fixture was scored as passing.
pub regression_fail: bool,
/// Free-text label describing the calibration scope, e.g.
/// `"M234 trivial-fixture + decy#39 dispatch"`. Used for audit
/// trail; not validated.
pub label: String,
}
/// The operator-curated calibration log.
///
/// By convention existing entries are never edited or removed, and each
/// new record is inserted at the front of `records`. The ordering is
/// load-bearing: [`CalibrationLog::most_recent`] returns the first
/// element without sorting or comparing timestamps.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CalibrationLog {
/// Calibration records, most-recent-first.
pub records: Vec<CalibrationRecord>,
}
impl CalibrationLog {
    /// Deserialize a `CalibrationLog` from JSON text.
    ///
    /// # Errors
    ///
    /// Propagates the [`serde_json::Error`] raised when `s` is not valid
    /// JSON or does not match the log schema.
    pub fn from_json_str(s: &str) -> Result<Self, serde_json::Error> {
        serde_json::from_str::<Self>(s)
    }

    /// Borrow the newest calibration record.
    ///
    /// "Newest" means the first element of `records` (the log is kept
    /// most-recent-first); timestamps are not compared. Yields `None`
    /// for an empty log.
    #[must_use]
    pub fn most_recent(&self) -> Option<&CalibrationRecord> {
        match self.records.as_slice() {
            [newest, ..] => Some(newest),
            [] => None,
        }
    }

    /// The FALSIFY-CCPA-019 gate predicate.
    ///
    /// Evaluates to `true` only when all of the following hold for the
    /// record returned by [`CalibrationLog::most_recent`]:
    /// 1. the record exists,
    /// 2. `identity_pass` is `true`,
    /// 3. `regression_fail` is `true`,
    /// 4. both `passed_at` and `now_utc` parse as dates, and the record
    ///    is between 0 and `FRESHNESS_WINDOW_DAYS` days old (inclusive
    ///    on both ends) relative to `now_utc`.
    ///
    /// A record dated after `now_utc` has a negative age and is
    /// rejected. `harness_version` and `label` are not examined.
    ///
    /// `now_utc` is supplied by the caller — typically
    /// `chrono::Utc::now().to_rfc3339()` or an equivalent ISO 8601 UTC
    /// string — so the function itself performs no IO and reads no
    /// clock, which keeps it deterministic under test.
    #[must_use]
    pub fn passes_freshness_window(&self, now_utc: &str) -> bool {
        let age_in_days = self
            .most_recent()
            // Both sanity fixtures must have produced their expected result.
            .filter(|latest| latest.identity_pass && latest.regression_fail)
            .and_then(|latest| {
                let recorded_on = parse_iso_date(&latest.passed_at)?;
                let today = parse_iso_date(now_utc)?;
                Some(today - recorded_on)
            });
        matches!(age_in_days, Some(age) if (0..=FRESHNESS_WINDOW_DAYS).contains(&age))
    }
}
/// Parse an ISO 8601 UTC timestamp into a day-count since 2000-01-01.
///
/// Only the leading ten bytes (`YYYY-MM-DD`) are interpreted; anything
/// after them — e.g. the `THH:MM:SSZ` tail — is ignored. That covers
/// the `"YYYY-MM-DD"` and `"YYYY-MM-DDTHH:MM:SSZ"` shapes which is all
/// the calibration log uses. Pulling in `chrono` would be heavier than
/// needed for a one-format date-arithmetic primitive.
///
/// Returns `None` on malformed input and never panics:
/// - input shorter than ten bytes,
/// - separators other than `-` at byte offsets 4 and 7,
/// - any non-ASCII-digit byte in the year, month or day fields (this
///   includes signs such as `+`/`-` and multi-byte UTF-8 characters),
/// - month outside `1..=12` or day outside `1..=31`.
///
/// The day is range-checked against `1..=31` only, not against the
/// length of the given month.
fn parse_iso_date(s: &str) -> Option<i64> {
    // Folds a run of ASCII digits into a number; `None` on any other byte.
    fn digits(run: &[u8]) -> Option<i64> {
        run.iter().try_fold(0_i64, |acc, &byte| {
            byte.is_ascii_digit()
                .then(|| acc * 10 + i64::from(byte - b'0'))
        })
    }

    // Work on bytes with a slice pattern rather than `s[a..b]`: string
    // slicing panics when an index falls inside a multi-byte character
    // (e.g. "2026-05-1é"), whereas this simply fails to match or fails
    // the digit check. The trailing `..` accepts an optional time part.
    let &[y0, y1, y2, y3, b'-', m0, m1, b'-', d0, d1, ..] = s.as_bytes() else {
        return None;
    };

    let year = digits(&[y0, y1, y2, y3])?;
    let month = digits(&[m0, m1])?;
    let day = digits(&[d0, d1])?;
    if !(1..=12).contains(&month) || !(1..=31).contains(&day) {
        return None;
    }
    Some(days_since_epoch(year, month, day))
}
/// Signed number of days from 2000-01-01 to the given calendar date.
///
/// 2000-01-01 maps to 0; earlier dates are negative. Whole years are
/// counted with the Gregorian leap rule (via `is_leap`), whole months
/// with the per-year table from `month_days`, and the day-of-month is
/// added zero-based. Year range 1900-2100 is sufficient for this
/// project's lifecycle.
///
/// `month` is expected in `1..=12` and `day` is not validated here;
/// callers are responsible for range checks.
fn days_since_epoch(year: i64, month: i64, day: i64) -> i64 {
    let year_length = |y: i64| -> i64 {
        if is_leap(y) {
            366
        } else {
            365
        }
    };

    // Days contributed by the complete years between the epoch and `year`.
    let whole_years: i64 = if year >= 2000 {
        (2000..year).map(year_length).sum()
    } else {
        -(year..2000).map(year_length).sum::<i64>()
    };

    // Days contributed by the complete months preceding `month`.
    // A non-positive `month - 1` fails the conversion and counts as zero months.
    let elapsed_months = usize::try_from(month - 1).unwrap_or(0);
    let table = month_days(year);
    let whole_months: i64 = table[..elapsed_months].iter().sum();

    whole_years + whole_months + (day - 1)
}
/// Gregorian leap-year rule: every fourth year is a leap year, except
/// century years, which are leap only when divisible by 400.
const fn is_leap(year: i64) -> bool {
    if year % 400 == 0 {
        return true;
    }
    if year % 100 == 0 {
        return false;
    }
    year % 4 == 0
}
/// Lengths of the twelve months of `year`, January first.
///
/// February is 29 days when `is_leap(year)` holds and 28 otherwise.
fn month_days(year: i64) -> [i64; 12] {
    let mut lengths = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
    if is_leap(year) {
        lengths[1] = 29;
    }
    lengths
}
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::disallowed_methods)]
mod tests {
    use super::*;

    /// Build a record with fixed `harness_version`/`label` so each test
    /// only spells out the three fields the gate actually reads.
    fn make_record(passed_at: &str, identity: bool, regression: bool) -> CalibrationRecord {
        CalibrationRecord {
            passed_at: passed_at.to_owned(),
            harness_version: "abc123".to_owned(),
            identity_pass: identity,
            regression_fail: regression,
            label: "test".to_owned(),
        }
    }

    #[test]
    fn empty_log_fails_gate() {
        let log = CalibrationLog { records: vec![] };
        assert!(!log.passes_freshness_window("2026-05-17"));
    }

    #[test]
    fn fresh_passing_record_passes_gate() {
        let log = CalibrationLog {
            records: vec![make_record("2026-05-17T10:00:00Z", true, true)],
        };
        assert!(log.passes_freshness_window("2026-05-17"));
    }

    #[test]
    fn time_component_is_ignored() {
        // Same calendar day, record time-of-day later than `now`: the
        // gate compares dates only, so the age is 0 days → pass.
        let log = CalibrationLog {
            records: vec![make_record("2026-05-17T23:59:59Z", true, true)],
        };
        assert!(log.passes_freshness_window("2026-05-17T00:00:01Z"));
    }

    #[test]
    fn fresh_record_with_identity_fail_fails_gate() {
        let log = CalibrationLog {
            records: vec![make_record("2026-05-17", false, true)],
        };
        assert!(!log.passes_freshness_window("2026-05-17"));
    }

    #[test]
    fn fresh_record_with_regression_pass_fails_gate() {
        // Regression fixture that PASSED means the meter is broken
        // (the regression should FAIL to demonstrate bidirectional
        // sensitivity). regression_fail = false → gate fires.
        let log = CalibrationLog {
            records: vec![make_record("2026-05-17", true, false)],
        };
        assert!(!log.passes_freshness_window("2026-05-17"));
    }

    #[test]
    fn stale_passing_record_fails_gate() {
        // Record from 61 days ago (14 in March + 30 in April + 17 in
        // May) — past the 30-day freshness window.
        let log = CalibrationLog {
            records: vec![make_record("2026-03-17", true, true)],
        };
        assert!(!log.passes_freshness_window("2026-05-17"));
    }

    #[test]
    fn exactly_at_freshness_window_passes() {
        // 30 days exactly = pass (≤ semantic).
        let log = CalibrationLog {
            records: vec![make_record("2026-04-17", true, true)],
        };
        assert!(log.passes_freshness_window("2026-05-17"));
    }

    #[test]
    fn one_day_past_freshness_window_fails() {
        let log = CalibrationLog {
            records: vec![make_record("2026-04-16", true, true)],
        };
        assert!(!log.passes_freshness_window("2026-05-17"));
    }

    #[test]
    fn future_record_fails_gate() {
        // Defensive: a record dated AFTER now (clock skew or operator
        // error) should not discharge the gate. age_days < 0 → fail.
        let log = CalibrationLog {
            records: vec![make_record("2026-06-17", true, true)],
        };
        assert!(!log.passes_freshness_window("2026-05-17"));
    }

    #[test]
    fn most_recent_returns_first_record() {
        let log = CalibrationLog {
            records: vec![
                make_record("2026-05-17", true, true),
                make_record("2026-04-01", true, true),
            ],
        };
        let r = log.most_recent().expect("non-empty");
        assert_eq!(r.passed_at, "2026-05-17");
    }

    #[test]
    fn json_roundtrip() {
        let log = CalibrationLog {
            records: vec![make_record("2026-05-17T10:00:00Z", true, true)],
        };
        let json = serde_json::to_string(&log).expect("serialize");
        let parsed = CalibrationLog::from_json_str(&json).expect("parse");
        assert_eq!(parsed.records.len(), 1);
        assert!(parsed.most_recent().unwrap().identity_pass);
    }

    #[test]
    fn malformed_date_in_record_fails_gate() {
        let log = CalibrationLog {
            records: vec![make_record("not-a-date", true, true)],
        };
        assert!(!log.passes_freshness_window("2026-05-17"));
    }

    #[test]
    fn malformed_now_utc_fails_gate() {
        let log = CalibrationLog {
            records: vec![make_record("2026-05-17", true, true)],
        };
        assert!(!log.passes_freshness_window("garbage"));
    }

    #[test]
    fn leap_year_arithmetic_correct() {
        // 2024 is a leap year (divisible by 4, not by 100):
        // 2024-02-29 → 2024-03-01 = 1 day, not 0.
        assert_eq!(
            days_since_epoch(2024, 3, 1) - days_since_epoch(2024, 2, 29),
            1
        );
        // 2023 is not a leap year: 02-28 is directly followed by 03-01.
        assert_eq!(
            days_since_epoch(2023, 3, 1) - days_since_epoch(2023, 2, 28),
            1
        );
        // Century rule: 1900 is not a leap year, 2000 is.
        assert_eq!(
            days_since_epoch(1900, 3, 1) - days_since_epoch(1900, 2, 28),
            1
        );
        assert_eq!(
            days_since_epoch(2000, 3, 1) - days_since_epoch(2000, 2, 29),
            1
        );
        // Year rollover out of a leap year.
        assert_eq!(
            days_since_epoch(2025, 1, 1) - days_since_epoch(2024, 12, 31),
            1
        );

        // End to end through the gate, with the window spanning Feb 29.
        let log = CalibrationLog {
            records: vec![make_record("2024-02-29", true, true)],
        };
        // 1 day to 03-01 plus 29 more = exactly 30 days → pass.
        assert!(log.passes_freshness_window("2024-03-30"));
        // 31 days → fail.
        assert!(!log.passes_freshness_window("2024-03-31"));
        // 2024-02-29 → 2026-05-17 is way more than 30 days → fail.
        assert!(!log.passes_freshness_window("2026-05-17"));
    }

    #[test]
    fn freshness_constant_is_30_days() {
        assert_eq!(FRESHNESS_WINDOW_DAYS, 30);
    }
}