@@ -34,6 +34,19 @@ class Predictor {
3434 double _drift; // Steady-state tolerance band (allowed deviation)
3535 double _threshold; // CUSUM alarm threshold
3636
37+ // --- Standardized CUSUM state (used only by AdaptivePredict) ---
38+ // Tracks EWMSD: an EWMA of |rps - baseline|, used as a running-scale proxy
39+ // for the typical deviation magnitude. Each observation is divided by
40+ // max(sigma, min_sigma) to produce a dimensionless z-score, so the alarm
41+ // threshold is invariant to the absolute RPS level and to clock-aliasing
42+ // effects that make a single window measure lower than its phase average.
43+ double _sigma_ewma; // EWMSD estimate of typical |deviation|
44+ double _beta; // EWMSD smoothing factor
45+ double _k_std; // Page-Hinkley allowance in z-score units
46+ double _h_std; // standardized alarm threshold
47+ double _min_sigma; // lower clamp on sigma (avoid z explosion at startup)
48+ double _cusum_std; // standardized CUSUM accumulator
49+
3750 double _avg_service_time; // T: average service time per request (seconds)
3851 int _safety_margin; // M_safety: extra workers for prediction error buffer
3952
@@ -49,14 +62,27 @@ class Predictor {
4962 int safety_margin = 1 )
5063 : _alpha(alpha), _ewma(0 ),
5164 _cusum (0 ), _drift(drift), _threshold(threshold),
65+ _sigma_ewma(0 ), _beta(0.3 ), _k_std(0.5 ), _h_std(3.0 ), _min_sigma(1.0 ),
66+ _cusum_std(0 ),
5267 _avg_service_time(avg_service_time),
5368 _safety_margin(safety_margin) {}
5469
5570 double GetAvgServiceTime () const { return _avg_service_time; }
5671
5772 // Feedback from DispatchPool threads: feed observed end-to-end latency (seconds)
5873 // so T tracks real workload characteristics rather than a static guess.
74+ //
75+ // Sanity clamp: worker.py sleeps 0.5s per request; cold-start fallback is 0.8s;
76+ // legitimate end-to-end never exceeds ~2s even under queuing. Observations > 5s
77+ // indicate pathological state (OS backpressure, client suspend, late-arriving
78+ // socket completion) and must NOT be smoothed into T. In a past incident
79+ // T drifted 0.5s -> 91s -> 322s, Little's Law then produced Target=443, and the process ran out of memory (OOM).
5980 void UpdateServiceTime (double observed_seconds) {
81+ if (observed_seconds > 5.0 || observed_seconds < 0.0 ) {
82+ logMessage (WARNING, " [Predictor] Discarded anomalous service time %.3fs (T kept at %.3fs)" ,
83+ observed_seconds, _avg_service_time);
84+ return ;
85+ }
6086 _avg_service_time = 0.1 * observed_seconds + 0.9 * _avg_service_time;
6187 }
6288
@@ -92,6 +118,62 @@ class Predictor {
92118 return target;
93119 }
94120
121+ // E7 — Standardized (adaptive) CUSUM:
122+ // sigma_t = beta * |rps - ewma| + (1 - beta) * sigma_{t-1} (EWMSD)
123+ // z_t = (rps - ewma) / max(sigma_t, min_sigma)
124+ // cusum = max(0, cusum + z_t - k_std)
125+ // alarm if cusum > h_std
126+ //
127+ // Motivation: the fixed drift/threshold variant is brittle when the true
128+ // Ramp boundary lands mid-window and the observed RPS is artificially
129+ // low. Normalising by a running sigma removes the RPS-magnitude
130+ // dependence, so C2/C4 fire at the same Ramp window as C1/C3.
131+ int AdaptivePredict (int current_rps) {
132+ // Step 1: EWMA baseline (same as UpdateAndPredict).
133+ if (_ewma == 0.0 ) {
134+ _ewma = current_rps;
135+ } else {
136+ _ewma = _alpha * current_rps + (1.0 - _alpha) * _ewma;
137+ }
138+
139+ double deviation = current_rps - _ewma;
140+
141+ // Step 2: EWMSD — track typical |deviation| as a running scale estimate.
142+ // We deliberately do NOT lazy-init _sigma_ewma to the first abs_dev:
143+ // that makes the first large deviation self-normalise to z=1, which
144+ // is insufficient to fire within the Ramp window (observed in the
145+ // warmup-sweep: adaptive C1 missed SPIKE at W=35s,60s because sigma
146+ // bootstrapped from the ramp itself). Always applying the EWMA
147+ // update means after a flat warmup (sigma≈0), the first ramp
148+ // window's abs_dev≈10 gives sigma = beta·abs_dev ≈ 3, so
149+ // z = abs_dev/sigma = 1/beta ≈ 3.3 — strong enough to cross
150+ // cusum_std = h=3.0 within two windows.
151+ double abs_dev = std::fabs (deviation);
152+ _sigma_ewma = _beta * abs_dev + (1.0 - _beta) * _sigma_ewma;
153+
154+ double sigma_safe = std::max (_sigma_ewma, _min_sigma);
155+ double z = deviation / sigma_safe;
156+
157+ // Step 3: Standardized CUSUM — accumulate z-scores above k_std only.
158+ _cusum_std = std::max (0.0 , _cusum_std + z - _k_std);
159+
160+ double predicted_lambda = _ewma;
161+
162+ if (_cusum_std > _h_std) {
163+ logMessage (WARNING, " [Predictor/adaptive] SPIKE DETECTED CUSUM_std=%.2f z=%.2f sigma=%.2f RPS=%d -> predicted_lambda=%.1f" ,
164+ _cusum_std, z, sigma_safe, current_rps, current_rps * 1.5 );
165+ predicted_lambda = current_rps * 1.5 ;
166+ _cusum_std = 0 ; // reset after acting on the alarm
167+ }
168+
169+ int target = (int )std::ceil (predicted_lambda * _avg_service_time) + _safety_margin;
170+
171+ logMessage (DEBUG, " [Predictor/adaptive] RPS=%d EWMA=%.2f sigma=%.2f z=%.2f CUSUM_std=%.2f T=%.3fs Target=%d" ,
172+ current_rps, _ewma, sigma_safe, z, _cusum_std, _avg_service_time, target);
173+
174+ return target;
175+ }
176+
95177 // E2 — Reactive baseline:
96178 // No EWMA smoothing, no CUSUM spike detection.
97179 // Target is computed directly from the current observed RPS via Little's Law.
0 commit comments