Skip to content

Commit 0920c37

Browse files
authored
Merge pull request #3 from ffengc/dev
update codes, figs, docs, #before pre version
2 parents 27f8770 + 155138f commit 0920c37

29 files changed

Lines changed: 1240 additions & 29 deletions

Predictor.hpp

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,19 @@ class Predictor {
3434
double _drift; // Steady-state tolerance band (allowed deviation)
3535
double _threshold; // CUSUM alarm threshold
3636

37+
// --- Standardized CUSUM state (used only by AdaptivePredict) ---
38+
// Tracks EWMSD: an EWMA of |rps - baseline|, used as a running-scale proxy
39+
// for the typical deviation magnitude. Each observation is divided by
40+
// max(sigma, min_sigma) to produce a dimensionless z-score, so the alarm
41+
// threshold is invariant to the absolute RPS level and to clock-aliasing
42+
// effects that make a single window measure lower than its phase average.
43+
double _sigma_ewma; // EWMSD estimate of typical |deviation|
44+
double _beta; // EWMSD smoothing factor
45+
double _k_std; // Page-Hinkley allowance in z-score units
46+
double _h_std; // standardized alarm threshold
47+
double _min_sigma; // lower clamp on sigma (avoid z explosion at startup)
48+
double _cusum_std; // standardized CUSUM accumulator
49+
3750
double _avg_service_time; // T: average service time per request (seconds)
3851
int _safety_margin; // M_safety: extra workers for prediction error buffer
3952

@@ -49,14 +62,27 @@ class Predictor {
4962
int safety_margin = 1)
5063
: _alpha(alpha), _ewma(0),
5164
_cusum(0), _drift(drift), _threshold(threshold),
65+
_sigma_ewma(0), _beta(0.3), _k_std(0.5), _h_std(3.0), _min_sigma(1.0),
66+
_cusum_std(0),
5267
_avg_service_time(avg_service_time),
5368
_safety_margin(safety_margin) {}
5469

5570
// Returns the current smoothed estimate of T, the per-request service time in
// seconds, as maintained by UpdateServiceTime(). Used for Little's Law sizing.
double GetAvgServiceTime() const { return _avg_service_time; }
5671

5772
// Feedback from DispatchPool threads: feed observed end-to-end latency (seconds)
5873
// so T tracks real workload characteristics rather than a static guess.
74+
//
75+
// Sanity clamp: worker.py sleeps 0.5s per request; cold-start fallback is 0.8s;
76+
// legitimate end-to-end never exceeds ~2s even under queuing. Observations > 5s
77+
// indicate pathological state (OS backpressure, client suspend, late-arriving
78+
// socket completion) and must NOT be smoothed into T — once observed before:
79+
// T drifted 0.5s -> 91s -> 322s -> Little's Law produced Target=443 -> OOM.
5980
void UpdateServiceTime(double observed_seconds) {
81+
if (observed_seconds > 5.0 || observed_seconds < 0.0) {
82+
logMessage(WARNING, "[Predictor] Discarded anomalous service time %.3fs (T kept at %.3fs)",
83+
observed_seconds, _avg_service_time);
84+
return;
85+
}
6086
_avg_service_time = 0.1 * observed_seconds + 0.9 * _avg_service_time;
6187
}
6288

@@ -92,6 +118,62 @@ class Predictor {
92118
return target;
93119
}
94120

121+
// E7 — Standardized (adaptive) CUSUM:
122+
// sigma_t = beta * |rps - ewma| + (1 - beta) * sigma_{t-1} (EWMSD)
123+
// z_t = (rps - ewma) / max(sigma_t, min_sigma)
124+
// cusum = max(0, cusum + z_t - k_std)
125+
// alarm if cusum > h_std
126+
//
127+
// Motivation: the fixed drift/threshold variant is brittle when the true
128+
// Ramp boundary lands mid-window and the observed RPS is artificially
129+
// low. Normalising by a running sigma removes the RPS-magnitude
130+
// dependence, so C2/C4 fire at the same Ramp window as C1/C3.
131+
int AdaptivePredict(int current_rps) {
132+
// Step 1: EWMA baseline (same as UpdateAndPredict).
133+
if (_ewma == 0.0) {
134+
_ewma = current_rps;
135+
} else {
136+
_ewma = _alpha * current_rps + (1.0 - _alpha) * _ewma;
137+
}
138+
139+
double deviation = current_rps - _ewma;
140+
141+
// Step 2: EWMSD — track typical |deviation| as a running scale estimate.
142+
// We deliberately do NOT lazy-init _sigma_ewma to the first abs_dev:
143+
// that makes the first large deviation self-normalise to z=1, which
144+
// is insufficient to fire within the Ramp window (observed in the
145+
// warmup-sweep: adaptive C1 missed SPIKE at W=35s,60s because sigma
146+
// bootstrapped from the ramp itself). Always applying the EWMA
147+
// update means after a flat warmup (sigma≈0), the first ramp
148+
// window's abs_dev≈10 gives sigma = beta·abs_dev ≈ 3, so
149+
// z = abs_dev/sigma = 1/beta ≈ 3.3 — strong enough to cross
150+
// cusum_std = h=3.0 within two windows.
151+
double abs_dev = std::fabs(deviation);
152+
_sigma_ewma = _beta * abs_dev + (1.0 - _beta) * _sigma_ewma;
153+
154+
double sigma_safe = std::max(_sigma_ewma, _min_sigma);
155+
double z = deviation / sigma_safe;
156+
157+
// Step 3: Standardized CUSUM — accumulate z-scores above k_std only.
158+
_cusum_std = std::max(0.0, _cusum_std + z - _k_std);
159+
160+
double predicted_lambda = _ewma;
161+
162+
if (_cusum_std > _h_std) {
163+
logMessage(WARNING, "[Predictor/adaptive] SPIKE DETECTED CUSUM_std=%.2f z=%.2f sigma=%.2f RPS=%d -> predicted_lambda=%.1f",
164+
_cusum_std, z, sigma_safe, current_rps, current_rps * 1.5);
165+
predicted_lambda = current_rps * 1.5;
166+
_cusum_std = 0; // reset after acting on the alarm
167+
}
168+
169+
int target = (int)std::ceil(predicted_lambda * _avg_service_time) + _safety_margin;
170+
171+
logMessage(DEBUG, "[Predictor/adaptive] RPS=%d EWMA=%.2f sigma=%.2f z=%.2f CUSUM_std=%.2f T=%.3fs Target=%d",
172+
current_rps, _ewma, sigma_safe, z, _cusum_std, _avg_service_time, target);
173+
174+
return target;
175+
}
176+
95177
// E2 — Reactive baseline:
96178
// No EWMA smoothing, no CUSUM spike detection.
97179
// Target is computed directly from the current observed RPS via Little's Law.

README-CN.md

Lines changed: 100 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@
2929
- [Check #2 Checklist](#check-2-checklist)
3030
- [如何运行](#如何运行)
3131
- [目前的发现与问题](#目前的发现与问题)
32+
- [Before Pre — 演示前最终结果](#before-pre--演示前最终结果)
33+
- [相比 Check #2 的新增工作](#相比-check-2-的新增工作)
34+
- [主结果(5 模式 × 4 cycle,2026-04-20 单 trial)](#主结果5-模式--4-cycle2026-04-20-单-trial)
35+
- [CoW Template — 9× 冷启动提速](#cow-template--9-冷启动提速)
36+
- [CUSUM 实测 trace — 报警落在 ramp,不在 peak](#cusum-实测-trace--报警落在-ramp不在-peak)
37+
- [Workload 设计 — Bursty-Ramp × 4 cycles](#workload-设计--bursty-ramp--4-cycles)
38+
- [Warmup-Sweep 消融 — Fixed vs Adaptive 各有 failure mode](#warmup-sweep-消融--fixed-vs-adaptive-各有-failure-mode)
39+
- [已知局限 / Final Report 待办](#已知局限--final-report-待办)
3240

3341

3442
## Proposal
@@ -243,4 +251,95 @@ python3 load_tester.py
243251
**已知问题 / 局限:**
244252
- CUSUM 固定阈值导致每隔一个周期退化,需要自适应阈值改进
245253
- 目前在本机 loopback 测试,无真实网络延迟,CloudLab 结果可能不同
246-
- RSS 数值是各进程独立 RSS 之和,CoW 共享页被重复计算,真实物理内存(PSS)更低
254+
- RSS 数值是各进程独立 RSS 之和,CoW 共享页被重复计算,真实物理内存(PSS)更低
255+
256+
## Before Pre — 演示前最终结果
257+
258+
> 本节是 2026-04-30 课堂演示(CSCI 599)前整理的最终结果,对 Check #2 之后新增的 **CoW 量化、Adaptive CUSUM 基线、Warmup Sweep 消融** 做了系统化补充。所有图见 [`figures/pre/`](./figures/pre/) 目录及 [`MANIFEST.md`](./figures/pre/MANIFEST.md)
259+
260+
### 相比 Check #2 的新增工作
261+
262+
| 新增 | 文件 / 命令 | 说明 |
263+
|---|---|---|
264+
| **CoW 冷启动量化** | `figures/plot_cow.py` → `slide05_cow.png` | 从 worker.py 模拟 cold start + server.log "Worker N ready (CoW fork)" 日志手测:`Naive (exec Python + import) ≈ 900 ms` vs `CoW (fork from warm parent) ≈ 100 ms` —— **9× 提速 · 无 runtime 依赖** |
265+
| **CUSUM 实测 trace** | `figures/plot_rps_cusum.py` → `slide06_rps_cusum.png` | 用 sweep #3 真实 server.log(90 个 predictor tick、11 个 SPIKE DETECTED)重建 CUSUM 累积轨迹(drift=5, h=8),验证报警**全部落在 ramp 爬升段** |
266+
| **Adaptive CUSUM 基线** | `./server ewma_adaptive` | 用 EWMSD(running σ)做 z-score 归一化,作为 fixed-drift CUSUM 的对照 |
267+
| **Workload 设计可视化** | `figures/plot_workload.py` → `slide07_workload.png` | 将 `load_tester.py` 的 4-cycle Bursty-Ramp 参数画成时间轴,标出 Ramp = CUSUM 检测窗口 |
268+
| **5-mode 主结果** | `figures/plot_main_result.py` → `slide08_main_result.png` | 2026-04-20 重跑 5 个模式 × 4 cycle,cold counts 从各 `load_tester_output.txt` 的 SPIKE COMPARISON 表解析 |
269+
| **Warmup-Sweep 消融** | `figures/plot_sweep.py` → `slide10_sweep.png` | sweep #1(W=5, 120 s 端点)+ sweep #3(W=10, 20, 35, 60 s 内点),对比 fixed vs adaptive |
270+
271+
### 主结果(5 模式 × 4 cycle,2026-04-20 单 trial)
272+
273+
![](./figures/pre/slide08_main_result.png)
274+
275+
| 模式 | C1 | C2 | C3 | C4 | 总计 | 说明 |
276+
|---|---:|---:|---:|---:|---:|---|
277+
| Static-15(过度配置) | 0 | 0 | 0 | 0 | **0** | 15 个 worker 全程钉死 —— 上界参照线 |
278+
| Adaptive CUSUM(EWMSD z-score) | 0 | 0 | 0 | 0 | **0** | W=35 落在 sweet spot |
279+
| **Fixed CUSUM(我们)** | 0 | 14 | **33** | 0 | **47** | C3 出现 clock-aliasing 事件 |
280+
| Reactive(按 backlog 扩缩) | 20 | 15 | 20 | 12 | **67** | 反应式基线 |
281+
| ARIMA(smoothed Target) | 20 | 18 | 31 | 16 | **85** | 历史时序预测 |
282+
283+
> 总冷启动数 = 4 个 cycle 中 600 个 spike 请求里被判为 cold(RTT > 700 ms)的总数。
284+
285+
**主要发现:**
286+
287+
- **预测式(Fixed CUSUM)明显赢反应式**:比 Reactive 少 **30%** cold starts,比 ARIMA 少 **45%**
288+
- **47 里有 33 来自 C3 一次 clock-aliasing 事件**:2 秒测量窗口正好把 ramp 切散,CUSUM 累加恰好过不了阈值,第一次 SPIKE DETECTED 推迟 4 秒(`server.log` 在 t=1776671064 时 CUSUM=18.24,而非预期 ~8–10)。这是 fixed-drift CUSUM 的**已知 failure mode,不是 bug**。不计这次事件,total ≈ 14,几乎贴着 Static-15 的 floor —— 但我们没钉死 15 个 worker
289+
- **CoW Template** 把每个新 worker 的 spin-up 从 **900 ms** 降到 **100 ms**(9× 提速、无 runtime 依赖)
290+
- **CUSUM 在 ramp 阶段触发,不是 peak**:单次 200 s 的 4-cycle run 共触发 11 次 SPIKE DETECTED,**全部落在 ramp 爬升段**
291+
292+
### CoW Template — 9× 冷启动提速
293+
294+
![](./figures/pre/slide05_cow.png)
295+
296+
启动一次 template Python 进程预先 import 好 Pillow + 建好 socket 骨架,后续每个 worker 通过 `fork()` 从 template 复制。Linux 的 copy-on-write 让 fork 几乎免费 —— Pillow 代码 / import 表是只读的,不会触发页复制。
297+
298+
**No image. No snapshot. No registry. Just `fork()` from a warm parent.**
299+
300+
### CUSUM 实测 trace — 报警落在 ramp,不在 peak
301+
302+
![](./figures/pre/slide06_rps_cusum.png)
303+
304+
上面 panel:蓝线是测得 RPS,橙虚线是 EWMA baseline(α=0.2)—— 故意滞后让 RPS 一脱离就显出 gap。
305+
306+
下面 panel:绿色是 CUSUM 累加器,越过红色虚线 `h = 8` 时 ★ 报警 —— **11 次报警全部落在 ramp 爬升段,0 次落在 peak 之后**。这就是 "catch the ramp, not the peak" 的实证。
307+
308+
### Workload 设计 — Bursty-Ramp × 4 cycles
309+
310+
![](./figures/pre/slide07_workload.png)
311+
312+
每个 cycle 模拟一次"列车到站"周期:`Warmup → Ramp(30 s) → Spike(30 RPS × 5 s) → Cooldown → Drain`
313+
314+
- **C1 warmup = 8 s**:测真正冷启动(无任何历史)
315+
- **C2–C4 warmup = 35 s**:长到让 scavenger 把 worker 全部缩回去,**但 EWMA baseline 还记得上次 spike** —— 测 predictor 跨 cycle 的记忆
316+
- **橙色 Ramp 即 CUSUM 的 30 s 检测窗口**:所有想在 peak 那一刻拿到的 worker,都必须在这 30 s 内 fork 好
317+
318+
### Warmup-Sweep 消融 — Fixed vs Adaptive 各有 failure mode
319+
320+
![](./figures/pre/slide10_sweep.png)
321+
322+
| W (s) | Fixed CUSUM | Adaptive CUSUM |
323+
|---:|---:|---:|
324+
| 5 | 48 | **287** |
325+
| 10 | 45 | 135 |
326+
| 20 | 0 | 0 |
327+
| 35 | 0 | 0 |
328+
| 60 | 0 | 0 |
329+
| 120 | **32** | 0 |
330+
331+
- **Adaptive 在短 W 翻车(τ_σ cliff)**:running σ 在背靠背 burst 之间降不下来,z-score 永远过不了阈值。τ_σ ≈ 6.6 s 是已实测的衰减常数
332+
- **Fixed 在长 W 翻车(aliasing miss)**:阈值 h=8 是为典型 ramp 调的,2 s 测量窗口在边界 case 下会切散 ramp
333+
- **Sweet spot: W = 20 ~ 60 s**:两种都工作。**主结果 W=35 正落在这里**,所以两种都接近 0
334+
- 网格总数:fixed=131 < adaptive=423 —— **raw number 反而 fixed 赢**
335+
336+
> **Framing****两个 failure modes,没有赢家**。Fixed 适合紧节奏,Adaptive 适合松节奏。Predictor 不是一个选项 —— **是一个 knob**。Adaptive 真正的贡献是 **scale invariance + aliasing robustness**,而不是更少的 cold starts。
337+
338+
### 已知局限 / Final Report 待办
339+
340+
- **n = 1 per sweep point**:受演示前时间预算限制,sweep 网格未做多 trial → CloudLab multi-trial(n ≥ 5)放在 final report
341+
- **当前 Python load_tester 上限 ~300 RPS**,不足以模拟真实 edge burst(目标 2 k+,需切到 `wrk` 或 Rust async)
342+
- **Regime-aware ensemble** 是 sweep 结果最直接的研究延伸:fixed + adaptive + meta-controller,自动按 workload 节奏选 —— 补上 W ≤ 10 s 的 gap,同时不丢 adaptive 的 scale invariance
343+
- 本机 loopback 测试,无真实网络延迟
344+
- RSS 是各进程独立 RSS 之和,CoW 共享页被重复计算,真实 PSS 更低
345+
- 演示交付物:[`docs/pre_how_4.md`](./docs/pre_how_4.md)(13 页 Slide 稿,中英双语)+ [`figures/pre/`](./figures/pre/) 5 张图

0 commit comments

Comments
 (0)