Skip to content

Commit babaf88

Browse files
authored
Merge pull request #4 from ffengc/dev
Final paper deliverable: restructure README, add multi-trial harness and final paper figures.
2 parents 0920c37 + e19864a commit babaf88

31 files changed

Lines changed: 2535 additions & 668 deletions

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ server/
33
*.bak
44
docs/
55
logs/
6-
__pycache__/
6+
__pycache__/
7+
paper/

Predictor.hpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#include <iostream>
1515
#include <algorithm>
16+
#include <chrono>
1617
#include <cmath>
1718
#include "log.hpp"
1819

@@ -89,6 +90,8 @@ class Predictor {
8990
// Core function: called once per second with the observed RPS.
9091
// Returns the target total number of warm workers to maintain.
9192
int UpdateAndPredict(int current_rps) {
93+
auto t0 = std::chrono::high_resolution_clock::now();
94+
9295
// Step 1: EWMA — track periodic baseline
9396
if (_ewma == 0.0) {
9497
_ewma = current_rps;
@@ -112,6 +115,14 @@ class Predictor {
112115
// Step 3: Little's Law N = ceil(lambda * T) + M_safety
113116
int target = (int)std::ceil(predicted_lambda * _avg_service_time) + _safety_margin;
114117

118+
// Per-tick inference latency — emitted at NORMAL so it lands in
119+
// server.log for paper Section 5.4 (Pareto/overhead) extraction.
120+
// Grep handle: [PredLatency] mode=ewma us=...
121+
long long us = std::chrono::duration_cast<std::chrono::microseconds>(
122+
std::chrono::high_resolution_clock::now() - t0).count();
123+
logMessage(NORMAL, "[PredLatency] mode=ewma us=%lld rps=%d target=%d",
124+
us, current_rps, target);
125+
115126
logMessage(DEBUG, "[Predictor] RPS=%d EWMA=%.2f T=%.3fs Target=%d",
116127
current_rps, _ewma, _avg_service_time, target);
117128

@@ -129,6 +140,8 @@ class Predictor {
129140
// low. Normalising by a running sigma removes the RPS-magnitude
130141
// dependence, so C2/C4 fire at the same Ramp window as C1/C3.
131142
int AdaptivePredict(int current_rps) {
143+
auto t0 = std::chrono::high_resolution_clock::now();
144+
132145
// Step 1: EWMA baseline (same as UpdateAndPredict).
133146
if (_ewma == 0.0) {
134147
_ewma = current_rps;
@@ -168,6 +181,11 @@ class Predictor {
168181

169182
int target = (int)std::ceil(predicted_lambda * _avg_service_time) + _safety_margin;
170183

184+
long long us = std::chrono::duration_cast<std::chrono::microseconds>(
185+
std::chrono::high_resolution_clock::now() - t0).count();
186+
logMessage(NORMAL, "[PredLatency] mode=ewma_adaptive us=%lld rps=%d target=%d",
187+
us, current_rps, target);
188+
171189
logMessage(DEBUG, "[Predictor/adaptive] RPS=%d EWMA=%.2f sigma=%.2f z=%.2f CUSUM_std=%.2f T=%.3fs Target=%d",
172190
current_rps, _ewma, sigma_safe, z, _cusum_std, _avg_service_time, target);
173191

@@ -179,7 +197,14 @@ class Predictor {
179197
// Target is computed directly from the current observed RPS via Little's Law.
180198
// Responds only to what just happened — never pre-warms ahead of demand.
181199
int ReactivePredict(int current_rps) {
200+
auto t0 = std::chrono::high_resolution_clock::now();
182201
int target = (int)std::ceil(current_rps * _avg_service_time) + _safety_margin;
202+
203+
long long us = std::chrono::duration_cast<std::chrono::microseconds>(
204+
std::chrono::high_resolution_clock::now() - t0).count();
205+
logMessage(NORMAL, "[PredLatency] mode=reactive us=%lld rps=%d target=%d",
206+
us, current_rps, target);
207+
183208
logMessage(DEBUG, "[Predictor/Reactive] RPS=%d T=%.3fs Target=%d",
184209
current_rps, _avg_service_time, target);
185210
return target;

0 commit comments

Comments
 (0)