1313
1414#include < iostream>
1515#include < algorithm>
16+ #include < chrono>
1617#include < cmath>
1718#include " log.hpp"
1819
@@ -89,6 +90,8 @@ class Predictor {
8990 // Core function: called once per second with the observed RPS.
9091 // Returns the target total number of warm workers to maintain.
9192 int UpdateAndPredict (int current_rps) {
93+ auto t0 = std::chrono::high_resolution_clock::now ();
94+
9295 // Step 1: EWMA — track periodic baseline
9396 if (_ewma == 0.0 ) {
9497 _ewma = current_rps;
@@ -112,6 +115,14 @@ class Predictor {
112115 // Step 3: Little's Law N = ceil(lambda * T) + M_safety
113116 int target = (int )std::ceil (predicted_lambda * _avg_service_time) + _safety_margin;
114117
118+ // Per-tick inference latency in whole microseconds (duration_cast truncates,
119+ // so a sub-microsecond tick logs us=0). Emitted at NORMAL so it lands in
120+ // server.log for paper Section 5.4 (Pareto/overhead) extraction. Grep handle: [PredLatency] mode=ewma us=...
121+ long long us = std::chrono::duration_cast<std::chrono::microseconds>(
122+ std::chrono::high_resolution_clock::now () - t0).count ();
123+ logMessage (NORMAL, " [PredLatency] mode=ewma us=%lld rps=%d target=%d" ,
124+ us, current_rps, target);
125+
115126 logMessage (DEBUG, " [Predictor] RPS=%d EWMA=%.2f T=%.3fs Target=%d" ,
116127 current_rps, _ewma, _avg_service_time, target);
117128
@@ -129,6 +140,8 @@ class Predictor {
129140 // low. Normalising by a running sigma removes the RPS-magnitude
130141 // dependence, so C2/C4 fire at the same Ramp window as C1/C3.
131142 int AdaptivePredict (int current_rps) {
143+ auto t0 = std::chrono::high_resolution_clock::now ();
144+
132145 // Step 1: EWMA baseline (same as UpdateAndPredict).
133146 if (_ewma == 0.0 ) {
134147 _ewma = current_rps;
@@ -168,6 +181,11 @@ class Predictor {
168181
169182 int target = (int )std::ceil (predicted_lambda * _avg_service_time) + _safety_margin;
170183
184+ long long us = std::chrono::duration_cast<std::chrono::microseconds>(
185+ std::chrono::high_resolution_clock::now () - t0).count ();
186+ logMessage (NORMAL, " [PredLatency] mode=ewma_adaptive us=%lld rps=%d target=%d" ,
187+ us, current_rps, target);
188+
171189 logMessage (DEBUG, " [Predictor/adaptive] RPS=%d EWMA=%.2f sigma=%.2f z=%.2f CUSUM_std=%.2f T=%.3fs Target=%d" ,
172190 current_rps, _ewma, sigma_safe, z, _cusum_std, _avg_service_time, target);
173191
@@ -179,7 +197,14 @@ class Predictor {
179197 // Target is computed directly from the current observed RPS via Little's Law.
180198 // Responds only to what just happened — never pre-warms ahead of demand.
181199 int ReactivePredict (int current_rps) {
200+ auto t0 = std::chrono::high_resolution_clock::now ();
182201 int target = (int )std::ceil (current_rps * _avg_service_time) + _safety_margin;
202+
203+ long long us = std::chrono::duration_cast<std::chrono::microseconds>(
204+ std::chrono::high_resolution_clock::now () - t0).count ();
205+ logMessage (NORMAL, " [PredLatency] mode=reactive us=%lld rps=%d target=%d" ,
206+ us, current_rps, target);
207+
183208 logMessage (DEBUG, " [Predictor/Reactive] RPS=%d T=%.3fs Target=%d" ,
184209 current_rps, _avg_service_time, target);
185210 return target;
0 commit comments