|
38 | 38 | from zimfarm_worker.task.zim import get_zim_info |
39 | 39 |
|
40 | 40 | SLEEP_INTERVAL = 60 # nb of seconds to sleep before watching |
| 41 | +CPU_EWMA_ALPHA = 0.25 # EWMA smoothing factor for CPU percentage samples (0..1) |
41 | 42 | PENDING = "pending" |
42 | 43 | UPLOADING = "uploading" |
43 | 44 | UPLOADED = "uploaded" |
@@ -118,6 +119,7 @@ def __init__( |
118 | 119 | self.scraper_succeeded: bool | None = None # whether scraper succeeded |
119 | 120 |
|
120 | 121 | self.max_memory_usage: int = 0 # maximum memory used by scraper |
| 122 | + self.cpu_ewma: float = 0.0 # exponentially weighted moving average (EWMA) of CPU percentage samples |
121 | 123 |
|
122 | 124 | # register stop/^C |
123 | 125 | self.register_signals() |
@@ -197,10 +199,41 @@ def submit_scraper_progress(self): |
197 | 199 | self.max_memory_usage, |
198 | 200 | ] |
199 | 201 | ) |
| 202 | + # --- CPU percentage calculation with EWMA smoothing --- |
| 203 | + cpu_sample = 0.0 |
| 204 | + cpu_stats = scraper_stats.get("cpu_stats", {}) |
| 205 | + precpu_stats = scraper_stats.get("precpu_stats", {}) |
| 206 | + prev_total = precpu_stats.get("cpu_usage", {}).get("total_usage", 0) |
| 207 | + curr_total = cpu_stats.get("cpu_usage", {}).get("total_usage", 0) |
| 208 | + prev_system = precpu_stats.get("system_cpu_usage", 0) |
| 209 | + curr_system = cpu_stats.get("system_cpu_usage", 0) |
| 210 | + |
| 211 | + delta_cpu = curr_total - prev_total |
| 212 | + delta_system = curr_system - prev_system |
| 213 | + |
| 214 | + online_cpus = cpu_stats.get("online_cpus", 0) |
| 215 | + if delta_system > 0 and delta_cpu >= 0: |
| 216 | + cpu_sample = (delta_cpu / float(delta_system)) * float(online_cpus) * 100.0 |
| 217 | + else: |
| 218 | + cpu_sample = 0.0 |
| 219 | + |
| 220 | + # apply EWMA smoothing to reduce effect of short spikes |
| 221 | + if self.cpu_ewma == 0.0: |
| 222 | + self.cpu_ewma = cpu_sample |
| 223 | + else: |
| 224 | + self.cpu_ewma = ( |
| 225 | + CPU_EWMA_ALPHA * cpu_sample + (1.0 - CPU_EWMA_ALPHA) * self.cpu_ewma |
| 226 | + ) |
| 227 | + |
200 | 228 | stats: dict[str, Any] = { |
201 | 229 | "memory": { |
202 | 230 | "max_usage": self.max_memory_usage, |
203 | | - } |
| 231 | + }, |
| 232 | + "cpu": { |
| 233 | + "current_percent": round(cpu_sample, 2), |
| 234 | + "ewma_percent": round(self.cpu_ewma, 2), |
| 235 | + "online_cpus": online_cpus, |
| 236 | + }, |
204 | 237 | } |
205 | 238 |
|
206 | 239 | # fetch and compute progression from progress file |
|
0 commit comments