|
15 | 15 | load_dotenv(_load_env) |
16 | 16 |
|
17 | 17 | import random |
| 18 | +from collections import deque |
18 | 19 | from datetime import datetime, timezone, timedelta |
19 | 20 |
|
20 | 21 | from fastapi import FastAPI, HTTPException |
|
66 | 67 |
|
67 | 68 | logger = structlog.get_logger(__name__) |
68 | 69 |
|
| 70 | + |
| 71 | +# Real-time API metrics tracker |
| 72 | +# Measures actual request counts, response times, and uptime |
| 73 | +# These replace the simulated demo metrics on the Overview dashboard |
class APIMetrics:
    """
    Tracks real API performance metrics for the TradePulse dashboard.

    All metrics are measured from actual requests hitting this FastAPI
    instance on Railway — not simulated values.

    Design decisions:
    - deque with maxlen for rolling windows — O(1) append/popleft
    - Module-level singleton — shared across all requests
    - No external dependencies — pure Python stdlib
    """

    def __init__(self) -> None:
        # Wall-clock start time; uptime is derived from it.
        self.start_time = time.time()
        # Rolling window of the last 100 response times in milliseconds.
        self.response_times: deque[float] = deque(maxlen=100)
        # Total request count since startup.
        self.total_requests = 0
        # Timestamps of recent requests for the req/min calculation.
        self.recent_request_times: deque[float] = deque(maxlen=1000)
        # Count of 4xx/5xx responses (status >= 400); 2xx and 3xx
        # responses are treated as successes.
        self.error_count = 0

    def record_request(self, response_time_ms: float, status_code: int) -> None:
        """Records a completed request's latency and status."""
        self.response_times.append(response_time_ms)
        self.total_requests += 1
        self.recent_request_times.append(time.time())
        if status_code >= 400:
            self.error_count += 1

    def get_p99_latency(self) -> float:
        """Returns p99 response time in milliseconds (0.0 if no data)."""
        if not self.response_times:
            return 0.0
        sorted_times = sorted(self.response_times)
        idx = int(len(sorted_times) * 0.99)
        # Clamp: with a full window, idx can equal len-1 exactly; min()
        # keeps the index in range for any window size.
        return float(round(sorted_times[min(idx, len(sorted_times) - 1)], 1))

    def get_p50_latency(self) -> float:
        """Returns p50 (median) response time in milliseconds (0.0 if no data)."""
        if not self.response_times:
            return 0.0
        sorted_times = sorted(self.response_times)
        return float(round(sorted_times[len(sorted_times) // 2], 1))

    def get_requests_per_minute(self) -> int:
        """Returns the number of requests seen in the last 60 seconds."""
        cutoff = time.time() - 60
        # Evict stale timestamps from the left (O(1) each) so the deque
        # holds only the live 60 s window — stale entries no longer occupy
        # maxlen slots, and we avoid rebuilding a filtered list each call.
        while self.recent_request_times and self.recent_request_times[0] <= cutoff:
            self.recent_request_times.popleft()
        return len(self.recent_request_times)

    def get_uptime_seconds(self) -> int:
        """Returns whole seconds since app startup."""
        return int(time.time() - self.start_time)

    def get_uptime_formatted(self) -> str:
        """Returns human-readable uptime, e.g. "2h 34m 12s"."""
        seconds = self.get_uptime_seconds()
        hours = seconds // 3600
        minutes = (seconds % 3600) // 60
        secs = seconds % 60
        if hours > 0:
            return f"{hours}h {minutes}m {secs}s"
        if minutes > 0:
            return f"{minutes}m {secs}s"
        return f"{secs}s"

    def get_avg_latency(self) -> float:
        """Returns the mean response time in milliseconds (0.0 if no data)."""
        if not self.response_times:
            return 0.0
        return float(round(sum(self.response_times) / len(self.response_times), 1))
| 149 | + |
| 150 | + |
# Module-level singleton — shared across all requests in this process.
# Created at import time. NOTE(review): if the deployment runs multiple
# worker processes, each worker holds its own instance, so these numbers
# are per-worker — confirm the Railway worker count.
metrics = APIMetrics()
| 153 | + |
# Paths resolved relative to this file (not the process CWD) so static
# assets are found no matter where the server is launched from.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
STATIC_DIR = os.path.join(BASE_DIR, "static")
71 | 156 |
|
@@ -138,6 +223,25 @@ async def log_requests(request, call_next): |
138 | 223 | get_metrics().emit_metric("APIErrors", 1.0, "Count", {"path": request.url.path, "status": str(response.status_code)}) |
139 | 224 | return response |
140 | 225 |
|
| 226 | + |
@app.middleware("http")
async def track_metrics(request: Request, call_next):
    """
    Middleware that measures response time for every request.

    Records to the module-level APIMetrics singleton and adds an
    X-Response-Time header to every response. Requests under /static
    are excluded from the metrics so dashboard numbers reflect API
    traffic only.
    """
    # perf_counter is a monotonic clock — unlike time.time(), it cannot
    # jump backwards on NTP adjustment, so durations are never negative.
    start = time.perf_counter()
    response = await call_next(request)
    duration_ms = (time.perf_counter() - start) * 1000.0

    if not request.url.path.startswith("/static"):
        metrics.record_request(duration_ms, response.status_code)

    response.headers["X-Response-Time"] = f"{duration_ms:.1f}ms"
    return response
| 243 | + |
| 244 | + |
141 | 245 | _start_time = time.time() |
142 | 246 |
|
143 | 247 |
|
@@ -308,6 +412,51 @@ async def health(): |
308 | 412 | return {"status": "healthy", "mode": "demo"} |
309 | 413 |
|
310 | 414 |
|
@app.get("/metrics")
async def read_api_metrics():
    """
    Returns real performance metrics for the TradePulse dashboard.

    All values are measured from actual API requests on this Railway
    instance — not simulated or hardcoded.

    Used by the Overview dashboard to replace demo mode fake numbers
    with genuinely accurate operational data.

    Returns:
        uptime_seconds: Seconds since app startup on Railway
        uptime_formatted: Human readable uptime e.g. "2h 34m 12s"
        total_requests: Total API requests since startup
        requests_per_minute: Requests in the last 60 seconds
        p50_latency_ms: Median API response time in milliseconds
        p99_latency_ms: 99th percentile response time in ms
        avg_latency_ms: Average response time in milliseconds
        error_count: Total 4xx/5xx responses since startup
        error_rate_pct: Percentage of requests with a 4xx/5xx status
        status: healthy / degraded based on p99 latency
    """
    total = metrics.total_requests
    # Guard the division before any traffic has arrived, and always
    # produce a float so the JSON type is stable (0.0, never int 0).
    error_rate = round(metrics.error_count / total * 100.0, 2) if total > 0 else 0.0

    p99 = metrics.get_p99_latency()
    # Report the service as degraded once p99 latency reaches 200 ms.
    status = "healthy" if p99 < 200 else "degraded"

    return {
        "uptime_seconds": metrics.get_uptime_seconds(),
        "uptime_formatted": metrics.get_uptime_formatted(),
        "total_requests": total,
        "requests_per_minute": metrics.get_requests_per_minute(),
        "p50_latency_ms": metrics.get_p50_latency(),
        "p99_latency_ms": p99,
        "avg_latency_ms": metrics.get_avg_latency(),
        "error_count": metrics.error_count,
        "error_rate_pct": error_rate,
        "status": status,
    }
| 458 | + |
| 459 | + |
311 | 460 | @app.get("/market-prices") |
312 | 461 | async def get_market_prices(): |
313 | 462 | """ |
|
0 commit comments