Skip to content

Commit 80a7c0c

Browse files
committed
feat: Add backend APIs for SLO defaults and expected RPS calculation
Add two new endpoints to replace hardcoded UI calculations: - /api/v1/slo-defaults/{use_case}: Returns SLO targets at 75th percentile - /api/v1/expected-rps/{use_case}: Calculates RPS using research-based workload patterns (active_fraction * requests_per_min / 60) Update UI to fetch defaults from backend APIs instead of using heuristics. Fix Streamlit widget state issue where RPS input wouldn't update when use case or user count changed. Fixes: #50 Assisted-by: Claude <noreply@anthropic.com> Signed-off-by: Andre Fredette <afredette@redhat.com>
1 parent c416b74 commit 80a7c0c

File tree

2 files changed

+217
-7
lines changed

2 files changed

+217
-7
lines changed

backend/src/api/routes.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,144 @@ async def list_use_cases():
247247
raise HTTPException(status_code=500, detail=str(e)) from e
248248

249249

250+
def _round_to_nearest(value: float, nearest: int = 5) -> int:
251+
"""Round a value to the nearest multiple of `nearest`."""
252+
return int(round(value / nearest) * nearest)
253+
254+
255+
def _calculate_percentile_value(min_val: int, max_val: int, percentile: float = 0.75) -> int:
256+
"""Calculate value at given percentile between min and max, rounded to nearest 5."""
257+
value = min_val + (max_val - min_val) * percentile
258+
return _round_to_nearest(value, 5)
259+
260+
261+
@app.get("/api/v1/slo-defaults/{use_case}")
async def get_slo_defaults(use_case: str):
    """Get default SLO values for a use case.

    Returns SLO targets at the 75th percentile between min and max,
    rounded to the nearest 5.

    Raises:
        HTTPException: 404 if the config file or use case is missing,
            500 if the SLO data is incomplete or another error occurs.
    """
    try:
        import json

        json_path = Path(__file__).parent.parent.parent.parent / "data" / "business_context" / "use_case" / "configs" / "usecase_slo_workload.json"

        if not json_path.exists():
            logger.error(f"SLO workload config not found at: {json_path}")
            raise HTTPException(status_code=404, detail="SLO workload configuration not found")

        # Read explicitly as UTF-8 so behavior doesn't depend on the
        # platform's default encoding.
        data = json.loads(json_path.read_text(encoding="utf-8"))

        use_case_data = data.get("use_case_slo_workload", {}).get(use_case)
        if not use_case_data:
            raise HTTPException(status_code=404, detail=f"Use case '{use_case}' not found")

        slo_targets = use_case_data.get("slo_targets", {})

        defaults = {
            "use_case": use_case,
            "description": use_case_data.get("description", ""),
        }
        # Build min/max/default for each metric uniformly. A KeyError here
        # means the config is incomplete and is surfaced as a 500 below.
        for metric in ("ttft_ms", "itl_ms", "e2e_ms"):
            bounds = slo_targets[metric]
            defaults[metric] = {
                "min": bounds["min"],
                "max": bounds["max"],
                "default": _calculate_percentile_value(bounds["min"], bounds["max"], 0.75),
            }

        return {"success": True, "slo_defaults": defaults}

    except HTTPException:
        raise
    except KeyError as e:
        logger.error(f"Missing SLO data for {use_case}: {e}")
        raise HTTPException(status_code=500, detail=f"Missing SLO data: {e}") from e
    except Exception as e:
        logger.error(f"Failed to get SLO defaults for {use_case}: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
320+
321+
322+
@app.get("/api/v1/expected-rps/{use_case}")
async def get_expected_rps(use_case: str, user_count: int = 1000):
    """Calculate expected RPS for a use case based on workload patterns.

    Uses research-backed workload distribution parameters:
    - active_fraction: percentage of users active at any time
    - requests_per_active_user_per_min: request rate per active user

    Formula: expected_rps = (user_count * active_fraction * requests_per_min) / 60

    Raises:
        HTTPException: 404 if the config file or use case is missing,
            500 if the workload data is incomplete or another error occurs.
    """
    try:
        import json

        json_path = Path(__file__).parent.parent.parent.parent / "data" / "business_context" / "use_case" / "configs" / "usecase_slo_workload.json"

        if not json_path.exists():
            logger.error(f"SLO workload config not found at: {json_path}")
            raise HTTPException(status_code=404, detail="SLO workload configuration not found")

        # Read explicitly as UTF-8 so behavior doesn't depend on the
        # platform's default encoding.
        data = json.loads(json_path.read_text(encoding="utf-8"))

        use_case_data = data.get("use_case_slo_workload", {}).get(use_case)
        if not use_case_data:
            raise HTTPException(status_code=404, detail=f"Use case '{use_case}' not found")

        workload = use_case_data.get("workload", {})

        # Required workload parameters: a KeyError here is surfaced as a 500.
        active_fraction = workload["active_fraction"]
        requests_per_min = workload["requests_per_active_user_per_min"]
        # Optional parameters with documented defaults.
        peak_multiplier = workload.get("peak_multiplier", 2.0)
        distribution = workload.get("distribution", "poisson")

        # Expected RPS from the research-based formula; clamp to a minimum
        # of 1 RPS and round to 2 decimals for display.
        expected_concurrent = int(user_count * active_fraction)
        expected_rps = (expected_concurrent * requests_per_min) / 60
        expected_rps = max(1, round(expected_rps, 2))

        # Peak RPS for capacity planning (applied to the clamped average).
        peak_rps = expected_rps * peak_multiplier

        return {
            "success": True,
            "use_case": use_case,
            "user_count": user_count,
            "workload_params": {
                "active_fraction": active_fraction,
                "requests_per_active_user_per_min": requests_per_min,
                "peak_multiplier": peak_multiplier,
                "distribution": distribution
            },
            "expected_rps": expected_rps,
            "expected_concurrent_users": expected_concurrent,
            "peak_rps": round(peak_rps, 2)
        }

    except HTTPException:
        raise
    except KeyError as e:
        logger.error(f"Missing workload data for {use_case}: {e}")
        raise HTTPException(status_code=500, detail=f"Missing workload data: {e}") from e
    except Exception as e:
        logger.error(f"Failed to calculate expected RPS for {use_case}: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
386+
387+
250388
# Get benchmark data
251389
@app.get("/api/v1/benchmarks")
252390
async def get_benchmarks():

ui/app.py

Lines changed: 79 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2020,6 +2020,55 @@ def _get_hardware_selection_reason(priority: str, hw_option: dict, slo_targets:
20202020
# RANKED RECOMMENDATIONS (Backend API Integration)
20212021
# =============================================================================
20222022

2023+
@st.cache_data(ttl=300)
def fetch_slo_defaults(use_case: str) -> dict | None:
    """Fetch default SLO values for a use case from the backend API.

    Returns dict with ttft_ms, itl_ms, e2e_ms each containing min, max, default,
    or None if the request fails or the backend reports failure.
    Cached for 5 minutes.
    """
    endpoint = f"{API_BASE_URL}/api/v1/slo-defaults/{use_case}"
    try:
        resp = requests.get(endpoint, timeout=10)
        resp.raise_for_status()
        payload = resp.json()
        # Only surface the defaults when the backend explicitly succeeded.
        return payload.get("slo_defaults") if payload.get("success") else None
    except Exception as e:
        # Best-effort fetch: callers fall back to local heuristics on None.
        logger.warning(f"Failed to fetch SLO defaults for {use_case}: {e}")
        return None
2043+
2044+
2045+
@st.cache_data(ttl=300)
def fetch_expected_rps(use_case: str, user_count: int) -> dict | None:
    """Fetch expected RPS for a use case from the backend API.

    Uses research-backed workload patterns to calculate:
    - expected_rps: average requests per second
    - peak_rps: peak capacity needed
    - workload_params: active_fraction, requests_per_min, etc.

    Returns the full response dict, or None if the request fails or the
    backend reports failure. Cached for 5 minutes.
    """
    endpoint = f"{API_BASE_URL}/api/v1/expected-rps/{use_case}"
    try:
        resp = requests.get(endpoint, params={"user_count": user_count}, timeout=10)
        resp.raise_for_status()
        payload = resp.json()
        # Only surface the payload when the backend explicitly succeeded.
        return payload if payload.get("success") else None
    except Exception as e:
        # Best-effort fetch: callers fall back to local heuristics on None.
        logger.warning(f"Failed to fetch expected RPS for {use_case}: {e}")
        return None
2070+
2071+
20232072
def fetch_ranked_recommendations(
20242073
use_case: str,
20252074
user_count: int,
@@ -4747,10 +4796,27 @@ def render_slo_cards(use_case: str, user_count: int, priority: str = "balanced",
47474796
ttft_default = int(ttft_max_raw * ttft_factor)
47484797
itl_default = int(itl_max_raw * itl_factor)
47494798
e2e_default = int(e2e_max_raw * e2e_factor)
4750-
4751-
# Calculate QPS based on user count
4752-
estimated_qps = max(1, user_count // 50)
4753-
4799+
4800+
# Fetch expected RPS from backend using research-based workload patterns
4801+
rps_data = fetch_expected_rps(use_case, user_count)
4802+
if rps_data:
4803+
estimated_qps = int(rps_data.get("expected_rps", 1))
4804+
else:
4805+
# Fallback to simple heuristic if API fails
4806+
estimated_qps = max(1, user_count // 50)
4807+
4808+
# Track if use_case or user_count changed - if so, reset custom_qps to use new default
4809+
last_use_case = st.session_state.get("_last_rps_use_case")
4810+
last_user_count = st.session_state.get("_last_rps_user_count")
4811+
if last_use_case != use_case or last_user_count != user_count:
4812+
# Use case or user count changed - reset to new calculated default
4813+
st.session_state.custom_qps = None
4814+
st.session_state._last_rps_use_case = use_case
4815+
st.session_state._last_rps_user_count = user_count
4816+
# Clear the widget key to force re-render with new value
4817+
if "edit_qps" in st.session_state:
4818+
del st.session_state["edit_qps"]
4819+
47544820
# Use custom values if set, otherwise use MAX as default (shows all configs)
47554821
# Ensure all values are integers for slider compatibility
47564822
ttft = int(st.session_state.custom_ttft) if st.session_state.custom_ttft else ttft_default
@@ -4873,6 +4939,12 @@ def render_slo_cards(use_case: str, user_count: int, priority: str = "balanced",
48734939
prev_itl = st.session_state.get("_last_itl")
48744940
prev_e2e = st.session_state.get("_last_e2e")
48754941

4942+
# Fetch use-case specific SLO defaults from backend API
4943+
slo_defaults = fetch_slo_defaults(use_case)
4944+
default_ttft = slo_defaults["ttft_ms"]["default"] if slo_defaults else 500
4945+
default_itl = slo_defaults["itl_ms"]["default"] if slo_defaults else 50
4946+
default_e2e = slo_defaults["e2e_ms"]["default"] if slo_defaults else 10000
4947+
48764948
# Helper function to get range for a metric and percentile
48774949
def get_metric_range(metric: str, percentile_key: str) -> tuple:
48784950
if metric == "ttft":
@@ -4894,7 +4966,7 @@ def get_metric_range(metric: str, percentile_key: str) -> tuple:
48944966
# === TTFT ===
48954967
ttft_min, ttft_max = get_metric_range("ttft", st.session_state.ttft_percentile)
48964968
if 'input_ttft' not in st.session_state:
4897-
st.session_state.input_ttft = ttft_max
4969+
st.session_state.input_ttft = default_ttft
48984970

48994971
st.markdown('<div style="margin-top: 0.5rem; margin-bottom: 0.25rem;"><span style="color: #EE0000; font-weight: 700; font-size: 0.95rem;">TTFT (Time to First Token)</span></div>', unsafe_allow_html=True)
49004972
ttft_val_col, ttft_pct_col = st.columns([2, 1])
@@ -4910,7 +4982,7 @@ def get_metric_range(metric: str, percentile_key: str) -> tuple:
49104982
# === ITL ===
49114983
itl_min, itl_max = get_metric_range("itl", st.session_state.itl_percentile)
49124984
if 'input_itl' not in st.session_state:
4913-
st.session_state.input_itl = itl_max
4985+
st.session_state.input_itl = default_itl
49144986

49154987
st.markdown('<div style="margin-top: 1rem; margin-bottom: 0.25rem;"><span style="color: #EE0000; font-weight: 700; font-size: 0.95rem;">ITL (Inter-Token Latency)</span></div>', unsafe_allow_html=True)
49164988
itl_val_col, itl_pct_col = st.columns([2, 1])
@@ -4926,7 +4998,7 @@ def get_metric_range(metric: str, percentile_key: str) -> tuple:
49264998
# === E2E ===
49274999
e2e_min, e2e_max = get_metric_range("e2e", st.session_state.e2e_percentile)
49285000
if 'input_e2e' not in st.session_state:
4929-
st.session_state.input_e2e = e2e_max
5001+
st.session_state.input_e2e = default_e2e
49305002

49315003
st.markdown('<div style="margin-top: 1rem; margin-bottom: 0.25rem;"><span style="color: #EE0000; font-weight: 700; font-size: 0.95rem;">E2E (End-to-End Latency)</span></div>', unsafe_allow_html=True)
49325004
e2e_val_col, e2e_pct_col = st.columns([2, 1])

0 commit comments

Comments
 (0)