Commit 3cee8e5

fix: Pass correct Tech Specs values to recommendation API
Store workload profile values (prompt_tokens, output_tokens, peak_multiplier, expected_qps) in session state when rendering the Technical Specifications tab. Read these values in render_results_tab to ensure the API receives exactly what the user sees on screen.

Changes:
- Store spec_prompt_tokens, spec_output_tokens, spec_peak_multiplier, spec_expected_qps in session state from render_slo_cards
- Read token config from session state instead of hardcoded dict
- Fix expected_qps fallback to use 1 RPS instead of user_count
- Pass percentile from session state to backend API
- Add comprehensive request logging to ranked_recommend_from_spec endpoint

Fixes: #51

Assisted-by: Claude <noreply@anthropic.com>
Signed-off-by: Andre Fredette <afredette@redhat.com>
1 parent 80a7c0c commit 3cee8e5
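
At its core the fix is a session-state handoff between tabs. Below is a minimal standalone sketch of the pattern, not the app's actual code: `render_tech_specs_tab` is a hypothetical stand-in for the real `render_slo_cards`, and the workload values are placeholders, but the `spec_*` keys match the ones this commit introduces.

```python
import streamlit as st

def render_tech_specs_tab():
    # Writer side (stands in for render_slo_cards): persist exactly
    # what the user sees on screen under purpose-named spec_* keys.
    prompt_tokens, output_tokens = 512, 256  # hypothetical workload profile
    qps = st.number_input("Expected RPS", value=1, min_value=1)
    st.session_state.spec_prompt_tokens = prompt_tokens
    st.session_state.spec_output_tokens = output_tokens
    st.session_state.spec_expected_qps = qps

def render_results_tab():
    # Reader side: build the API request from the stored values so it
    # matches the Technical Specifications tab exactly.
    prompt_tokens = st.session_state.get("spec_prompt_tokens", 512)
    output_tokens = st.session_state.get("spec_output_tokens", 256)
    qps = st.session_state.get("spec_expected_qps") or 1
    st.write(f"Token config: {prompt_tokens} -> {output_tokens} @ {qps} RPS")

tab1, tab2 = st.tabs(["Technical Specifications", "Recommendations"])
with tab1:
    render_tech_specs_tab()
with tab2:
    render_results_tab()
```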

2 files changed: +69 -45 lines changed

backend/src/api/routes.py

Lines changed: 31 additions & 14 deletions

```diff
@@ -727,23 +727,40 @@ async def ranked_recommend_from_spec(request: RankedRecommendationFromSpecRequest):
     itl_target = request.get_itl_target()
     e2e_target = request.get_e2e_target()
     percentile = request.percentile
-
-    logger.info(
-        f"Received ranked recommendation from spec: use_case={request.use_case}, "
-        f"user_count={request.user_count}, qps={request.expected_qps}"
-    )
-    logger.info(
-        f"  SLO targets ({percentile}): TTFT={ttft_target}ms, "
-        f"ITL={itl_target}ms, E2E={e2e_target}ms"
-    )
-    logger.info(
-        f"  Token config: {request.prompt_tokens} -> {request.output_tokens}"
-    )
+
+    # Log complete request for debugging
+    logger.info("=" * 60)
+    logger.info("RANKED-RECOMMEND-FROM-SPEC REQUEST")
+    logger.info("=" * 60)
+    logger.info(f"  use_case: {request.use_case}")
+    logger.info(f"  user_count: {request.user_count}")
+    logger.info(f"  latency_requirement: {request.latency_requirement}")
+    logger.info(f"  budget_constraint: {request.budget_constraint}")
+    logger.info(f"  hardware_preference: {request.hardware_preference}")
+    logger.info(f"  prompt_tokens: {request.prompt_tokens}")
+    logger.info(f"  output_tokens: {request.output_tokens}")
+    logger.info(f"  expected_qps: {request.expected_qps}")
+    logger.info(f"  percentile: {percentile}")
+    logger.info(f"  ttft_target_ms (raw): {request.ttft_target_ms}")
+    logger.info(f"  itl_target_ms (raw): {request.itl_target_ms}")
+    logger.info(f"  e2e_target_ms (raw): {request.e2e_target_ms}")
+    logger.info(f"  ttft_p95_target_ms (legacy): {request.ttft_p95_target_ms}")
+    logger.info(f"  itl_p95_target_ms (legacy): {request.itl_p95_target_ms}")
+    logger.info(f"  e2e_p95_target_ms (legacy): {request.e2e_p95_target_ms}")
+    logger.info(f"  -> Resolved TTFT: {ttft_target}ms")
+    logger.info(f"  -> Resolved ITL: {itl_target}ms")
+    logger.info(f"  -> Resolved E2E: {e2e_target}ms")
+    logger.info(f"  min_accuracy: {request.min_accuracy}")
+    logger.info(f"  max_cost: {request.max_cost}")
+    logger.info(f"  include_near_miss: {request.include_near_miss}")
     if request.weights:
         logger.info(
-            f"  Weights: A={request.weights.accuracy}, P={request.weights.price}, "
-            f"L={request.weights.latency}, C={request.weights.complexity}"
+            f"  weights: accuracy={request.weights.accuracy}, price={request.weights.price}, "
+            f"latency={request.weights.latency}, complexity={request.weights.complexity}"
         )
+    else:
+        logger.info("  weights: None (using defaults)")
+    logger.info("=" * 60)

     # Build specifications dict for workflow
     specifications = {
```
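The "(raw)", "(legacy)", and "Resolved" lines come from the request model's `get_*_target()` helpers, whose implementation is not part of this diff. As an assumption, the resolution likely prefers the percentile-aware field and falls back to the legacy p95-only field, roughly:

```python
# Hypothetical sketch of the resolution the log lines reflect; the actual
# get_ttft_target() implementation is not shown in this commit.
from typing import Optional

class RankedRecommendationFromSpecRequest:
    ttft_target_ms: Optional[int] = None      # percentile-aware field ("raw")
    ttft_p95_target_ms: Optional[int] = None  # p95-only field ("legacy")

    def get_ttft_target(self) -> Optional[int]:
        # Prefer the new field; fall back to the legacy one.
        if self.ttft_target_ms is not None:
            return self.ttft_target_ms
        return self.ttft_p95_target_ms
```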

ui/app.py

Lines changed: 38 additions & 31 deletions

```diff
@@ -3206,13 +3206,13 @@ def get_enhanced_recommendation(business_context: dict) -> Optional[dict]:
     # Extract values from business_context for the API
     use_case = business_context.get("use_case", "chatbot_conversational")
     priority = business_context.get("priority", "balanced")
-    user_count = business_context.get("user_count", 30)
+    user_count = business_context.get("user_count", 1000)
     prompt_tokens = business_context.get("prompt_tokens", 512)
     output_tokens = business_context.get("output_tokens", 256)
-    expected_qps = business_context.get("expected_qps", user_count)
-    ttft_target = business_context.get("ttft_p95_target_ms", 5000)
-    itl_target = business_context.get("itl_p95_target_ms", 200)
-    e2e_target = business_context.get("e2e_p95_target_ms", 60000)
+    expected_qps = business_context.get("expected_qps", 1)  # Default to 1 RPS, not user_count!
+    ttft_target = business_context.get("ttft_p95_target_ms", 500)
+    itl_target = business_context.get("itl_p95_target_ms", 50)
+    e2e_target = business_context.get("e2e_p95_target_ms", 10000)
     percentile = business_context.get("percentile", "p95")

     # Use the ranked-recommend-from-spec endpoint with proper fields
```
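The expected_qps change is the user-visible bug fix here: with the old fallback, a request that omitted expected_qps for 1,000 users was treated as 1,000 RPS. A quick illustration of the two fallbacks:

```python
business_context = {"use_case": "chatbot_conversational", "user_count": 1000}
user_count = business_context.get("user_count", 1000)

old_qps = business_context.get("expected_qps", user_count)  # old default: 1000 RPS
new_qps = business_context.get("expected_qps", 1)           # new default: 1 RPS
```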
```diff
@@ -5044,13 +5044,21 @@ def get_metric_range(metric: str, percentile_key: str) -> tuple:
     workload_data = load_research_workload_patterns()
     pattern = workload_data.get('workload_distributions', {}).get(use_case, {}) if workload_data else {}
     peak_mult = pattern.get('peak_multiplier', 2.0)
-
+
+    # Store workload profile values in session state for Recommendations tab
+    st.session_state.spec_prompt_tokens = prompt_tokens
+    st.session_state.spec_output_tokens = output_tokens
+    st.session_state.spec_peak_multiplier = peak_mult
+
     # 1. Editable QPS - support up to 10M QPS for enterprise scale
     # Get research-based default QPS for this use case
     default_qps = estimated_qps  # This is the research-based default
     new_qps = st.number_input("Expected RPS", value=min(qps, 10000000), min_value=1, max_value=10000000, step=1, key="edit_qps", label_visibility="collapsed")
     st.markdown(f'<div style="font-size: 0.9rem; color: rgba(255,255,255,0.7); margin-top: -0.75rem; margin-bottom: 0.5rem;">Expected RPS: <span style="color: white; font-weight: 700; font-size: 1rem;">{new_qps}</span> <span style="color: rgba(255,255,255,0.4); font-size: 0.75rem;">(default: {default_qps})</span></div>', unsafe_allow_html=True)
-
+
+    # Store the actual QPS value shown to user (not just custom override)
+    st.session_state.spec_expected_qps = new_qps
+
     if new_qps != qps:
         st.session_state.custom_qps = new_qps

```
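Note that `st.number_input(..., key="edit_qps")` already mirrors the widget's value into `st.session_state["edit_qps"]`; the explicit copy gives the Recommendations tab a stable, purpose-named key. A minimal sketch of that design choice (values here are placeholders):

```python
import streamlit as st

new_qps = st.number_input("Expected RPS", value=1, min_value=1, key="edit_qps")

# Streamlit mirrors the widget value at st.session_state["edit_qps"], but
# widget-keyed entries can be dropped when the widget isn't rendered on a
# rerun. Copying into a plain key keeps the value readable from other tabs.
st.session_state.spec_expected_qps = new_qps
```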
```diff
@@ -5816,33 +5824,31 @@ def render_results_tab(priority: str, models_df: pd.DataFrame):
         st.session_state.pop('ranked_response', None)

     if True:  # Always regenerate
-        # Get custom SLO values from session state (set in Tech Specs tab)
+        # Get all specification values from session state (set in Tech Specs tab)
+        # These are the EXACT values the user sees on the Technical Specifications tab
         use_case = final_extraction.get("use_case", "chatbot_conversational")
-
-        # Get SLO targets - use custom values if set, otherwise use defaults
-        # Use explicit None check to handle 0 values correctly
-        ttft_target = st.session_state.get("custom_ttft") if st.session_state.get("custom_ttft") is not None else (st.session_state.get("input_ttft") if st.session_state.get("input_ttft") is not None else 15000)
-        itl_target = st.session_state.get("custom_itl") if st.session_state.get("custom_itl") is not None else (st.session_state.get("input_itl") if st.session_state.get("input_itl") is not None else 200)
-        e2e_target = st.session_state.get("custom_e2e") if st.session_state.get("custom_e2e") is not None else (st.session_state.get("input_e2e") if st.session_state.get("input_e2e") is not None else 60000)
-        qps_target = st.session_state.get("custom_qps") if st.session_state.get("custom_qps") is not None else (st.session_state.get("input_qps") if st.session_state.get("input_qps") is not None else final_extraction.get("user_count", 30))
-
-        # Get token config for use case
-        token_configs = {
-            "chatbot_conversational": (512, 256),
-            "code_completion": (512, 256),
-            "code_generation_detailed": (1024, 1024),
-            "translation": (512, 256),
-            "content_generation": (512, 256),
-            "summarization_short": (4096, 512),
-            "document_analysis_rag": (4096, 512),
-            "long_document_summarization": (10240, 1536),
-            "research_legal_analysis": (10240, 1536),  # Fixed: was (4096, 1024)
-        }
-        prompt_tokens, output_tokens = token_configs.get(use_case, (512, 256))
-
+        user_count = final_extraction.get("user_count", 1000)
+
+        # Get SLO targets from session state (set by number_input widgets)
+        ttft_target = st.session_state.get("custom_ttft") or st.session_state.get("input_ttft") or 500
+        itl_target = st.session_state.get("custom_itl") or st.session_state.get("input_itl") or 50
+        e2e_target = st.session_state.get("custom_e2e") or st.session_state.get("input_e2e") or 10000
+
+        # Get QPS from session state - this is the value shown in the Expected RPS input
+        qps_target = st.session_state.get("spec_expected_qps") or st.session_state.get("custom_qps") or 1
+
+        # Get token config from session state (set by render_slo_cards)
+        prompt_tokens = st.session_state.get("spec_prompt_tokens", 512)
+        output_tokens = st.session_state.get("spec_output_tokens", 256)
+
+        # Get percentile from session state (default to p95)
+        # Note: Currently we use a single percentile for all metrics for the backend query
+        # The UI allows per-metric percentiles but the backend uses one
+        percentile = st.session_state.get("slo_percentile", "p95")
+
         business_context = {
             "use_case": use_case,
-            "user_count": final_extraction.get("user_count", 1000),
+            "user_count": user_count,
             "priority": used_priority,
             "hardware_preference": final_extraction.get("hardware"),
             "prompt_tokens": prompt_tokens,
```
```diff
@@ -5851,6 +5857,7 @@ def render_results_tab(priority: str, models_df: pd.DataFrame):
             "ttft_p95_target_ms": int(ttft_target),
             "itl_p95_target_ms": int(itl_target),
             "e2e_p95_target_ms": int(e2e_target),
+            "percentile": percentile,
         }
         with st.spinner(f"Scoring {len(models_df)} models with MCDM..."):
             recommendation = get_enhanced_recommendation(business_context)
```
