@@ -3206,13 +3206,13 @@ def get_enhanced_recommendation(business_context: dict) -> Optional[dict]:
32063206 # Extract values from business_context for the API
32073207 use_case = business_context .get ("use_case" , "chatbot_conversational" )
32083208 priority = business_context .get ("priority" , "balanced" )
3209- user_count = business_context .get ("user_count" , 30 )
3209+ user_count = business_context .get ("user_count" , 1000 )
32103210 prompt_tokens = business_context .get ("prompt_tokens" , 512 )
32113211 output_tokens = business_context .get ("output_tokens" , 256 )
3212- expected_qps = business_context .get ("expected_qps" , user_count )
3213- ttft_target = business_context .get ("ttft_p95_target_ms" , 5000 )
3214- itl_target = business_context .get ("itl_p95_target_ms" , 200 )
3215- e2e_target = business_context .get ("e2e_p95_target_ms" , 60000 )
3212+ expected_qps = business_context .get ("expected_qps" , 1 ) # Default to 1 RPS, not user_count!
3213+ ttft_target = business_context .get ("ttft_p95_target_ms" , 500 )
3214+ itl_target = business_context .get ("itl_p95_target_ms" , 50 )
3215+ e2e_target = business_context .get ("e2e_p95_target_ms" , 10000 )
32163216 percentile = business_context .get ("percentile" , "p95" )
32173217
32183218 # Use the ranked-recommend-from-spec endpoint with proper fields
@@ -5044,13 +5044,21 @@ def get_metric_range(metric: str, percentile_key: str) -> tuple:
50445044 workload_data = load_research_workload_patterns ()
50455045 pattern = workload_data .get ('workload_distributions' , {}).get (use_case , {}) if workload_data else {}
50465046 peak_mult = pattern .get ('peak_multiplier' , 2.0 )
5047-
5047+
5048+ # Store workload profile values in session state for Recommendations tab
5049+ st .session_state .spec_prompt_tokens = prompt_tokens
5050+ st .session_state .spec_output_tokens = output_tokens
5051+ st .session_state .spec_peak_multiplier = peak_mult
5052+
50485053 # 1. Editable QPS - support up to 10M QPS for enterprise scale
50495054 # Get research-based default QPS for this use case
50505055 default_qps = estimated_qps # This is the research-based default
50515056 new_qps = st .number_input ("Expected RPS" , value = min (qps , 10000000 ), min_value = 1 , max_value = 10000000 , step = 1 , key = "edit_qps" , label_visibility = "collapsed" )
50525057 st .markdown (f'<div style="font-size: 0.9rem; color: rgba(255,255,255,0.7); margin-top: -0.75rem; margin-bottom: 0.5rem;">Expected RPS: <span style="color: white; font-weight: 700; font-size: 1rem;">{ new_qps } </span> <span style="color: rgba(255,255,255,0.4); font-size: 0.75rem;">(default: { default_qps } )</span></div>' , unsafe_allow_html = True )
5053-
5058+
5059+ # Store the actual QPS value shown to user (not just custom override)
5060+ st .session_state .spec_expected_qps = new_qps
5061+
50545062 if new_qps != qps :
50555063 st .session_state .custom_qps = new_qps
50565064
@@ -5816,33 +5824,31 @@ def render_results_tab(priority: str, models_df: pd.DataFrame):
58165824 st .session_state .pop ('ranked_response' , None )
58175825
58185826 if True : # Always regenerate
5819- # Get custom SLO values from session state (set in Tech Specs tab)
5827+ # Get all specification values from session state (set in Tech Specs tab)
5828+ # These are the EXACT values the user sees on the Technical Specifications tab
58205829 use_case = final_extraction .get ("use_case" , "chatbot_conversational" )
5821-
5822- # Get SLO targets - use custom values if set, otherwise use defaults
5823- # Use explicit None check to handle 0 values correctly
5824- ttft_target = st .session_state .get ("custom_ttft" ) if st .session_state .get ("custom_ttft" ) is not None else (st .session_state .get ("input_ttft" ) if st .session_state .get ("input_ttft" ) is not None else 15000 )
5825- itl_target = st .session_state .get ("custom_itl" ) if st .session_state .get ("custom_itl" ) is not None else (st .session_state .get ("input_itl" ) if st .session_state .get ("input_itl" ) is not None else 200 )
5826- e2e_target = st .session_state .get ("custom_e2e" ) if st .session_state .get ("custom_e2e" ) is not None else (st .session_state .get ("input_e2e" ) if st .session_state .get ("input_e2e" ) is not None else 60000 )
5827- qps_target = st .session_state .get ("custom_qps" ) if st .session_state .get ("custom_qps" ) is not None else (st .session_state .get ("input_qps" ) if st .session_state .get ("input_qps" ) is not None else final_extraction .get ("user_count" , 30 ))
5828-
5829- # Get token config for use case
5830- token_configs = {
5831- "chatbot_conversational" : (512 , 256 ),
5832- "code_completion" : (512 , 256 ),
5833- "code_generation_detailed" : (1024 , 1024 ),
5834- "translation" : (512 , 256 ),
5835- "content_generation" : (512 , 256 ),
5836- "summarization_short" : (4096 , 512 ),
5837- "document_analysis_rag" : (4096 , 512 ),
5838- "long_document_summarization" : (10240 , 1536 ),
5839- "research_legal_analysis" : (10240 , 1536 ), # Fixed: was (4096, 1024)
5840- }
5841- prompt_tokens , output_tokens = token_configs .get (use_case , (512 , 256 ))
5842-
5830+ user_count = final_extraction .get ("user_count" , 1000 )
5831+
5832+ # Get SLO targets from session state (set by number_input widgets)
5833+ ttft_target = st .session_state .get ("custom_ttft" ) or st .session_state .get ("input_ttft" ) or 500
5834+ itl_target = st .session_state .get ("custom_itl" ) or st .session_state .get ("input_itl" ) or 50
5835+ e2e_target = st .session_state .get ("custom_e2e" ) or st .session_state .get ("input_e2e" ) or 10000
5836+
5837+ # Get QPS from session state - this is the value shown in the Expected RPS input
5838+ qps_target = st .session_state .get ("spec_expected_qps" ) or st .session_state .get ("custom_qps" ) or 1
5839+
5840+ # Get token config from session state (set by render_slo_cards)
5841+ prompt_tokens = st .session_state .get ("spec_prompt_tokens" , 512 )
5842+ output_tokens = st .session_state .get ("spec_output_tokens" , 256 )
5843+
5844+ # Get percentile from session state (default to p95)
5845+ # Note: Currently we use a single percentile for all metrics for the backend query
5846+ # The UI allows per-metric percentiles but the backend uses one
5847+ percentile = st .session_state .get ("slo_percentile" , "p95" )
5848+
58435849 business_context = {
58445850 "use_case" : use_case ,
5845- "user_count" : final_extraction . get ( " user_count" , 1000 ) ,
5851+ "user_count" : user_count ,
58465852 "priority" : used_priority ,
58475853 "hardware_preference" : final_extraction .get ("hardware" ),
58485854 "prompt_tokens" : prompt_tokens ,
@@ -5851,6 +5857,7 @@ def render_results_tab(priority: str, models_df: pd.DataFrame):
58515857 "ttft_p95_target_ms" : int (ttft_target ),
58525858 "itl_p95_target_ms" : int (itl_target ),
58535859 "e2e_p95_target_ms" : int (e2e_target ),
5860+ "percentile" : percentile ,
58545861 }
58555862 with st .spinner (f"Scoring { len (models_df )} models with MCDM..." ):
58565863 recommendation = get_enhanced_recommendation (business_context )
0 commit comments