redhat-et
diff --git a/.gitignore b/.gitignore
Lines changed: 0 additions & 5 deletions
diff --git a/backend/src/api/routes.py b/backend/src/api/routes.py
Lines changed: 3 additions & 2 deletions
diff --git a/backend/src/context_intent/gpu_normalizer.py b/backend/src/context_intent/gpu_normalizer.py
Lines changed: 93 additions & 0 deletions
diff --git a/backend/src/context_intent/schema.py b/backend/src/context_intent/schema.py
Lines changed: 4 additions & 3 deletions
diff --git a/backend/src/deployment/generator.py b/backend/src/deployment/generator.py
Lines changed: 7 additions & 13 deletions
@@ -83,8 +83,3 @@ docs/ai_assistant_*
 # Redundant/unused data files (identified in data audit)
 data/slo_ranges_from_benchmarks.json
 data/research/benchmark_slo_ranges.json
-
-# Benchmark source files (already merged into benchmarks_redhat_performance.json)
-data/benchmarks_BLIS.json
-data/benchmarks_estimated_performance.json
-data/benchmarks_interpolated_v2.json
@@ -621,7 +621,7 @@ class RankedRecommendationFromSpecRequest(BaseModel):
     # Intent fields
     use_case: str
     user_count: int
-    hardware_preference: str | None = None
+    preferred_gpu_types: list[str] | None = None  # GPU filter list (empty/None = any GPU)
 
     # Traffic profile fields
     prompt_tokens: int
@@ -670,7 +670,7 @@ async def ranked_recommend_from_spec(request: RankedRecommendationFromSpecReques
         logger.info("=" * 60)
         logger.info(f"  use_case: {request.use_case}")
         logger.info(f"  user_count: {request.user_count}")
-        logger.info(f"  hardware_preference: {request.hardware_preference}")
+        logger.info(f"  preferred_gpu_types: {request.preferred_gpu_types}")
         logger.info(f"  prompt_tokens: {request.prompt_tokens}")
         logger.info(f"  output_tokens: {request.output_tokens}")
         logger.info(f"  expected_qps: {request.expected_qps}")
@@ -696,6 +696,7 @@ async def ranked_recommend_from_spec(request: RankedRecommendationFromSpecReques
                 "use_case": request.use_case,
                 "user_count": request.user_count,
                 "domain_specialization": ["general"],
+                "preferred_gpu_types": request.preferred_gpu_types or [],
             },
             "traffic_profile": {
                 "prompt_tokens": request.prompt_tokens,
 
@@ -0,0 +1,93 @@
+"""GPU type normalization utility.
+
+Normalizes user-specified GPU types to canonical names used in benchmark data.
+Uses ModelCatalog as the single source of truth for GPU aliases.
+"""
+
+import logging
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from ..knowledge_base.model_catalog import ModelCatalog
+
+logger = logging.getLogger(__name__)
+
+# Canonical GPU names from benchmark data
+CANONICAL_GPUS = {"L4", "A100-40", "A100-80", "H100", "H200", "B200"}
+
+# Expansion map for shorthand/ambiguous names
+# When user says "A100" without specifying variant, include both
+GPU_EXPANSIONS = {
+    "A100": ["A100-80", "A100-40"],
+}
+
+# Singleton catalog instance to avoid repeated loading
+_catalog_instance: "ModelCatalog | None" = None
+
+
+def _get_catalog() -> "ModelCatalog":
+    """Get or create the ModelCatalog singleton."""
+    global _catalog_instance
+    if _catalog_instance is None:
+        from ..knowledge_base.model_catalog import ModelCatalog
+        _catalog_instance = ModelCatalog()
+    return _catalog_instance
+
+
+def normalize_gpu_types(gpu_types: list[str]) -> list[str]:
+    """
+    Normalize GPU types to canonical names using ModelCatalog aliases.
+
+    - Case-insensitive matching
+    - Uses ModelCatalog's alias lookup (from model_catalog.json)
+    - Expands shorthand (A100 → [A100-80, A100-40])
+    - Returns empty list for empty input
+
+    Args:
+        gpu_types: List of GPU type strings from user input or intent extraction
+
+    Returns:
+        List of canonical GPU names (uppercase), deduplicated and sorted
+    """
+    if not gpu_types:
+        return []
+
+    catalog = _get_catalog()
+    normalized = set()
+
+    for gpu in gpu_types:
+        if not gpu or not isinstance(gpu, str):
+            continue
+
+        gpu_stripped = gpu.strip()
+        gpu_upper = gpu_stripped.upper()
+
+        # Skip empty or "any gpu" values
+        if not gpu_upper or gpu_upper == "ANY GPU":
+            continue
+
+        # Check if it's an expansion case (e.g., A100 → both variants)
+        if gpu_upper in GPU_EXPANSIONS:
+            normalized.update(GPU_EXPANSIONS[gpu_upper])
+            logger.debug(f"Expanded '{gpu}' to {GPU_EXPANSIONS[gpu_upper]}")
+            continue
+
+        # Use ModelCatalog's alias lookup (handles case-insensitivity)
+        gpu_info = catalog.get_gpu_type(gpu_stripped)
+        if gpu_info:
+            normalized.add(gpu_info.gpu_type.upper())
+            logger.debug(f"Resolved '{gpu}' to '{gpu_info.gpu_type}' via ModelCatalog")
+            continue
+
+        # Check if it's already a canonical name (direct match)
+        if gpu_upper in CANONICAL_GPUS:
+            normalized.add(gpu_upper)
+            continue
+
+        # Unknown GPU type - log warning and skip
+        logger.warning(
+            f"Unknown GPU type '{gpu}' - not found in ModelCatalog or canonical list. "
+            "Skipping this GPU filter."
+        )
+
+    return sorted(normalized)  # Sorted for consistent ordering
@@ -71,9 +71,10 @@ class DeploymentIntent(BaseModel):
     )
 
     # Hardware preference extracted from natural language
-    preferred_gpu_type: str = Field(
-        default="Any GPU",
-        description="User's preferred GPU type if mentioned (e.g., H100, H200, A100, L4) or 'Any GPU' if not specified"
+    preferred_gpu_types: list[str] = Field(
+        default_factory=list,
+        description="List of user's preferred GPU types (empty = any GPU). "
+                    "Canonical names: L4, A100-40, A100-80, H100, H200, B200"
     )
 
     # Priority hints extracted from natural language (used for weight calculation)
 
@@ -12,24 +12,14 @@
 from jinja2 import Environment, FileSystemLoader
 
 from ..context_intent.schema import DeploymentRecommendation
+from ..knowledge_base.model_catalog import ModelCatalog
 
 logger = logging.getLogger(__name__)
 
 
 class DeploymentGenerator:
     """Generate deployment configurations from recommendations."""
 
-    # GPU pricing (USD per hour) - representative cloud pricing
-    # Keys match hardware names from benchmark database
-    GPU_PRICING = {
-        "NVIDIA-L4": 0.50,
-        "NVIDIA-A10G": 1.00,
-        "NVIDIA-A100-40GB": 3.00,
-        "NVIDIA-A100-80GB": 4.50,
-        "H100": 8.00,
-        "H200": 10.00,
-    }
-
     # vLLM version to use
     VLLM_VERSION = "v0.6.2"
 
@@ -61,6 +51,9 @@ def __init__(self, output_dir: str | None = None, simulator_mode: bool = False):
         # Simulator mode (for development/testing without GPUs)
         self.simulator_mode = simulator_mode
 
+        # Model catalog for GPU pricing lookup
+        self._catalog = ModelCatalog()
+
         logger.info(
             f"DeploymentGenerator initialized with output_dir: {self.output_dir}, simulator_mode: {simulator_mode}"
         )
@@ -127,8 +120,9 @@ def _prepare_template_context(
         traffic = recommendation.traffic_profile
         slo = recommendation.slo_targets
 
-        # Calculate GPU hourly rate
-        gpu_hourly_rate = self.GPU_PRICING.get(gpu_config.gpu_type, 1.0)
+        # Calculate GPU hourly rate from ModelCatalog
+        gpu_info = self._catalog.get_gpu_type(gpu_config.gpu_type)
+        gpu_hourly_rate = gpu_info.cost_per_hour_usd if gpu_info else 1.0
 
         # Determine resource requests based on GPU type
         gpu_type = gpu_config.gpu_type
Original file line number	Diff line number	Diff line change
`@@ -71,9 +71,10 @@ class DeploymentIntent(BaseModel):`
`71`	`71`	`)`
`72`	`72`
`73`	`73`	`# Hardware preference extracted from natural language`
`74`		`- preferred_gpu_type: str = Field(`
`75`		`- default="Any GPU",`
`76`		`- description="User's preferred GPU type if mentioned (e.g., H100, H200, A100, L4) or 'Any GPU' if not specified"`
	`74`	`+ preferred_gpu_types: list[str] = Field(`
	`75`	`+ default_factory=list,`
	`76`	`+ description="List of user's preferred GPU types (empty = any GPU). "`
	`77`	`+ "Canonical names: L4, A100-40, A100-80, H100, H200, B200"`
`77`	`78`	`)`
`78`	`79`
`79`	`80`	`# Priority hints extracted from natural language (used for weight calculation)`