Skip to content

Commit 67c11ff

Browse files
authored
Merge pull request #78 from anfredette/gpu-preference
feat: Filter recommendations by extracted hardware preferences
2 parents 74e7aa5 + 0531a53 commit 67c11ff

File tree

12 files changed

+57344
-200
lines changed

12 files changed

+57344
-200
lines changed

.gitignore

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,3 @@ docs/ai_assistant_*
8383
# Redundant/unused data files (identified in data audit)
8484
data/slo_ranges_from_benchmarks.json
8585
data/research/benchmark_slo_ranges.json
86-
87-
# Benchmark source files (already merged into benchmarks_redhat_performance.json)
88-
data/benchmarks_BLIS.json
89-
data/benchmarks_estimated_performance.json
90-
data/benchmarks_interpolated_v2.json

backend/src/api/routes.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -621,7 +621,7 @@ class RankedRecommendationFromSpecRequest(BaseModel):
621621
# Intent fields
622622
use_case: str
623623
user_count: int
624-
hardware_preference: str | None = None
624+
preferred_gpu_types: list[str] | None = None # GPU filter list (empty/None = any GPU)
625625

626626
# Traffic profile fields
627627
prompt_tokens: int
@@ -670,7 +670,7 @@ async def ranked_recommend_from_spec(request: RankedRecommendationFromSpecReques
670670
logger.info("=" * 60)
671671
logger.info(f" use_case: {request.use_case}")
672672
logger.info(f" user_count: {request.user_count}")
673-
logger.info(f" hardware_preference: {request.hardware_preference}")
673+
logger.info(f" preferred_gpu_types: {request.preferred_gpu_types}")
674674
logger.info(f" prompt_tokens: {request.prompt_tokens}")
675675
logger.info(f" output_tokens: {request.output_tokens}")
676676
logger.info(f" expected_qps: {request.expected_qps}")
@@ -696,6 +696,7 @@ async def ranked_recommend_from_spec(request: RankedRecommendationFromSpecReques
696696
"use_case": request.use_case,
697697
"user_count": request.user_count,
698698
"domain_specialization": ["general"],
699+
"preferred_gpu_types": request.preferred_gpu_types or [],
699700
},
700701
"traffic_profile": {
701702
"prompt_tokens": request.prompt_tokens,
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
"""GPU type normalization utility.
2+
3+
Normalizes user-specified GPU types to canonical names used in benchmark data.
4+
Uses ModelCatalog as the single source of truth for GPU aliases.
5+
"""
6+
7+
import logging
8+
from typing import TYPE_CHECKING
9+
10+
if TYPE_CHECKING:
11+
from ..knowledge_base.model_catalog import ModelCatalog
12+
13+
logger = logging.getLogger(__name__)
14+
15+
# Canonical GPU names from benchmark data.
# normalize_gpu_types() accepts any of these directly once the input has been
# stripped and upper-cased.
16+
CANONICAL_GPUS = {"L4", "A100-40", "A100-80", "H100", "H200", "B200"}
17+
18+
# Expansion map for shorthand/ambiguous names, keyed by the upper-cased shorthand.
19+
# When user says "A100" without specifying variant, include both
20+
GPU_EXPANSIONS = {
21+
"A100": ["A100-80", "A100-40"],
22+
}
23+
24+
# Singleton catalog instance to avoid repeated loading.
# Starts as None; _get_catalog() constructs the ModelCatalog on its first call
# and stores it here for reuse.
25+
_catalog_instance: "ModelCatalog | None" = None
26+
27+
28+
def _get_catalog() -> "ModelCatalog":
    """Return the shared ModelCatalog, constructing it on the first call.

    The instance is stored in the module-level ``_catalog_instance`` so that
    later calls reuse it instead of building a new catalog. ModelCatalog is
    imported inside the function; at module level it is only imported under
    ``TYPE_CHECKING``.

    Returns:
        The module-wide ModelCatalog instance.
    """
    global _catalog_instance

    # Fast path: the catalog was already built by an earlier call.
    if _catalog_instance is not None:
        return _catalog_instance

    from ..knowledge_base.model_catalog import ModelCatalog

    _catalog_instance = ModelCatalog()
    return _catalog_instance
35+
36+
37+
def normalize_gpu_types(gpu_types: list[str]) -> list[str]:
    """Map user-supplied GPU names onto the canonical names used in benchmark data.

    Each entry is stripped and upper-cased, then resolved in this order:

    1. Shorthand listed in ``GPU_EXPANSIONS`` (e.g. A100 -> A100-80 and
       A100-40) contributes every variant it stands for.
    2. Otherwise the stripped name is looked up through
       ``ModelCatalog.get_gpu_type``; a hit contributes the catalog's
       ``gpu_type``, upper-cased.
    3. Otherwise a name already present in ``CANONICAL_GPUS`` is kept as is.

    Entries that match none of these are logged at warning level and dropped.
    Falsy or non-string entries, blank strings and the "any gpu" placeholder
    are ignored without a warning.

    Args:
        gpu_types: GPU type strings from user input or intent extraction.

    Returns:
        Canonical GPU names, de-duplicated and sorted for a stable order.
        Empty when the input is empty or nothing could be resolved.
    """
    if not gpu_types:
        return []

    catalog = _get_catalog()
    resolved: set[str] = set()

    for raw in gpu_types:
        # Only non-empty strings can name a GPU.
        if not (raw and isinstance(raw, str)):
            continue

        trimmed = raw.strip()
        key = trimmed.upper()

        # Whitespace-only input and the "any gpu" placeholder impose no filter.
        if key in ("", "ANY GPU"):
            continue

        # Shorthand is handled before the catalog so an ambiguous name yields
        # all of its variants rather than a single catalog entry.
        if key in GPU_EXPANSIONS:
            variants = GPU_EXPANSIONS[key]
            resolved.update(variants)
            logger.debug(f"Expanded '{raw}' to {variants}")
            continue

        # The catalog receives the stripped (not upper-cased) name.
        catalog_entry = catalog.get_gpu_type(trimmed)
        if catalog_entry:
            resolved.add(catalog_entry.gpu_type.upper())
            logger.debug(
                f"Resolved '{raw}' to '{catalog_entry.gpu_type}' via ModelCatalog"
            )
        elif key in CANONICAL_GPUS:
            # Already a canonical name that the catalog did not recognise.
            resolved.add(key)
        else:
            logger.warning(
                f"Unknown GPU type '{raw}' - not found in ModelCatalog or canonical list. "
                "Skipping this GPU filter."
            )

    return sorted(resolved)

backend/src/context_intent/schema.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,10 @@ class DeploymentIntent(BaseModel):
7171
)
7272

7373
# Hardware preference extracted from natural language
74-
preferred_gpu_type: str = Field(
75-
default="Any GPU",
76-
description="User's preferred GPU type if mentioned (e.g., H100, H200, A100, L4) or 'Any GPU' if not specified"
74+
preferred_gpu_types: list[str] = Field(
75+
default_factory=list,
76+
description="List of user's preferred GPU types (empty = any GPU). "
77+
"Canonical names: L4, A100-40, A100-80, H100, H200, B200"
7778
)
7879

7980
# Priority hints extracted from natural language (used for weight calculation)

backend/src/deployment/generator.py

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,14 @@
1212
from jinja2 import Environment, FileSystemLoader
1313

1414
from ..context_intent.schema import DeploymentRecommendation
15+
from ..knowledge_base.model_catalog import ModelCatalog
1516

1617
logger = logging.getLogger(__name__)
1718

1819

1920
class DeploymentGenerator:
2021
"""Generate deployment configurations from recommendations."""
2122

22-
# GPU pricing (USD per hour) - representative cloud pricing
23-
# Keys match hardware names from benchmark database
24-
GPU_PRICING = {
25-
"NVIDIA-L4": 0.50,
26-
"NVIDIA-A10G": 1.00,
27-
"NVIDIA-A100-40GB": 3.00,
28-
"NVIDIA-A100-80GB": 4.50,
29-
"H100": 8.00,
30-
"H200": 10.00,
31-
}
32-
3323
# vLLM version to use
3424
VLLM_VERSION = "v0.6.2"
3525

@@ -61,6 +51,9 @@ def __init__(self, output_dir: str | None = None, simulator_mode: bool = False):
6151
# Simulator mode (for development/testing without GPUs)
6252
self.simulator_mode = simulator_mode
6353

54+
# Model catalog for GPU pricing lookup
55+
self._catalog = ModelCatalog()
56+
6457
logger.info(
6558
f"DeploymentGenerator initialized with output_dir: {self.output_dir}, simulator_mode: {simulator_mode}"
6659
)
@@ -127,8 +120,9 @@ def _prepare_template_context(
127120
traffic = recommendation.traffic_profile
128121
slo = recommendation.slo_targets
129122

130-
# Calculate GPU hourly rate
131-
gpu_hourly_rate = self.GPU_PRICING.get(gpu_config.gpu_type, 1.0)
123+
# Calculate GPU hourly rate from ModelCatalog
124+
gpu_info = self._catalog.get_gpu_type(gpu_config.gpu_type)
125+
gpu_hourly_rate = gpu_info.cost_per_hour_usd if gpu_info else 1.0
132126

133127
# Determine resource requests based on GPU type
134128
gpu_type = gpu_config.gpu_type

0 commit comments

Comments
 (0)