Skip to content

Commit 5d9f022

Browse files
authored
Merge pull request #112 from anfredette/yaml-gen-test
fix: use correct GPU node selector labels and improve prod probe timeouts
2 parents c585412 + 2defd27 commit 5d9f022

File tree

4 files changed

+24
-6
lines changed

4 files changed

+24
-6
lines changed

data/configuration/model_catalog.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1280,6 +1280,7 @@
12801280
{
12811281
"gpu_type": "L4",
12821282
"aliases": ["NVIDIA-L4", "L4"],
1283+
"node_selector_label": "NVIDIA-L4",
12831284
"memory_gb": 24,
12841285
"compute_capability": "8.9",
12851286
"typical_use_cases": ["inference"],
@@ -1293,6 +1294,7 @@
12931294
{
12941295
"gpu_type": "A10G",
12951296
"aliases": ["NVIDIA-A10G", "A10G"],
1297+
"node_selector_label": "NVIDIA-A10G",
12961298
"memory_gb": 24,
12971299
"compute_capability": "8.6",
12981300
"typical_use_cases": ["inference"],
@@ -1306,6 +1308,7 @@
13061308
{
13071309
"gpu_type": "A100-40",
13081310
"aliases": ["NVIDIA-A100-40GB", "A100-40", "A100-40GB"],
1311+
"node_selector_label": "NVIDIA-A100-SXM4-40GB",
13091312
"memory_gb": 40,
13101313
"compute_capability": "8.0",
13111314
"typical_use_cases": ["inference", "training"],
@@ -1319,6 +1322,7 @@
13191322
{
13201323
"gpu_type": "A100-80",
13211324
"aliases": ["NVIDIA-A100-80GB", "A100-80", "A100-80GB"],
1325+
"node_selector_label": "NVIDIA-A100-SXM4-80GB",
13221326
"memory_gb": 80,
13231327
"compute_capability": "8.0",
13241328
"typical_use_cases": ["inference", "training"],
@@ -1332,6 +1336,7 @@
13321336
{
13331337
"gpu_type": "H100",
13341338
"aliases": ["NVIDIA-H100", "H100", "H100-80GB"],
1339+
"node_selector_label": "NVIDIA-H100-80GB-HBM3",
13351340
"memory_gb": 80,
13361341
"compute_capability": "9.0",
13371342
"typical_use_cases": ["inference", "training"],
@@ -1345,6 +1350,7 @@
13451350
{
13461351
"gpu_type": "H200",
13471352
"aliases": ["NVIDIA-H200", "H200", "H200-141GB"],
1353+
"node_selector_label": "NVIDIA-H200-141GB-HBM3",
13481354
"memory_gb": 141,
13491355
"compute_capability": "9.0",
13501356
"typical_use_cases": ["inference", "training"],
@@ -1358,6 +1364,7 @@
13581364
{
13591365
"gpu_type": "B200",
13601366
"aliases": ["NVIDIA-B200", "B200"],
1367+
"node_selector_label": "NVIDIA-B200",
13611368
"memory_gb": 192,
13621369
"compute_capability": "10.0",
13631370
"typical_use_cases": ["inference", "training"],
@@ -1371,6 +1378,7 @@
13711378
{
13721379
"gpu_type": "MI300X",
13731380
"aliases": ["AMD-MI300X", "MI300X", "AMD-Instinct-MI300X"],
1381+
"node_selector_label": "AMD-Instinct-MI300X",
13741382
"memory_gb": 192,
13751383
"compute_capability": "N/A",
13761384
"typical_use_cases": ["inference", "training"],

src/neuralnav/configuration/generator.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ class DeploymentGenerator:
2121
"""Generate deployment configurations from recommendations."""
2222

2323
# vLLM version to use
24-
VLLM_VERSION = "v0.6.2"
24+
VLLM_VERSION = "latest"
2525

2626
def __init__(self, output_dir: str | None = None, simulator_mode: bool = False):
2727
"""
@@ -122,9 +122,15 @@ def _prepare_template_context(
122122

123123
assert gpu_config is not None, "gpu_config is required for template context"
124124

125-
# Calculate GPU hourly rate from ModelCatalog
125+
# Look up GPU info from ModelCatalog
126126
gpu_info = self._catalog.get_gpu_type(gpu_config.gpu_type)
127-
gpu_hourly_rate = gpu_info.cost_per_hour_usd if gpu_info else 1.0
127+
if gpu_info is None:
128+
raise ValueError(
129+
f"Unknown GPU type '{gpu_config.gpu_type}'. "
130+
f"Add it to the GPU catalog in data/configuration/model_catalog.json."
131+
)
132+
gpu_hourly_rate = gpu_info.cost_per_hour_usd
133+
gpu_node_selector_label = gpu_info.node_selector_label
128134

129135
# Determine resource requests based on GPU type
130136
gpu_type = gpu_config.gpu_type
@@ -187,6 +193,7 @@ def _prepare_template_context(
187193
"simulator_mode": self.simulator_mode,
188194
# GPU configuration
189195
"gpu_type": gpu_config.gpu_type,
196+
"gpu_node_selector_label": gpu_node_selector_label,
190197
"gpu_count": gpu_config.gpu_count,
191198
"tensor_parallel": gpu_config.tensor_parallel,
192199
"gpus_per_replica": gpu_config.tensor_parallel, # GPUs per pod

src/neuralnav/configuration/templates/kserve-inferenceservice.yaml.j2

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,24 +78,25 @@ spec:
7878
{% if simulator_mode %}
7979
initialDelaySeconds: 10
8080
{% else %}
81-
initialDelaySeconds: 120
81+
initialDelaySeconds: 600
8282
{% endif %}
8383
periodSeconds: 30
8484
timeoutSeconds: 10
85+
failureThreshold: 5
8586
readinessProbe:
8687
httpGet:
8788
path: /health
8889
port: 8080
8990
{% if simulator_mode %}
9091
initialDelaySeconds: 5
9192
{% else %}
92-
initialDelaySeconds: 60
93+
initialDelaySeconds: 120
9394
{% endif %}
9495
periodSeconds: 10
9596
timeoutSeconds: 5
9697
{% if not simulator_mode %}
9798
nodeSelector:
98-
nvidia.com/gpu.product: {{ gpu_type }}
99+
nvidia.com/gpu.product: {{ gpu_node_selector_label }}
99100
tolerations:
100101
- key: nvidia.com/gpu
101102
operator: Exists

src/neuralnav/knowledge_base/model_catalog.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ class GPUType:
5454
def __init__(self, data: dict):
5555
self.gpu_type = data["gpu_type"]
5656
self.aliases = data.get("aliases", [data["gpu_type"]]) # Default to primary name
57+
self.node_selector_label = data.get("node_selector_label", self.aliases[0])
5758
self.memory_gb = data["memory_gb"]
5859
self.compute_capability = data["compute_capability"]
5960
self.typical_use_cases = data["typical_use_cases"]
@@ -88,6 +89,7 @@ def to_dict(self) -> dict:
8889
return {
8990
"gpu_type": self.gpu_type,
9091
"aliases": self.aliases,
92+
"node_selector_label": self.node_selector_label,
9193
"memory_gb": self.memory_gb,
9294
"compute_capability": self.compute_capability,
9395
"typical_use_cases": self.typical_use_cases,

0 commit comments

Comments (0)