Skip to content

Commit 96a0250

Browse files
committed
Configmap-based GPU mock for the launcher
Signed-off-by: Jun Duan <jun.duan.phd@outlook.com>
1 parent 63537f5 commit 96a0250

File tree

6 files changed

+121
-8
lines changed

6 files changed

+121
-8
lines changed

dockerfiles/Dockerfile.launcher.cpu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,6 @@ WORKDIR /app
1717
COPY inference_server/launcher/launcher.py inference_server/launcher/gputranslator.py /app/
1818

1919
# Install uvicorn for serving the launcher API and nvidia-ml-py for gputranslator
20-
RUN pip install --root-user-action=ignore --no-cache-dir uvicorn nvidia-ml-py
20+
RUN pip install --root-user-action=ignore --no-cache-dir uvicorn nvidia-ml-py kubernetes
2121

2222
ENTRYPOINT ["uvicorn", "--app-dir", "/app", "launcher:app"]

inference_server/launcher/gputranslator.py

Lines changed: 77 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,29 +18,34 @@
1818
"""
1919

2020
import importlib.metadata
21+
import json
2122
import logging
22-
from typing import Dict
23+
import os
24+
from typing import Dict, Optional
2325

2426
import pynvml
27+
from kubernetes import client, config
2528

2629
logger = logging.getLogger(__name__)
2730

2831

2932
# VLLM process manager
3033
class GpuTranslator:
31-
def __init__(self, mock_gpus: bool = False, mock_gpu_count: int = 8):
34+
def __init__(self, mock_gpus: bool = False, mock_gpu_count: int = 8, node_name: Optional[str] = None):
3235
"""
3336
Initialize GPU Translator
3437
3538
Args:
3639
mock_gpus: If True, skip pynvml and use mock mode for testing
3740
mock_gpu_count: Number of mock GPUs to create (default: 8)
41+
node_name: Kubernetes node name for ConfigMap-based GPU discovery
3842
"""
3943
self.mapping = {}
4044
self.reverse_mapping = {}
4145
self.device_count = 0
4246
self.mock_mode = mock_gpus
4347
self.mock_gpu_count = mock_gpu_count
48+
self.node_name = node_name or os.getenv("NODE_NAME")
4449
if not self.mock_mode:
4550
self._check_library()
4651
self._populate_mapping()
@@ -62,11 +67,79 @@ def _check_library(self):
6267
f"package {package_name} not found. Please install it."
6368
)
6469

70+
def _load_gpu_map_from_configmap(self) -> Optional[Dict[str, int]]:
    """Fetch this node's GPU UUID-to-index mapping from the 'gpu-map' ConfigMap.

    Kubernetes client configuration is resolved in-cluster first, then via
    the local kubeconfig, so the same code path works both inside a pod and
    during local development. Discovery is strictly best-effort: any failure
    (no node name, missing ConfigMap, absent node entry, bad JSON, no API
    access) is logged and surfaced as None so the caller can fall back.

    Returns:
        Dict[str, int]: GPU UUID to index mapping, or None if the ConfigMap
        is not available for this node.
    """
    # Without a node name there is no ConfigMap key to look up.
    if not self.node_name:
        logger.info("No node name provided, skipping ConfigMap GPU discovery")
        return None

    try:
        # Prefer the in-cluster service-account credentials; fall back to
        # the developer's kubeconfig when running outside the cluster.
        try:
            config.load_incluster_config()
        except config.ConfigException:
            config.load_kube_config()

        api = client.CoreV1Api()
        namespace = os.getenv("NAMESPACE", "default")
        configmap = api.read_namespaced_config_map(name="gpu-map", namespace=namespace)

        entry = (configmap.data or {}).get(self.node_name)
        if entry is None:
            logger.warning(
                "Node '%s' not found in ConfigMap 'gpu-map' in namespace '%s'",
                self.node_name,
                namespace,
            )
            return None

        # Each ConfigMap value is a JSON document mapping GPU UUIDs to indices.
        mapping = json.loads(entry)
        logger.info(
            "Loaded GPU mapping from ConfigMap for node '%s': %s",
            self.node_name,
            mapping,
        )
        return mapping

    except Exception as err:
        # Best-effort by design: never let discovery failures crash startup.
        logger.warning(
            "Failed to load GPU mapping from ConfigMap: %s. Falling back to mock mode.",
            err,
        )
        return None
119+
65120
def _populate_mapping(self):
66121
"""
67122
Creates mapping and reverse_mapping for the GPU Translator.
68-
In mock mode, pre-populates with mock GPU UUIDs following the pattern GPU-{index}.
123+
Priority order:
124+
1. ConfigMap 'gpu-map' (if in Kubernetes and node_name available)
125+
2. Mock mode (if mock_gpus=True)
126+
3. Real GPUs via pynvml
69127
"""
128+
# Try ConfigMap first if in mock mode and node_name is available
129+
if self.mock_mode and self.node_name:
130+
configmap_mapping = self._load_gpu_map_from_configmap()
131+
if configmap_mapping:
132+
self.mapping = configmap_mapping
133+
self.reverse_mapping = {v: k for k, v in self.mapping.items()}
134+
self.device_count = len(self.mapping)
135+
logger.info(
136+
"GPU Translator initialized from ConfigMap with %d GPUs for node '%s'",
137+
self.device_count,
138+
self.node_name
139+
)
140+
return
141+
142+
# Fall back to hardcoded mock mode
70143
if self.mock_mode:
71144
# Pre-populate with mock GPUs following the test pattern: GPU-0, GPU-1, etc.
72145
for index in range(self.mock_gpu_count):
@@ -77,6 +150,7 @@ def _populate_mapping(self):
77150
logger.info("GPU Translator initialized in mock mode with %d mock GPUs", self.mock_gpu_count)
78151
return
79152

153+
# Use real GPUs via pynvml
80154
try:
81155
pynvml.nvmlInit()
82156
self.device_count = pynvml.nvmlDeviceGetCount()

inference_server/launcher/launcher.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ def __init__(
8181
for uuid_str in config.gpu_uuids:
8282
index = gpu_translator.uuid_to_index(uuid_str)
8383
cuda_indices.append(str(index))
84+
logger.info(
85+
f"Translated GPU UUIDs {config.gpu_uuids} to indices {cuda_indices}."
86+
)
8487

8588
if config.env_vars is None:
8689
config.env_vars = {}
@@ -193,9 +196,13 @@ def get_logs(
193196

194197
# Multi-instance vLLM process manager
195198
class VllmMultiProcessManager:
196-
def __init__(self, mock_gpus: bool = False, mock_gpu_count: int = 8, node_name: Optional[str] = None):
    """Initialize the multi-instance vLLM process manager.

    Args:
        mock_gpus: forwarded to GpuTranslator; when True, pynvml is skipped
            and mock GPUs are used instead.
        mock_gpu_count: number of mock GPUs to create (default: 8).
        node_name: Kubernetes node name, forwarded to GpuTranslator for
            ConfigMap-based GPU discovery.
    """
    # No instances exist until create_instance is called.
    self.instances: Dict[str, VllmInstance] = {}
    self.gpu_translator = GpuTranslator(
        mock_gpus=mock_gpus,
        mock_gpu_count=mock_gpu_count,
        node_name=node_name,
    )
199206

200207
def create_instance(
201208
self, vllm_config: VllmConfig, instance_id: Optional[str] = None
@@ -605,10 +612,14 @@ def set_env_vars(env_vars: Dict[str, Any]):
605612

606613
args = parser.parse_args()
607614

615+
# Get node name from environment variable
616+
node_name = os.getenv("NODE_NAME")
617+
608618
# Reinitialize the global manager with mock mode settings
609619
vllm_manager = VllmMultiProcessManager(
610620
mock_gpus=args.mock_gpus,
611-
mock_gpu_count=args.mock_gpu_count
621+
mock_gpu_count=args.mock_gpu_count,
622+
node_name=node_name
612623
)
613624

614625
uvicorn.run(

inference_server/launcher/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,6 @@ pydantic
33
uvicorn
44
uvloop
55
nvidia-ml-py
6+
kubernetes
67
# WARNING: vllm must be built from source on a macOS Silicon
78
vllm; sys_platform != "darwin" or platform_machine != "arm64"

test/e2e/mkobjs.sh

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#!/usr/bin/env bash
22

33
inst=$(date +%d-%H-%M-%S)
4-
server_img=$(make echo-var VAR=TEST_SERVER_IMG)
54
requester_img=$(make echo-var VAR=TEST_REQUESTER_IMG)
65
launcher_img=$(make echo-var VAR=TEST_LAUNCHER_IMG)
76
if out=$(kubectl apply -f - 2>&1 <<EOF
@@ -78,6 +77,7 @@ spec:
7877
maxSleepingInstances: 1
7978
podTemplate:
8079
spec:
80+
serviceAccount: testlauncher
8181
containers:
8282
- name: inference-server
8383
image: $launcher_img
@@ -92,6 +92,13 @@ spec:
9292
--host 0.0.0.0 \
9393
--port 8001 \
9494
--log-level info
95+
env:
96+
- name: NODE_NAME
97+
valueFrom:
98+
fieldRef: { fieldPath: spec.nodeName }
99+
- name: NAMESPACE
100+
valueFrom:
101+
fieldRef: { fieldPath: metadata.namespace }
95102
---
96103
apiVersion: apps/v1
97104
kind: ReplicaSet

test/e2e/run-launcher-based.sh

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,26 @@ kubectl create rolebinding testreq --role=testreq --serviceaccount=$(kubectl get
114114
kubectl create clusterrolebinding testreq-view --clusterrole=view --serviceaccount=$(kubectl get sa default -o jsonpath={.metadata.namespace}):testreq
115115

116116
kubectl create sa testreq
117+
118+
kubectl apply -f - <<EOF
119+
apiVersion: rbac.authorization.k8s.io/v1
120+
kind: Role
121+
metadata:
122+
name: testlauncher
123+
rules:
124+
- apiGroups:
125+
- ""
126+
resources:
127+
- configmaps
128+
verbs:
129+
- get
130+
- list
131+
- watch
132+
EOF
133+
134+
kubectl create rolebinding testlauncher --role=testlauncher --serviceaccount=$(kubectl get sa default -o jsonpath={.metadata.namespace}):testlauncher
135+
136+
kubectl create sa testlauncher
117137
kubectl create cm gpu-map
118138
kubectl get nodes -o name | sed 's%^node/%%' | while read node; do
119139
kubectl label node $node nvidia.com/gpu.present=true nvidia.com/gpu.product=NVIDIA-L40S nvidia.com/gpu.count=2 --overwrite=true

0 commit comments

Comments
 (0)