Skip to content

Commit e7a2ab8

Browse files
authored
Merge pull request #749 from NVIDIA/am/dynamo-multinode
Update documentation on Dynamo k8s multi node
2 parents ab05b2b + 73b82e0 commit e7a2ab8

File tree

3 files changed

+11
-7
lines changed

3 files changed

+11
-7
lines changed

conf/experimental/ai_dynamo/test/vllm.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ description = "vLLM backend with Qwen3-0.6B model"
1919
test_template_name = "AIDynamo"
2020

2121
[cmd_args]
22-
docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1.post1"
22+
docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.0"
2323

2424
[cmd_args.dynamo]
2525
backend = "vllm"

doc/workloads/ai_dynamo.rst

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,15 @@ Before running the AI Dynamo workload on a Kubernetes cluster, ensure that the c
1616
.. code-block:: bash
1717
1818
export NAMESPACE=dynamo-system
19-
export RELEASE_VERSION=0.6.1 # replace with the desired release version
19+
export RELEASE_VERSION=0.7.0 # replace with the desired release version
2020
21-
helm fetch https://helm.ngc.nvidia.com/nvidia/ai-dynamo/charts/dynamo-crds-${RELEASE_VERSION}.tgz
22-
helm install dynamo-crds dynamo-crds-${RELEASE_VERSION}.tgz --namespace default
21+
helm upgrade -n default -i dynamo-crds https://helm.ngc.nvidia.com/nvidia/ai-dynamo/charts/dynamo-crds-${RELEASE_VERSION}.tgz
22+
helm upgrade -n default -i dynamo-platform https://helm.ngc.nvidia.com/nvidia/ai-dynamo/charts/dynamo-platform-${RELEASE_VERSION}.tgz
2323
24-
helm fetch https://helm.ngc.nvidia.com/nvidia/ai-dynamo/charts/dynamo-platform-${RELEASE_VERSION}.tgz
25-
helm install dynamo-platform dynamo-platform-${RELEASE_VERSION}.tgz --namespace ${NAMESPACE} --create-namespace
24+
# The following components are required for multi-node deployments only.
25+
# Their versions should be aligned with the Dynamo version in use.
26+
helm upgrade -n default -i grove oci://ghcr.io/ai-dynamo/grove/grove-charts:v0.0.0-gd462e65
27+
helm upgrade -n default -i kai-scheduler oci://ghcr.io/nvidia/kai-scheduler/kai-scheduler:0.0.0-4c29820
2628
2729
Launch and Monitor the Job
2830
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

src/cloudai/systems/kubernetes/kubernetes_system.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,9 @@ def _get_frontend_pod_name(self) -> str:
328328
for pod in self.core_v1.list_namespaced_pod(namespace=self.default_namespace).items:
329329
labels = pod.metadata.labels
330330
logging.debug(f"Found pod: {pod.metadata.name} with labels: {labels}")
331-
if labels and str(labels.get("nvidia.com/dynamo-component", "")).lower() == "frontend":
331+
if labels and str(labels.get("nvidia.com/dynamo-component", "")).lower() == "frontend": # v0.6.x
332+
return pod.metadata.name
333+
if labels and str(labels.get("nvidia.com/dynamo-component-type", "")).lower() == "frontend": # v0.7.x
332334
return pod.metadata.name
333335
raise RuntimeError("No frontend pod found for the job")
334336

0 commit comments

Comments
 (0)