Skip to content

Commit 0c834dc

Browse files
[Configuration Explorer] Add json file as "GPU database" (llm-d#453)
The goal of this emulated database is to allow it to be available for both the Configuration Explorer/Capacity Planner and Standup. Also silenced the "None of PyTorch, TensorFlow >= 2.0, or Flax have been found" message. Finally, re-enabled the CI/CD testing on the `kind` cluster. Signed-off-by: maugustosilva <maugusto.silva@gmail.com>
1 parent 4d8814d commit 0c834dc

File tree

8 files changed

+77
-47
lines changed

8 files changed

+77
-47
lines changed

.github/workflows/ci-pr-benchmark.yaml

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ jobs:
2828

2929
- name: Create k8s Kind Cluster
3030
uses: helm/kind-action@v1
31+
with:
32+
version: v0.30.0
3133

3234
- name: Label Node Affinity from inference-sim Scenario
3335
run: |
@@ -47,14 +49,26 @@ jobs:
4749

4850
- name: Standup a modelservice using llm-d-inference-sim
4951
run: |
50-
./setup/standup.sh -c kind_sim_fb -t modelservice -s 0,1,2,4,7,8,9 || true
52+
./setup/standup.sh -c kind_sim_fb -t modelservice -s 0,1,2,4,7,8
53+
shell: bash
54+
55+
- name: Check
56+
run: sleep 120; kubectl get crd | grep inference
57+
shell: bash
58+
59+
- name: Standup a modelservice using llm-d-inference-sim
60+
run: |
61+
./setup/standup.sh -c kind_sim_fb -t modelservice -s 0,1,2,4,7,8,9
62+
shell: bash
5163

5264
- name: Run harness (mock)
5365
env:
5466
LLMD_CONTROL_DRY_RUN: 1 # TODO: harness doesn't work now for kind bc no harness endpoint
5567
run: |
56-
./setup/run.sh -c kind_sim_fb --dry-run || true
68+
./setup/run.sh -c kind_sim_fb --dry-run
69+
shell: bash
5770

5871
- name: Teardown
5972
run: |
6073
./setup/teardown.sh -c kind_sim_fb
74+
shell: bash

build/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ RUN cd fmperf; \
4747

4848
ARG INFERENCE_PERF_REPO=https://github.com/kubernetes-sigs/inference-perf.git
4949
ARG INFERENCE_PERF_BRANCH=main
50-
ARG INFERENCE_PERF_COMMIT=1ccc48b6bb9c9abb61558b719041fb000b265e59
50+
ARG INFERENCE_PERF_COMMIT=b81afa49e026417749884ac905425e70837ebfd3
5151
RUN git clone --branch ${INFERENCE_PERF_BRANCH} ${INFERENCE_PERF_REPO}
5252
RUN cd inference-perf; \
5353
git checkout ${INFERENCE_PERF_COMMIT}; \

config_explorer/db.json

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
{
2+
"AMD_INSTINCT_MI300X": {
3+
"memory": 192,
4+
"prefix": "MI300X"
5+
},
6+
"NVIDIA-H100-80GB-HBM3": {
7+
"memory": 80,
8+
"prefix": "H100"
9+
},
10+
"NVIDIA-A100-40GB": {
11+
"memory": 40,
12+
"prefix": "A100"
13+
},
14+
"NVIDIA-A100-80GB": {
15+
"memory": 80,
16+
"prefix": "A100"
17+
},
18+
"NVIDIA-H100-80GB": {
19+
"memory": 80,
20+
"prefix": "H100"
21+
},
22+
"NVIDIA-L40-40GB": {
23+
"memory": 40,
24+
"prefix": "L40"
25+
},
26+
"NVIDIA-RTX-4090": {
27+
"memory": 24,
28+
"prefix": "RTX4090"
29+
},
30+
"NVIDIA-RTX-5090": {
31+
"memory": 32,
32+
"prefix": "RTX5090"
33+
},
34+
"NVIDIA-RTX-6000": {
35+
"memory": 48,
36+
"prefix": "RTX6000"
37+
},
38+
"NVIDIA-A6000": {
39+
"memory": 48,
40+
"prefix": "A6000"
41+
},
42+
"NVIDIA-A4000": {
43+
"memory": 16,
44+
"prefix": "A4000"
45+
},
46+
"NVIDIA-T4": {
47+
"memory": 16,
48+
"prefix": "T4"
49+
}
50+
}

config_explorer/db.py

Lines changed: 2 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,5 @@
11
"""
22
Mocks DB storing info about common accelerators used for LLM serving and inference
33
"""
4-
5-
gpu_specs = {
6-
# https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf
7-
# https://medium.com/@bijit211987/top-nvidia-gpus-for-llm-inference-8a5316184a10
8-
# https://www.databasemart.com/blog/best-nvidia-gpus-for-llm-inference-2025?srsltid=AfmBOopcvcdN6yzBF24k7_DyRS_csYOmNyDLJK7zq9Rg89weW6AQAx5F
9-
"NVIDIA-H100-80GB-HBM3": {
10-
"memory": 80
11-
},
12-
"NVIDIA-A100-40GB": {
13-
"memory": 40
14-
},
15-
"NVIDIA-A100-80GB": {
16-
"memory": 80
17-
},
18-
"NVIDIA-H100-80GB": {
19-
"memory": 80
20-
},
21-
"NVIDIA-L40-40GB": {
22-
"memory": 40
23-
},
24-
"NVIDIA-RTX-4090": {
25-
"memory": 24
26-
},
27-
"NVIDIA-RTX-5090": {
28-
"memory": 32
29-
},
30-
"NVIDIA-RTX-6000":{
31-
"memory": 48
32-
},
33-
"NVIDIA-A6000": {
34-
"memory": 48
35-
},
36-
"NVIDIA-A4000": {
37-
"memory": 16
38-
},
39-
"NVIDIA-T4": {
40-
"memory": 16
41-
}
42-
}
4+
import json,os
5+
gpu_specs=json.loads('db.json')

config_explorer/src/config_explorer/capacity_planner.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@
99
import re
1010
from typing import List
1111
from huggingface_hub import HfApi, ModelInfo
12-
from transformers import AutoConfig, AutoModel
12+
13+
import contextlib
14+
import io
15+
with contextlib.redirect_stdout(io.StringIO()), contextlib.redirect_stderr(io.StringIO()):
16+
from transformers import AutoConfig, AutoModel
1317

1418
class AttentionType(StrEnum):
1519
"""
@@ -537,4 +541,4 @@ def bytes_to_gib(bytes: int) -> float:
537541
Convert number of bytes to GiB
538542
"""
539543

540-
return bytes / (1024 ** 3)
544+
return bytes / (1024 ** 3)

scenarios/cicd/kind_sim_fb.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,4 @@ export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_CPU_MEM=100Mi
1919
export LLMDBENCH_VLLM_MODELSERVICE_URI_PROTOCOL="hf"
2020
export LLMDBENCH_DEPLOY_MODEL_LIST="facebook/opt-125m"
2121
export LLMDBENCH_HARNESS_PVC_SIZE=3Gi
22-
export LLMDBENCH_VLLM_MODELSERVICE_INFERENCE_MODEL=true
2322
export LLMDBENCH_VLLM_COMMON_ACCELERATOR_MEMORY=24 # To pass capacity planner sanity checking

setup/env.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ export LLMDBENCH_VLLM_GAIE_CHART_VERSION=${LLMDBENCH_VLLM_GAIE_CHART_VERSION:-v1
118118
#export LLMDBENCH_VLLM_GAIE_CHART_VERSION=${LLMDBENCH_VLLM_GAIE_CHART_VERSION:-v0.5.1}
119119

120120
# Gateway API and GAIE CRD versions
121-
export LLMDBENCH_GATEWAY_API_CRD_REVISION=${LLMDBENCH_GATEWAY_API_CRD_REVISION:-"v1.2.0"}
121+
export LLMDBENCH_GATEWAY_API_CRD_REVISION=${LLMDBENCH_GATEWAY_API_CRD_REVISION:-"v1.4.0"}
122122
export LLMDBENCH_GATEWAY_API_INFERENCE_EXTENSION_CRD_REVISION=${LLMDBENCH_GATEWAY_API_INFERENCE_EXTENSION_CRD_REVISION:-$LLMDBENCH_VLLM_GAIE_CHART_VERSION}
123123

124124
export LLMDBENCH_VLLM_MODELSERVICE_RELEASE=${LLMDBENCH_VLLM_MODELSERVICE_RELEASE:-"llmdbench"}

setup/steps/00_ensure_llm-d-infra.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
else:
1818
os.environ["PYTHONPATH"] = f"{config_explorer_src}:{setup_dir}:{workspace_root}"
1919

20-
print(f"Workspace root directory added to PYTHONPATH: {os.environ['PYTHONPATH']}")
20+
#print(f"Workspace root directory added to PYTHONPATH: {os.environ['PYTHONPATH']}")
2121

2222
# ---------------- Import local packages ----------------
2323
try:

0 commit comments

Comments
 (0)