Skip to content

Commit 7a980bc

Browse files
authored
generalize gaie presets (llm-d#301)
* generalize gaie presets * fix ci failure --------- Signed-off-by: Michael Kalantar <kalantar@us.ibm.com>
1 parent 8a9d3af commit 7a980bc

File tree

9 files changed

+270
-127
lines changed

9 files changed

+270
-127
lines changed

scenarios/inference-scheduling.sh

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Fill in desired values
2+
# export LLMDBENCH_HF_TOKEN=
3+
# export LLMDBENCH_VLLM_COMMON_NAMESPACE=
4+
# export LLMDBENCH_CONTROL_WORK_DIR=
5+
6+
# INFERENCE SCHEDULING WELL LIT PATH
7+
# Based on https://github.com/llm-d-incubation/llm-d-infra/tree/main/quickstart/examples/inference-scheduling
8+
# Removed pod monitoring; can be added using LLMDBENCH_VLLM_MODELSERVICE_EXTRA_POD_CONFIG
9+
# Removed extra volumes metrics-volume and torch-compile-volume; they are not needed for this model and tested hardware.
10+
# Use LLMDBENCH_VLLM_MODELSERVICE_EXTRA_VOLUME_MOUNTS and LLMDBENCH_VLLM_MODELSERVICE_EXTRA_VOLUMES to add them if needed.
11+
12+
# IMPORTANT NOTE
13+
# All parameters not defined here or exported externally will be the default values found in setup/env.sh
14+
# Many commonly defined values were left blank (default) so that this scenario is applicable to as many environments as possible.
15+
16+
# Cluster specific configuration
17+
# export LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS=ocs-storagecluster-cephfs
18+
# export LLMDBENCH_VLLM_COMMON_AFFINITY='nvidia.com/gpu.product:NVIDIA-H100-80GB-HBM3'
19+
20+
# Model(s)
21+
# export LLMDBENCH_DEPLOY_MODEL_LIST="Qwen/Qwen3-0.6B"
22+
export LLMDBENCH_DEPLOY_MODEL_LIST=meta-llama/Llama-3.1-8B-Instruct
23+
export LLMDBENCH_VLLM_COMMON_PVC_MODEL_CACHE_SIZE=20Gi
24+
25+
# Routing configuration (via gaie)
26+
LLMDBENCH_VLLM_MODELSERVICE_GAIE_PLUGINS_CONFIGFILE="plugins-v2.yaml"
27+
28+
# Routing configuration (via modelservice)
29+
export LLMDBENCH_VLLM_MODELSERVICE_INFERENCE_MODEL=true
30+
export LLMDBENCH_LLMD_ROUTINGSIDECAR_CONNECTOR=nixlv2
31+
32+
# Prefill and Decode configiration (via modelservice)
33+
34+
# export LLMDBENCH_VLLM_COMMON_ACCELERATOR_RESOURCE="nvidia.com/gpu"
35+
36+
export LLMDBENCH_VLLM_COMMON_ENVVARS_TO_YAML=$(mktemp)
37+
cat << EOF > $LLMDBENCH_VLLM_COMMON_ENVVARS_TO_YAML
38+
- name: CUDA_VISIBLE_DEVICES
39+
value: "0"
40+
- name: UCX_TLS
41+
value: "cuda_ipc,cuda_copy,tcp"
42+
- name: VLLM_NIXL_SIDE_CHANNEL_PORT
43+
value: "5557"
44+
- name: VLLM_LOGGING_LEVEL
45+
value: DEBUG
46+
EOF
47+
48+
export LLMDBENCH_VLLM_MODELSERVICE_EXTRA_CONTAINER_CONFIG=$(mktemp)
49+
cat << EOF > ${LLMDBENCH_VLLM_MODELSERVICE_EXTRA_CONTAINER_CONFIG}
50+
ports:
51+
- containerPort: 5557
52+
protocol: TCP
53+
- containerPort: 8200
54+
name: metrics
55+
protocol: TCP
56+
EOF
57+
58+
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_REPLICAS=2
59+
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_MODEL_COMMAND=vllmServe
60+
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_ARGS=["--enforce-eager____--kv-transfer-config____{\\\"kv_connector\\\":\\\"NixlConnector\\\",\\\"kv_role\\\":\\\"kv_both\\\"}"]
61+
62+
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_REPLICAS=0
63+
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_MODEL_COMMAND=vllmServe
64+
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_ARGS=["--enforce-eager____--kv-transfer-config____{\\\"kv_connector\\\":\\\"NixlConnector\\\",\\\"kv_role\\\":\\\"kv_both\\\"}"]

scenarios/precise-prefix-cache-aware.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@
33

44
export LLMDBENCH_DEPLOY_MODEL_LIST=meta-llama/Llama-3.1-70B-Instruct
55

6-
# Common parameters across standalone and llm-d (prefill and decode) pods
76
export LLMDBENCH_HARNESS_EXPERIMENT_PROFILE=shared_prefix_synthetic.yaml
7+
8+
# Common parameters across standalone and llm-d (prefill and decode) pods
89
export LLMDBENCH_VLLM_COMMON_PVC_MODEL_CACHE_SIZE=1Ti
910
export LLMDBENCH_VLLM_COMMON_CPU_NR=16
1011
export LLMDBENCH_VLLM_COMMON_CPU_MEM=64Gi

scenarios/wide-ep-small.sh

Lines changed: 87 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -3,46 +3,66 @@
33
# Its purpose is to drive development of setup/steps/09_deploy_via_modelservice.sh
44

55
# Fill in required/desired values
6-
export LLMDBENCH_HF_TOKEN=
6+
# export LLMDBENCH_HF_TOKEN=
77
# export LLMDBENCH_VLLM_COMMON_NAMESPACE=
88
# export LLMDBENCH_CONTROL_WORK_DIR=
99

10-
# Cluster specific configuration (fusion6/pokprod001)
11-
export LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS=ocs-storagecluster-cephfs
10+
# Cluster specific configuration
11+
# export LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS=ocs-storagecluster-cephfs
1212
export LLMDBENCH_VLLM_COMMON_AFFINITY='nvidia.com/gpu.product:NVIDIA-H100-80GB-HBM3'
1313

1414
# Model(s)
1515
export LLMDBENCH_DEPLOY_MODEL_LIST="Qwen/Qwen3-0.6B"
16-
# export LLMDBENCH_VLLM_COMMON_PVC_MODEL_CACHE_SIZE=800Gi
16+
export LLMDBENCH_VLLM_COMMON_PVC_MODEL_CACHE_SIZE=20Gi
1717

18-
# modelservice configuration
18+
# Routing configuration (via modelservice)
1919

2020
export LLMDBENCH_VLLM_MODELSERVICE_INFERENCE_MODEL=true
2121

2222
export LLMDBENCH_LLMD_ROUTINGSIDECAR_CONNECTOR=nixlv2
2323
export LLMDBENCH_LLMD_ROUTINGSIDECAR_DEBUG_LEVEL=3
2424

25+
# Prefill and Decode configuration (via modelservice)
26+
2527
export LLMDBENCH_VLLM_MODELSERVICE_MULTINODE=true
2628

27-
export LLMDBENCH_VLLM_STANDALONE_VLLM_FUSED_MOE_CHUNK_SIZE="1024"
28-
export LLMDBENCH_VLLM_STANDALONE_DP_SIZE_LOCAL="2"
29-
export LLMDBENCH_VLLM_STANDALONE_TRITON_LIBCUDA_PATH="/usr/lib64"
30-
# export LLMDBENCH_VLLM_STANDALONE_HF_HUB_DISABLE_XET="1"
31-
export LLMDBENCH_VLLM_STANDALONE_VLLM_SKIP_P2P_CHECK="1"
32-
export LLMDBENCH_VLLM_STANDALONE_VLLM_RANDOMIZE_DP_DUMMY_INPUTS="1"
33-
export LLMDBENCH_VLLM_STANDALONE_VLLM_USE_DEEP_GEMM="1"
34-
export LLMDBENCH_VLLM_STANDALONE_VLLM_ALL2ALL_BACKEND="deepep_low_latency"
35-
export LLMDBENCH_VLLM_STANDALONE_NVIDIA_GDRCOPY="enabled"
36-
export LLMDBENCH_VLLM_STANDALONE_NVSHMEM_DEBUG="INFO"
37-
export LLMDBENCH_VLLM_STANDALONE_NVSHMEM_REMOTE_TRANSPORT="ibgda"
38-
export LLMDBENCH_VLLM_STANDALONE_NVSHMEM_IB_ENABLE_IBGDA="true"
39-
export LLMDBENCH_VLLM_STANDALONE_NVSHMEM_BOOTSTRAP_UID_SOCK_IFNAME="eth0"
40-
export LLMDBENCH_VLLM_STANDALONE_GLOO_SOCKET_IFNAME="eth0"
41-
export LLMDBENCH_VLLM_STANDALONE_NCCL_SOCKET_IFNAME="eth0"
42-
export LLMDBENCH_VLLM_STANDALONE_NCCL_IB_HCA="ibp"
43-
export LLMDBENCH_VLLM_STANDALONE_VLLM_LOGGING_LEVEL="INFO"
44-
# export LLMDBENCH_VLLM_STANDALONE_HF_HUB_CACHE="/huggingface-cache"
45-
export LLMDBENCH_VLLM_STANDALONE_HF_HUB_CACHE="/model-cache/models"
29+
export LLMDBENCH_VLLM_COMMON_ENVVARS_TO_YAML=$(mktemp)
30+
cat << EOF > $LLMDBENCH_VLLM_COMMON_ENVVARS_TO_YAML
31+
- name: VLLM_FUSED_MOE_CHUNK_SIZE
32+
value: "1024"
33+
- name: DP_SIZE_LOCAL
34+
value: "2"
35+
- name: TRITON_LIBCUDA_PATH
36+
value: "/usr/lib64"
37+
- name: VLLM_SKIP_P2P_CHECK
38+
value: "1"
39+
- name: VLLM_RANDOMIZE_DP_DUMMY_INPUTS
40+
value: "1"
41+
- name: VLLM_USE_DEEP_GEMM
42+
value: "1"
43+
- name: VLLM_ALL2ALL_BACKEND
44+
value: "deepep_low_latency"
45+
- name: NVIDIA_GDRCOPY
46+
value: "enabled"
47+
- name: NVSHMEM_DEBUG
48+
value: "INFO"
49+
- name: NVSHMEM_REMOTE_TRANSPORT
50+
value: "ibgda"
51+
- name: NVSHMEM_IB_ENABLE_IBGDA
52+
value: "true"
53+
- name: NVSHMEM_BOOTSTRAP_UID_SOCK_IFNAME
54+
value: "eth0"
55+
- name: GLOO_SOCKET_IFNAME
56+
value: "eth0"
57+
- name: NCCL_SOCKET_IFNAME
58+
value: "eth0"
59+
- name: NCCL_IB_HCA
60+
value: "ibp"
61+
- name: VLLM_LOGGING_LEVEL
62+
value: "INFO"
63+
- name: HF_HUB_CACHE
64+
value: "/model-cache/models"
65+
EOF
4666

4767
# export LLMDBENCH_VLLM_MODELSERVICE_MOUNT_MODEL_VOLUME_OVERRIDE=false
4868
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_REPLICAS=1
@@ -52,40 +72,45 @@ export LLMDBENCH_VLLM_MODELSERVICE_DECODE_MODEL_COMMAND=custom
5272
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_ARGS=$(mktemp)
5373
cat << EOF > $LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_ARGS
5474
START_RANK=\$(( \${LWS_WORKER_INDEX:-0} * DP_SIZE_LOCAL ))
55-
5675
source /opt/vllm/bin/activate
57-
exec vllm serve \
58-
/model-cache/models/Qwen/Qwen3-0.6B \
59-
--port 8200 \
60-
--disable-log-requests \
61-
--disable-uvicorn-access-log \
62-
--enable-expert-parallel \
63-
--data-parallel-hybrid-lb \
64-
--tensor-parallel-size \$TP_SIZE \
65-
--data-parallel-size \$((LWS_GROUP_SIZE * DP_SIZE_LOCAL)) \
66-
--data-parallel-size-local \$DP_SIZE_LOCAL \
67-
--data-parallel-address \${LWS_LEADER_ADDRESS} \
68-
--data-parallel-rpc-port 5555 \
69-
--data-parallel-start-rank \$START_RANK \
70-
--trust-remote-code \
71-
--kv_transfer_config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
76+
exec vllm serve /model-cache/models/Qwen/Qwen3-0.6B \
77+
--port 8200 \
78+
--disable-log-requests \
79+
--disable-uvicorn-access-log \
80+
--enable-expert-parallel \
81+
--data-parallel-hybrid-lb \
82+
--tensor-parallel-size \$TP_SIZE \
83+
--data-parallel-size \$((LWS_GROUP_SIZE * DP_SIZE_LOCAL)) \
84+
--data-parallel-size-local \$DP_SIZE_LOCAL \
85+
--data-parallel-address \${LWS_LEADER_ADDRESS} \
86+
--data-parallel-rpc-port 5555 \
87+
--data-parallel-start-rank \$START_RANK \
88+
--trust-remote-code \
89+
--kv_transfer_config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
7290
EOF
73-
export LLMDBENCH_VLLM_COMMON_ENVVARS_TO_YAML="LLMDBENCH_VLLM_STANDALONE_VLLM_FUSED_MOE_CHUNK_SIZE,LLMDBENCH_VLLM_STANDALONE_DP_SIZE_LOCAL,LLMDBENCH_VLLM_STANDALONE_TRITON_LIBCUDA_PATH,LLMDBENCH_VLLM_STANDALONE_VLLM_SKIP_P2P_CHECK,LLMDBENCH_VLLM_STANDALONE_VLLM_RANDOMIZE_DP_DUMMY_INPUTS,LLMDBENCH_VLLM_STANDALONE_VLLM_USE_DEEP_GEMM,LLMDBENCH_VLLM_STANDALONE_VLLM_ALL2ALL_BACKEND,LLMDBENCH_VLLM_STANDALONE_NVIDIA_GDRCOPY,LLMDBENCH_VLLM_STANDALONE_NVSHMEM_DEBUG,LLMDBENCH_VLLM_STANDALONE_NVSHMEM_REMOTE_TRANSPORT,LLMDBENCH_VLLM_STANDALONE_NVSHMEM_IB_ENABLE_IBGDA,LLMDBENCH_VLLM_STANDALONE_NVSHMEM_BOOTSTRAP_UID_SOCK_IFNAME,LLMDBENCH_VLLM_STANDALONE_GLOO_SOCKET_IFNAME,LLMDBENCH_VLLM_STANDALONE_NCCL_SOCKET_IFNAME,LLMDBENCH_VLLM_STANDALONE_NCCL_IB_HCA,LLMDBENCH_VLLM_STANDALONE_VLLM_LOGGING_LEVEL,LLMDBENCH_VLLM_STANDALONE_HF_HUB_CACHE"
7491
export LLMDBENCH_VLLM_MODELSERVICE_EXTRA_CONTAINER_CONFIG=$(mktemp)
7592
cat << EOF > ${LLMDBENCH_VLLM_MODELSERVICE_EXTRA_CONTAINER_CONFIG}
7693
workingDir: /code
7794
imagePullPolicy: Always
78-
# securityContext:
79-
# runAsUser: 0
80-
# runAsGroup: 0
81-
# capabilities:
82-
# add:
83-
# - "IPC_LOCK"
84-
# - "SYS_RAWIO"
8595
EOF
96+
8697
export LLMDBENCH_VLLM_COMMON_ACCELERATOR_RESOURCE="nvidia.com/gpu"
8798
export LLMDBENCH_VLLM_COMMON_ACCELERATOR_NR=2
8899

100+
export LLMDBENCH_VLLM_MODELSERVICE_EXTRA_VOLUME_MOUNTS=$(mktemp)
101+
cat << EOF > ${LLMDBENCH_VLLM_MODELSERVICE_EXTRA_VOLUME_MOUNTS}
102+
- name: dshm
103+
mountPath: /dev/shm
104+
EOF
105+
106+
export LLMDBENCH_VLLM_MODELSERVICE_EXTRA_VOLUMES=$(mktemp)
107+
cat << EOF > ${LLMDBENCH_VLLM_MODELSERVICE_EXTRA_VOLUMES}
108+
- name: dshm
109+
emptyDir:
110+
medium: Memory
111+
sizeLimit: 1Gi
112+
EOF
113+
89114
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_REPLICAS=1
90115
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_DATA_PARALLELISM=1
91116
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_TENSOR_PARALLELISM=1
@@ -95,19 +120,18 @@ cat << EOF > $LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_ARGS
95120
START_RANK=\$(( \${LWS_WORKER_INDEX:-0} * DP_SIZE_LOCAL ))
96121
97122
source /opt/vllm/bin/activate
98-
exec vllm serve \
99-
Qwen/Qwen3-0.6B \
100-
--port 8000 \
101-
--disable-log-requests \
102-
--disable-uvicorn-access-log \
103-
--enable-expert-parallel \
104-
--data-parallel-hybrid-lb \
105-
--tensor-parallel-size \$TP_SIZE \
106-
--data-parallel-size \$((LWS_GROUP_SIZE * DP_SIZE_LOCAL)) \
107-
--data-parallel-size-local \$DP_SIZE_LOCAL \
108-
--data-parallel-address \${LWS_LEADER_ADDRESS} \
109-
--data-parallel-rpc-port 5555 \
110-
--data-parallel-start-rank \$START_RANK \
111-
--trust-remote-code \
112-
--kv_transfer_config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
123+
exec vllm serve /model-cache/models/Qwen/Qwen3-0.6B \
124+
--port 8000 \
125+
--disable-log-requests \
126+
--disable-uvicorn-access-log \
127+
--enable-expert-parallel \
128+
--data-parallel-hybrid-lb \
129+
--tensor-parallel-size \$TP_SIZE \
130+
--data-parallel-size \$((LWS_GROUP_SIZE * DP_SIZE_LOCAL)) \
131+
--data-parallel-size-local \$DP_SIZE_LOCAL \
132+
--data-parallel-address \${LWS_LEADER_ADDRESS} \
133+
--data-parallel-rpc-port 5555 \
134+
--data-parallel-start-rank \$START_RANK \
135+
--trust-remote-code \
136+
--kv_transfer_config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
113137
EOF

setup/env.sh

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,13 @@ export LLMDBENCH_VLLM_INFRA_CHART_NAME=${LLMDBENCH_VLLM_INFRA_CHART_NAME:-oci://
9696
export LLMDBENCH_VLLM_INFRA_CHART_VERSION=${LLMDBENCH_VLLM_INFRA_CHART_VERSION:-1.0.6}
9797
export LLMDBENCH_VLLM_GAIE_CHART_NAME=${LLMDBENCH_VLLM_GAIE_CHART_NAME:-oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool}
9898
export LLMDBENCH_VLLM_GAIE_CHART_VERSION=${LLMDBENCH_VLLM_GAIE_CHART_VERSION:-v0.5.0}
99+
100+
export LLMDBENCH_VLLM_MODELSERVICE_GAIE_PLUGINS_CONFIGFILE=${LLMDBENCH_VLLM_MODELSERVICE_GAIE_PLUGINS_CONFIGFILE:-"default-plugins.yaml"}
101+
102+
if [[ -v LLMDBENCH_VLLM_MODELSERVICE_GAIE_PRESETS ]]; then
103+
echo "ℹ️ Deprecated environment variable \"LLMDBENCH_VLLM_MODELSERVICE_GAIE_PRESETS\"; use \"LLMDBENCH_VLLM_MODELSERVICE_GAIE_PLUGINS_CONFIGFILE\" instead."
104+
fi
105+
99106
export LLMDBENCH_VLLM_MODELSERVICE_RELEASE=${LLMDBENCH_VLLM_MODELSERVICE_RELEASE:-"llmdbench"}
100107
export LLMDBENCH_VLLM_MODELSERVICE_VALUES_FILE=${LLMDBENCH_VLLM_MODELSERVICE_VALUES_FILE:-"default-values.yaml"}
101108
export LLMDBENCH_VLLM_MODELSERVICE_ADDITIONAL_SETS=${LLMDBENCH_VLLM_MODELSERVICE_ADDITIONAL_SETS:-""}
@@ -323,18 +330,6 @@ if [[ -n "$overridevarlist" ]]; then
323330
export LLMDBENCH_CONTROL_OVERRIDE_COMMAND_DISPLAYED=1
324331
fi
325332

326-
if [[ "$LLMDBENCH_VLLM_MODELSERVICE_GAIE_PRESETS" == /* ]]; then
327-
export LLMDBENCH_VLLM_MODELSERVICE_GAIE_PRESETS_FULL_PATH=$(echo $LLMDBENCH_VLLM_MODELSERVICE_GAIE_PRESETS'.yaml' | $LLMDBENCH_CONTROL_SCMD 's^.yaml.yaml^.yaml^g')
328-
else
329-
export LLMDBENCH_VLLM_MODELSERVICE_GAIE_PRESETS_FULL_PATH=$(echo ${LLMDBENCH_MAIN_DIR}/setup/presets/gaie/$LLMDBENCH_VLLM_MODELSERVICE_GAIE_PRESETS'.yaml' | $LLMDBENCH_CONTROL_SCMD 's^.yaml.yaml^.yaml^g')
330-
fi
331-
if [[ ! -f $LLMDBENCH_VLLM_MODELSERVICE_GAIE_PRESETS_FULL_PATH ]]; then
332-
echo "❌ GAIE presets file \"$LLMDBENCH_VLLM_MODELSERVICE_GAIE_PRESETS_FULL_PATH\" could not be found."
333-
exit 1
334-
else
335-
export LLMDBENCH_VLLM_MODELSERVICE_GAIE_PRESETS=$(echo $LLMDBENCH_VLLM_MODELSERVICE_GAIE_PRESETS_FULL_PATH | rev | cut -d '/' -f 1 | rev)
336-
fi
337-
338333
if [[ ! -z $LLMDBENCH_HARNESS_EXPERIMENT_TREATMENTS ]]; then
339334
if [[ "$LLMDBENCH_HARNESS_EXPERIMENT_TREATMENTS" == /* ]]; then
340335
export LLMDBENCH_HARNESS_EXPERIMENT_TREATMENTS_FULL_PATH=$(echo $LLMDBENCH_HARNESS_EXPERIMENT_TREATMENTS'.yaml' | $LLMDBENCH_CONTROL_SCMD 's^.yaml.yaml^.yaml^g')

setup/functions.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -662,3 +662,17 @@ def get_image(image_registry: str, image_repo: str, image_name: str, image_tag:
662662
return is_latest_tag
663663
else:
664664
return f"{image_registry}/{image_repo}/{image_name}:{is_latest_tag}"
665+
666+
def add_config(obj_or_filename, num_spaces=0, label=""):
667+
spaces = " " * num_spaces
668+
contents = ""
669+
indented_contents = ""
670+
try:
671+
with open(obj_or_filename, 'r') as f:
672+
contents = f.read()
673+
except FileNotFoundError:
674+
# not a file
675+
contents = obj_or_filename
676+
677+
indented_contents = '\n'.join(f"{spaces}{line}" for line in contents.splitlines())
678+
return indented_contents

setup/functions.sh

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,8 +378,41 @@ function add_config {
378378
echo ""
379379
fi
380380
echo "$(cat $object_to_render)" | $LLMDBENCH_CONTROL_SCMD -e "s^\\n^\\\\\n^g" | $LLMDBENCH_CONTROL_SCMD -e "s#^#$spacec#g"
381+
else
382+
echo ${object_to_render}
383+
fi
384+
}
385+
export -f add_config
386+
387+
# make sure things are defined; should be easier with python
388+
function add_config_prep {
389+
if [[ -z ${LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_POD_CONFIG} ]]; then
390+
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_POD_CONFIG="#no config"
391+
fi
392+
if [[ -z ${LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_CONTAINER_CONFIG} ]]; then
393+
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_CONTAINER_CONFIG="#no config"
394+
fi
395+
if [[ -z ${LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_VOLUME_MOUNTS} ]]; then
396+
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_VOLUME_MOUNTS="[]"
397+
fi
398+
if [[ -z ${LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_VOLUMES} ]]; then
399+
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_VOLUMES="[]"
400+
fi
401+
if [[ -z ${LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_POD_CONFIG} ]]; then
402+
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_POD_CONFIG="#no config"
403+
fi
404+
if [[ -z ${LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_CONTAINER_CONFIG} ]]; then
405+
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_CONTAINER_CONFIG="#no config"
406+
fi
407+
if [[ -z ${LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_VOLUME_MOUNTS} ]]; then
408+
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_VOLUME_MOUNTS="[]"
409+
fi
410+
if [[ -z ${LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_VOLUMES} ]]; then
411+
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_VOLUMES="[]"
381412
fi
382413
}
414+
export -f add_config
415+
383416

384417
function add_command {
385418
local model_command=$1

0 commit comments

Comments
 (0)