Skip to content

Commit e57bb43

Browse files
[Setup] feat: initial steps on the integration of llm-d-infra (llm-d#147)
* [Setup] feat: initial steps on the integration of `llm-d-infra` With the impending retirement of `llm-d-deployer`, our code needs to be modified to operate with the new `llm-d-infra`. The following modifications are implemented: 1) Clone `llm-d-infra` (`llm-d-deployer` is still there, but only temporarily) 2) Use `llmd-infra-installer` to deploy the base gateway infrastructure (the `helm` chart implicit invocation is expected to be idempotent). At this point, a `helmfile` is also created 3) Add a **new** step (7), where the `gateway inference endpoint` (gaie) is deployed (with the use of `helmfile`) 4) An initial version of the generation for the `model service`-specific `values.yaml` was also provided. This was added to `deploy_via_modelservice.sh` (from PR llm-d#122), but not fully integrated yet. 5) Several per-`pod` limits were added, allowing a user to specify different values for `prefill` and `decode` `pods` 6) Finally, the function `model_attribute` was enhanced **IMPORTANT**: the executable `helmfile` is now required Signed-off-by: maugustosilva <maugusto.silva@gmail.com> * Moved the checking of dependencies to step 0 Signed-off-by: maugustosilva <maugusto.silva@gmail.com> * Forgot to add `09_deploy_via_modelservice.sh` Signed-off-by: maugustosilva <maugusto.silva@gmail.com> --------- Signed-off-by: maugustosilva <maugusto.silva@gmail.com>
1 parent dfc2f55 commit e57bb43

19 files changed

+533
-204
lines changed

.github/workflows/benchmark1.yaml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,23 @@ jobs:
6666
curl -L https://github.com/mikefarah/yq/releases/download/${VERSION}/${BINARY} -o ${BINARY}
6767
chmod +x ${BINARY}
6868
sudo cp -f $(which yq) || sudo cp -f ${BINARY} /usr/local/bin/yq
69+
shell: bash
6970

7071
- name: Install make, skopeo, curl, jq
7172
run: |
7273
sudo apt-get update
7374
sudo apt-get install -y make skopeo curl jq rsync
7475
shell: bash
7576

77+
- name: Install helmfile
78+
run: |
79+
export VERSION=v0.144.0
80+
export BINARY=helmfile_linux_amd64
81+
# Download the release asset named by ${BINARY} so the installed executable
# matches the Linux runner; -f makes an HTTP error fail the step instead of
# saving the error page as the binary.
curl -fL "https://github.com/roboll/helmfile/releases/download/${VERSION}/${BINARY}" -o "${BINARY}"
82+
chmod +x ${BINARY}
83+
sudo cp -f ${BINARY} /usr/local/bin/helmfile
84+
shell: bash
85+
7686
- name: Install oc
7787
run: |
7888
OC_FILE_NAME=openshift-client-$(uname -s | sed -e "s/Linux/linux/g" -e "s/Darwin/apple-darwin/g")$(echo "-$(uname -m)" | sed -e 's/-x86_64//g' -e 's/-amd64//g' -e 's/aarch64/arm64-rhel9/g').tar.gz
@@ -83,6 +93,7 @@ jobs:
8393
sudo chmod +x /usr/local/bin/oc
8494
sudo chmod +x /usr/local/bin/kubectl
8595
rm openshift-client-*.tar.gz
96+
shell: bash
8697

8798
- name: Install Kustomize
8899
uses: multani/action-setup-kustomize@v1
@@ -102,7 +113,7 @@ jobs:
102113

103114
- name: Install Helm
104115
run: |
105-
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 && chmod 700 get_helm.sh && ./get_helm.sh
116+
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 && chmod 700 get_helm.sh && ./get_helm.sh && helm plugin install https://github.com/databus23/helm-diff
106117
shell: bash
107118

108119
- name: Cleanup target cloud (standalone)

scenarios/gke_A100_standalone_llama-3b.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,6 @@ export LLMDBENCH_HARNESS_NAME=inference-perf
1616
export LLMDBENCH_HARNESS_EXPERIMENT_PROFILE=chatbot_synthetic.yaml
1717
export LLMDBENCH_HARNESS_PVC_SIZE=1Ti
1818
export LLMDBENCH_IMAGE_REGISTRY=ghcr.io
19-
export LLMDBENCH_IMAGE_REPO=llm-d/llm-d-benchmark
19+
export LLMDBENCH_IMAGE_REPO=llm-d
20+
export LLMDBENCH_IMAGE_NAME=llm-d-benchmark
2021
export LLMDBENCH_IMAGE_TAG=v0.1.5

scenarios/gke_H100_deployer_llama-3b.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,6 @@ export LLMDBENCH_HARNESS_NAME=inference-perf
1515
export LLMDBENCH_HARNESS_EXPERIMENT_PROFILE=chatbot_synthetic.yaml
1616
export LLMDBENCH_HARNESS_PVC_SIZE=1Ti
1717
export LLMDBENCH_IMAGE_REGISTRY=ghcr.io
18-
export LLMDBENCH_IMAGE_REPO=llm-d/llm-d-benchmark
19-
export LLMDBENCH_IMAGE_TAG=v0.1.5
18+
export LLMDBENCH_IMAGE_REPO=llm-d
19+
export LLMDBENCH_IMAGE_NAME=llm-d-benchmark
20+
export LLMDBENCH_IMAGE_TAG=v0.1.5

setup/env.sh

Lines changed: 73 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -8,31 +8,43 @@ export LLMDBENCH_HF_TOKEN="${LLMDBENCH_HF_TOKEN:-}"
88

99
# Images
1010
export LLMDBENCH_IMAGE_REGISTRY=${LLMDBENCH_IMAGE_REGISTRY:-ghcr.io}
11-
export LLMDBENCH_IMAGE_REPO=${LLMDBENCH_IMAGE_REPO:-llm-d/llm-d-benchmark}
11+
export LLMDBENCH_IMAGE_REPO=${LLMDBENCH_IMAGE_REPO:-llm-d}
12+
export LLMDBENCH_IMAGE_NAME=${LLMDBENCH_IMAGE_NAME:-llm-d-benchmark}
1213
export LLMDBENCH_IMAGE_TAG=${LLMDBENCH_IMAGE_TAG:-auto}
1314
export LLMDBENCH_LLMD_IMAGE_REGISTRY=${LLMDBENCH_LLMD_IMAGE_REGISTRY:-ghcr.io}
14-
export LLMDBENCH_LLMD_IMAGE_REPO=${LLMDBENCH_LLMD_IMAGE_REPO:-llm-d/llm-d}
15+
export LLMDBENCH_LLMD_IMAGE_REPO=${LLMDBENCH_LLMD_IMAGE_REPO:-llm-d}
16+
# Image name honours its own override and defaults to "llm-d"; it must not
# inherit whatever LLMDBENCH_LLMD_IMAGE_REPO was set to.
export LLMDBENCH_LLMD_IMAGE_NAME=${LLMDBENCH_LLMD_IMAGE_NAME:-llm-d}
1517
export LLMDBENCH_LLMD_IMAGE_TAG=${LLMDBENCH_LLMD_IMAGE_TAG:-0.0.8}
1618
export LLMDBENCH_LLMD_MODELSERVICE_IMAGE_REGISTRY=${LLMDBENCH_LLMD_MODELSERVICE_IMAGE_REGISTRY:-ghcr.io}
17-
export LLMDBENCH_LLMD_MODELSERVICE_IMAGE_REPO=${LLMDBENCH_LLMD_MODELSERVICE_IMAGE_REPO:-llm-d/llm-d-model-service}
19+
export LLMDBENCH_LLMD_MODELSERVICE_IMAGE_REPO=${LLMDBENCH_LLMD_MODELSERVICE_IMAGE_REPO:-llm-d}
20+
export LLMDBENCH_LLMD_MODELSERVICE_IMAGE_NAME=${LLMDBENCH_LLMD_MODELSERVICE_IMAGE_NAME:-llm-d-model-service}
1821
export LLMDBENCH_LLMD_MODELSERVICE_IMAGE_TAG=${LLMDBENCH_LLMD_MODELSERVICE_IMAGE_TAG:-0.0.10}
1922
export LLMDBENCH_LLMD_INFERENCESCHEDULER_IMAGE_REGISTRY=${LLMDBENCH_LLMD_INFERENCESCHEDULER_IMAGE_REGISTRY:-ghcr.io}
20-
export LLMDBENCH_LLMD_INFERENCESCHEDULER_IMAGE_REPO=${LLMDBENCH_LLMD_INFERENCESCHEDULER_IMAGE_REPO:-llm-d/llm-d-inference-scheduler}
23+
export LLMDBENCH_LLMD_INFERENCESCHEDULER_IMAGE_REPO=${LLMDBENCH_LLMD_INFERENCESCHEDULER_IMAGE_REPO:-llm-d}
24+
export LLMDBENCH_LLMD_INFERENCESCHEDULER_IMAGE_NAME=${LLMDBENCH_LLMD_INFERENCESCHEDULER_IMAGE_NAME:-llm-d-inference-scheduler}
2125
export LLMDBENCH_LLMD_INFERENCESCHEDULER_IMAGE_TAG=${LLMDBENCH_LLMD_INFERENCESCHEDULER_IMAGE_TAG:-0.0.4}
2226
export LLMDBENCH_LLMD_ROUTINGSIDECAR_IMAGE_REGISTRY=${LLMDBENCH_LLMD_ROUTINGSIDECAR_IMAGE_REGISTRY:-ghcr.io}
23-
export LLMDBENCH_LLMD_ROUTINGSIDECAR_IMAGE_REPO=${LLMDBENCH_LLMD_ROUTINGSIDECAR_IMAGE_REPO:-llm-d/llm-d-routing-sidecar}
27+
export LLMDBENCH_LLMD_ROUTINGSIDECAR_IMAGE_REPO=${LLMDBENCH_LLMD_ROUTINGSIDECAR_IMAGE_REPO:-llm-d}
28+
export LLMDBENCH_LLMD_ROUTINGSIDECAR_IMAGE_NAME=${LLMDBENCH_LLMD_ROUTINGSIDECAR_IMAGE_NAME:-llm-d-routing-sidecar}
2429
export LLMDBENCH_LLMD_ROUTINGSIDECAR_IMAGE_TAG=${LLMDBENCH_LLMD_ROUTINGSIDECAR_IMAGE_TAG:-0.0.6}
2530
export LLMDBENCH_LLMD_INFERENCESIM_IMAGE_REGISTRY=${LLMDBENCH_LLMD_INFERENCESIM_IMAGE_REGISTRY:-ghcr.io}
26-
export LLMDBENCH_LLMD_INFERENCESIM_IMAGE_REPO=${LLMDBENCH_LLMD_INFERENCESIM_IMAGE_REPO:-llm-d/llm-d-inference-sim}
31+
export LLMDBENCH_LLMD_INFERENCESIM_IMAGE_REPO=${LLMDBENCH_LLMD_INFERENCESIM_IMAGE_REPO:-llm-d}
32+
export LLMDBENCH_LLMD_INFERENCESIM_IMAGE_NAME=${LLMDBENCH_LLMD_INFERENCESIM_IMAGE_NAME:-llm-d-inference-sim}
2733
export LLMDBENCH_LLMD_INFERENCESIM_IMAGE_TAG=${LLMDBENCH_LLMD_INFERENCESIM_IMAGE_TAG:-v0.1.2}
28-
export LLMDBENCH_VLLM_STANDALONE_IMAGE_REGISTRY=${LLMDBENCH_VLLM_STANDALONE_IMAGE_REGISTRY:-vllm}
29-
export LLMDBENCH_VLLM_STANDALONE_IMAGE_REPO=${LLMDBENCH_VLLM_STANDALONE_IMAGE_REPO:-vllm-openai}
34+
export LLMDBENCH_VLLM_STANDALONE_IMAGE_REGISTRY=${LLMDBENCH_VLLM_STANDALONE_IMAGE_REGISTRY:-docker.io}
35+
export LLMDBENCH_VLLM_STANDALONE_IMAGE_REPO=${LLMDBENCH_VLLM_STANDALONE_IMAGE_REPO:-vllm}
36+
export LLMDBENCH_VLLM_STANDALONE_IMAGE_NAME=${LLMDBENCH_VLLM_STANDALONE_IMAGE_NAME:-vllm-openai}
3037
export LLMDBENCH_VLLM_STANDALONE_IMAGE_TAG=${LLMDBENCH_VLLM_STANDALONE_IMAGE_TAG:-latest}
3138

3239
# External repositories
3340
export LLMDBENCH_DEPLOYER_GIT_REPO="${LLMDBENCH_DEPLOYER_GIT_REPO:-https://github.com/llm-d/llm-d-deployer.git}"
3441
export LLMDBENCH_DEPLOYER_DIR="${LLMDBENCH_DEPLOYER_DIR:-/tmp}"
3542
export LLMDBENCH_DEPLOYER_GIT_BRANCH="${LLMDBENCH_DEPLOYER_GIT_BRANCH:-main}"
43+
44+
export LLMDBENCH_INFRA_GIT_REPO="${LLMDBENCH_INFRA_GIT_REPO:-https://github.com/llm-d-incubation/llm-d-infra.git}"
45+
export LLMDBENCH_INFRA_DIR="${LLMDBENCH_INFRA_DIR:-/tmp}"
46+
export LLMDBENCH_INFRA_GIT_BRANCH="${LLMDBENCH_INFRA_GIT_BRANCH:-main}"
47+
3648
export LLMDBENCH_HARNESS_GIT_REPO="${LLMDBENCH_HARNESS_GIT_REPO:-auto}"
3749
export LLMDBENCH_HARNESS_DIR="${LLMDBENCH_HARNESS_DIR:-/tmp}"
3850
export LLMDBENCH_HARNESS_GIT_BRANCH="${LLMDBENCH_HARNESS_GIT_BRANCH:-main}"
@@ -42,6 +54,8 @@ export LLMDBENCH_VLLM_COMMON_NAMESPACE="${LLMDBENCH_VLLM_COMMON_NAMESPACE:-llmdb
4254
export LLMDBENCH_VLLM_COMMON_SERVICE_ACCOUNT="${LLMDBENCH_VLLM_COMMON_SERVICE_ACCOUNT:-default}"
4355

4456
export LLMDBENCH_VLLM_COMMON_ACCELERATOR_RESOURCE=${LLMDBENCH_VLLM_COMMON_ACCELERATOR_RESOURCE:-nvidia.com/gpu}
57+
export LLMDBENCH_VLLM_COMMON_NETWORK_RESOURCE=${LLMDBENCH_VLLM_COMMON_NETWORK_RESOURCE:-}
58+
export LLMDBENCH_VLLM_COMMON_NETWORK_NR=${LLMDBENCH_VLLM_COMMON_NETWORK_NR:-}
4559
export LLMDBENCH_VLLM_COMMON_AFFINITY=${LLMDBENCH_VLLM_COMMON_AFFINITY:-${LLMDBENCH_VLLM_COMMON_ACCELERATOR_RESOURCE}.product:NVIDIA-H100-80GB-HBM3}
4660
export LLMDBENCH_VLLM_COMMON_REPLICAS=${LLMDBENCH_VLLM_COMMON_REPLICAS:-1}
4761
export LLMDBENCH_VLLM_COMMON_PERSISTENCE_ENABLED=${LLMDBENCH_VLLM_COMMON_PERSISTENCE_ENABLED:-true}
@@ -79,16 +93,34 @@ export LLMDBENCH_VLLM_STANDALONE_EPHEMERAL_STORAGE=${LLMDBENCH_VLLM_STANDALONE_E
7993
export LLMDBENCH_VLLM_DEPLOYER_VALUES_FILE=${LLMDBENCH_VLLM_DEPLOYER_VALUES_FILE:-"fromenv"}
8094
export LLMDBENCH_VLLM_DEPLOYER_PREFILL_REPLICAS=${LLMDBENCH_VLLM_DEPLOYER_PREFILL_REPLICAS:-1}
8195
export LLMDBENCH_VLLM_DEPLOYER_PREFILL_EXTRA_ARGS=${LLMDBENCH_VLLM_DEPLOYER_PREFILL_EXTRA_ARGS:-"[--disable-log-requests]"}
96+
export LLMDBENCH_VLLM_DEPLOYER_PREFILL_ACCELERATOR_NR=${LLMDBENCH_VLLM_DEPLOYER_PREFILL_ACCELERATOR_NR:-$LLMDBENCH_VLLM_COMMON_ACCELERATOR_NR}
97+
export LLMDBENCH_VLLM_DEPLOYER_PREFILL_ACCELERATOR_MEM_UTIL=${LLMDBENCH_VLLM_DEPLOYER_PREFILL_ACCELERATOR_MEM_UTIL:-$LLMDBENCH_VLLM_COMMON_ACCELERATOR_MEM_UTIL}
98+
export LLMDBENCH_VLLM_DEPLOYER_PREFILL_NETWORK_RESOURCE=${LLMDBENCH_VLLM_DEPLOYER_PREFILL_NETWORK_RESOURCE:-$LLMDBENCH_VLLM_COMMON_NETWORK_RESOURCE}
99+
export LLMDBENCH_VLLM_DEPLOYER_PREFILL_NETWORK_NR=${LLMDBENCH_VLLM_DEPLOYER_PREFILL_NETWORK_NR:-$LLMDBENCH_VLLM_COMMON_NETWORK_NR}
100+
export LLMDBENCH_VLLM_DEPLOYER_PREFILL_CPU_NR=${LLMDBENCH_VLLM_DEPLOYER_PREFILL_CPU_NR:-$LLMDBENCH_VLLM_COMMON_CPU_NR}
101+
export LLMDBENCH_VLLM_DEPLOYER_PREFILL_CPU_MEM=${LLMDBENCH_VLLM_DEPLOYER_PREFILL_CPU_MEM:-$LLMDBENCH_VLLM_COMMON_CPU_MEM}
82102
export LLMDBENCH_VLLM_DEPLOYER_DECODE_REPLICAS=${LLMDBENCH_VLLM_DEPLOYER_DECODE_REPLICAS:-1}
83103
export LLMDBENCH_VLLM_DEPLOYER_DECODE_EXTRA_ARGS=${LLMDBENCH_VLLM_DEPLOYER_DECODE_EXTRA_ARGS:-"[--disable-log-requests]"}
84-
export LLMDBENCH_VLLM_DEPLOYER_BASECONFIGMAPREFNAME=${LLMDBENCH_VLLM_DEPLOYER_BASECONFIGMAPREFNAME:-"basic-gpu-with-nixl-and-redis-lookup-preset"}
85-
export LLMDBENCH_VLLM_DEPLOYER_MODELSERVICE_REPLICAS=${LLMDBENCH_VLLM_DEPLOYER_MODELSERVICE_REPLICAS:-1}
86-
export LLMDBENCH_VLLM_DEPLOYER_ROUTE=${LLMDBENCH_VLLM_DEPLOYER_ROUTE:-1}
104+
export LLMDBENCH_VLLM_DEPLOYER_DECODE_ACCELERATOR_NR=${LLMDBENCH_VLLM_DEPLOYER_DECODE_ACCELERATOR_NR:-$LLMDBENCH_VLLM_COMMON_ACCELERATOR_NR}
105+
export LLMDBENCH_VLLM_DEPLOYER_DECODE_ACCELERATOR_MEM_UTIL=${LLMDBENCH_VLLM_DEPLOYER_DECODE_ACCELERATOR_MEM_UTIL:-$LLMDBENCH_VLLM_COMMON_ACCELERATOR_MEM_UTIL}
106+
export LLMDBENCH_VLLM_DEPLOYER_DECODE_NETWORK_RESOURCE=${LLMDBENCH_VLLM_DEPLOYER_DECODE_NETWORK_RESOURCE:-$LLMDBENCH_VLLM_COMMON_NETWORK_RESOURCE}
107+
export LLMDBENCH_VLLM_DEPLOYER_DECODE_NETWORK_NR=${LLMDBENCH_VLLM_DEPLOYER_DECODE_NETWORK_NR:-$LLMDBENCH_VLLM_COMMON_NETWORK_NR}
108+
export LLMDBENCH_VLLM_DEPLOYER_DECODE_CPU_NR=${LLMDBENCH_VLLM_DEPLOYER_DECODE_CPU_NR:-$LLMDBENCH_VLLM_COMMON_CPU_NR}
109+
export LLMDBENCH_VLLM_DEPLOYER_DECODE_CPU_MEM=${LLMDBENCH_VLLM_DEPLOYER_DECODE_CPU_MEM:-$LLMDBENCH_VLLM_COMMON_CPU_MEM}
87110
export LLMDBENCH_VLLM_DEPLOYER_GATEWAY_CLASS_NAME=${LLMDBENCH_VLLM_DEPLOYER_GATEWAY_CLASS_NAME:-kgateway}
88111
export LLMDBENCH_VLLM_DEPLOYER_RELEASE=${LLMDBENCH_VLLM_DEPLOYER_RELEASE:-"llm-d"}
112+
export LLMDBENCH_VLLM_DEPLOYER_ROUTE=${LLMDBENCH_VLLM_DEPLOYER_ROUTE:-1}
113+
114+
# FIXME (start) delete after removal of llm-d-deployer
115+
export LLMDBENCH_VLLM_DEPLOYER_BASECONFIGMAPREFNAME=${LLMDBENCH_VLLM_DEPLOYER_BASECONFIGMAPREFNAME:-"basic-gpu-with-nixl-and-redis-lookup-preset"}
116+
export LLMDBENCH_VLLM_DEPLOYER_MODELSERVICE_REPLICAS=${LLMDBENCH_VLLM_DEPLOYER_MODELSERVICE_REPLICAS:-1}
89117
export LLMDBENCH_VLLM_DEPLOYER_RECONFIGURE_GATEWAY_AFTER_DEPLOY=${LLMDBENCH_VLLM_DEPLOYER_RECONFIGURE_GATEWAY_AFTER_DEPLOY:-0}
118+
# FIXME (end) delete after removal of llm-d-deployer
90119

91120
# Endpoint Picker Parameters, Deployer-specific
121+
export LLMDBENCH_VLLM_DEPLOYER_GAIE_PRESETS=${LLMDBENCH_VLLM_DEPLOYER_GAIE_PRESETS:-"default"}
122+
123+
# FIXME (start) delete after removal of llm-d-deployer
92124
export LLMDBENCH_VLLM_DEPLOYER_EPP_ENABLE_KVCACHE_AWARE_SCORER=${LLMDBENCH_VLLM_DEPLOYER_EPP_ENABLE_KVCACHE_AWARE_SCORER:-false}
93125
export LLMDBENCH_VLLM_DEPLOYER_EPP_KVCACHE_AWARE_SCORER_WEIGHT=${LLMDBENCH_VLLM_DEPLOYER_EPP_KVCACHE_AWARE_SCORER_WEIGHT:-1}
94126
export LLMDBENCH_VLLM_DEPLOYER_EPP_ENABLE_PREFIX_AWARE_SCORER=${LLMDBENCH_VLLM_DEPLOYER_EPP_ENABLE_PREFIX_AWARE_SCORER:-true}
@@ -115,6 +147,7 @@ export LLMDBENCH_VLLM_DEPLOYER_EPP_DECODE_ENABLE_PREFIX_AWARE_SCORER=${LLMDBENCH
115147
export LLMDBENCH_VLLM_DEPLOYER_EPP_DECODE_PREFIX_AWARE_SCORER_WEIGHT=${LLMDBENCH_VLLM_DEPLOYER_EPP_DECODE_PREFIX_AWARE_SCORER_WEIGHT:-1}
116148
export LLMDBENCH_VLLM_DEPLOYER_EPP_DECODE_ENABLE_SESSION_AWARE_SCORER=${LLMDBENCH_VLLM_DEPLOYER_EPP_DECODE_ENABLE_SESSION_AWARE_SCORER:-false}
117149
export LLMDBENCH_VLLM_DEPLOYER_EPP_DECODE_SESSION_AWARE_SCORER_WEIGHT=${LLMDBENCH_VLLM_DEPLOYER_EPP_DECODE_SESSION_AWARE_SCORER_WEIGHT:-1}
150+
# FIXME (end) delete after removal of llm-d-deployer
118151

119152
# Modelservice (helm chart) specific parameters
120153
export LLMDBENCH_VLLM_MODELSERVICE_VALUES_FILE=${LLMDBENCH_VLLM_MODELSERVICE_VALUES_FILE:-"default-values.yaml"}
@@ -182,12 +215,15 @@ function model_attribute {
182215
esac
183216

184217
local modelcomponents=$(echo $model | cut -d '/' -f 2 | tr '[:upper:]' '[:lower:]' | $LLMDBENCH_CONTROL_SCMD -e 's^qwen^qwen-^g' -e 's^-^\n^g')
218+
local provider=$(echo $model | cut -d '/' -f 1)
185219
local type=$(echo "${modelcomponents}" | grep -Ei "nstruct|hf|chat|speech|vision")
186-
local parameters=$(echo "${modelcomponents}" | grep -Ei "[0-9].*b" | $LLMDBENCH_CONTROL_SCMD -e 's^a^^' -e 's^\.^p^')
187-
local majorversion=$(echo "${modelcomponents}" | grep -Ei "^[0-9]" | grep -Evi "b|E" | cut -d '.' -f 1)
220+
local parameters=$(echo "${modelcomponents}" | grep -Ei "[0-9].*b|[0-9].*m" | $LLMDBENCH_CONTROL_SCMD -e 's^a^^' -e 's^\.^p^')
221+
local majorversion=$(echo "${modelcomponents}" | grep -Ei "^[0-9]" | grep -Evi "b|E" | $LLMDBENCH_CONTROL_SCMD -e "s/$parameters//g" | cut -d '.' -f 1)
188222
local kind=$(echo "${modelcomponents}" | head -n 1 | cut -d '/' -f 1)
189-
local label=${kind}-${majorversion}-${parameters}
190223
local as_label=$(echo $model | tr '[:upper:]' '[:lower:]' | $LLMDBENCH_CONTROL_SCMD -e "s^/^-^g")
224+
local label=$(echo ${kind}-${majorversion}-${parameters} | $LLMDBENCH_CONTROL_SCMD -e 's^-$^^g' -e 's^--^^g')
225+
local as_label=$(echo $model | tr '[:upper:]' '[:lower:]' | $LLMDBENCH_CONTROL_SCMD -e "s^/^-^g" -e "s^\.^-^g")
226+
local folder=$(echo $model | tr '[:upper:]' '[:lower:]' | $LLMDBENCH_CONTROL_SCMD -e 's^/^_^g' -e 's^-^_^g')
191227

192228
if [[ $attribute != "model" ]];
193229
then
@@ -255,45 +291,29 @@ else
255291
fi
256292
fi
257293

258-
if [[ $LLMDBENCH_CONTROL_DEPENDENCIES_CHECKED -eq 0 && ! -f ~/.llmdbench_dependencies_checked ]]
259-
then
260-
deplist="$LLMDBENCH_CONTROL_SCMD $LLMDBENCH_CONTROL_PCMD $LLMDBENCH_CONTROL_KCMD $LLMDBENCH_CONTROL_HCMD kubectl kustomize rsync"
261-
echo "Checking dependencies \"$deplist\""
262-
for req in $deplist kubectl kustomize; do
263-
echo -n "Checking dependency \"${req}\"..."
264-
is_req=$(which ${req} || true)
265-
if [[ -z ${is_req} ]]; then
266-
echo "❌ Dependency \"${req}\" is missing"
267-
exit 1
268-
fi
269-
echo "done"
270-
done
271-
touch ~/.llmdbench_dependencies_checked
272-
export LLMDBENCH_CONTROL_DEPENDENCIES_CHECKED=1
273-
fi
274-
275294
function get_image {
276295
local image_registry=$1
277296
local image_repo=$2
278-
local image_tag=$3
279-
local tag_only=${4:-0}
297+
local image_name=$3
298+
local image_tag=$4
299+
local tag_only=${5:-0}
280300

281301
is_latest_tag=$image_tag
282302
if [[ $image_tag == "auto" ]]; then
283303
if [[ $LLMDBENCH_CONTROL_CCMD == "podman" ]]; then
284-
is_latest_tag=$($LLMDBENCH_CONTROL_CCMD search --list-tags ${image_registry}/${image_repo} | tail -1 | awk '{ print $2 }' || true)
304+
is_latest_tag=$($LLMDBENCH_CONTROL_CCMD search --list-tags ${image_registry}/${image_repo}/${image_name} | tail -1 | awk '{ print $2 }' || true)
285305
else
286-
is_latest_tag=$(skopeo list-tags docker://${image_registry}/${image_repo} | jq -r .Tags[] | tail -1)
306+
is_latest_tag=$(skopeo list-tags docker://${image_registry}/${image_repo}/${image_name} | jq -r .Tags[] | tail -1)
287307
fi
288308
if [[ -z ${is_latest_tag} ]]; then
289-
echo "❌ Unable to find latest tag for image \"${image_registry}/${image_repo}\""
309+
echo "❌ Unable to find latest tag for image \"${image_registry}/${image_repo}/${image_name}\""
290310
exit 1
291311
fi
292312
fi
293313
if [[ $tag_only -eq 1 ]]; then
294314
echo ${is_latest_tag}
295315
else
296-
echo $image_registry/$image_repo:${is_latest_tag}
316+
echo $image_registry/$image_repo/${image_name}:${is_latest_tag}
297317
fi
298318
}
299319

@@ -321,6 +341,18 @@ if [[ ! -z $LLMDBENCH_DEPLOY_SCENARIO ]]; then
321341
fi
322342
fi
323343

344+
if [[ "$LLMDBENCH_VLLM_DEPLOYER_GAIE_PRESETS" == /* ]]; then
345+
export LLMDBENCH_VLLM_DEPLOYER_GAIE_PRESETS_FULL_PATH=$(echo $LLMDBENCH_VLLM_DEPLOYER_GAIE_PRESETS'.yaml' | $LLMDBENCH_CONTROL_SCMD 's^.yaml.yaml^.yaml^g')
346+
else
347+
export LLMDBENCH_VLLM_DEPLOYER_GAIE_PRESETS_FULL_PATH=$(echo ${LLMDBENCH_MAIN_DIR}/setup/presets/gaie/$LLMDBENCH_VLLM_DEPLOYER_GAIE_PRESETS'.yaml' | $LLMDBENCH_CONTROL_SCMD 's^.yaml.yaml^.yaml^g')
348+
fi
349+
if [[ ! -f $LLMDBENCH_VLLM_DEPLOYER_GAIE_PRESETS_FULL_PATH ]]; then
350+
echo "❌ GAIE presets file \"$LLMDBENCH_VLLM_DEPLOYER_GAIE_PRESETS_FULL_PATH\" could not be found."
351+
exit 1
352+
else
353+
export LLMDBENCH_VLLM_DEPLOYER_GAIE_PRESETS=$(echo $LLMDBENCH_VLLM_DEPLOYER_GAIE_PRESETS_FULL_PATH | rev | cut -d '/' -f 1 | rev)
354+
fi
355+
324356
overridevarlist=$(env | grep _CLIOVERRIDE_ | cut -d '=' -f 1 || true)
325357
if [[ -n "$overridevarlist" ]]; then
326358
for overridevar in $overridevarlist; do
@@ -356,6 +388,7 @@ export LLMDBENCH_CONTROL_WORK_DIR_SET=${LLMDBENCH_CONTROL_WORK_DIR_SET:-0}
356388

357389
function prepare_work_dir {
358390
mkdir -p ${LLMDBENCH_CONTROL_WORK_DIR}/setup/yamls
391+
mkdir -p ${LLMDBENCH_CONTROL_WORK_DIR}/setup/helm
359392
mkdir -p ${LLMDBENCH_CONTROL_WORK_DIR}/setup/commands
360393
mkdir -p ${LLMDBENCH_CONTROL_WORK_DIR}/environment
361394
mkdir -p ${LLMDBENCH_CONTROL_WORK_DIR}/workload/harnesses
@@ -746,9 +779,9 @@ create_or_update_hf_secret() {
746779
}
747780
export -f create_or_update_hf_secret
748781

749-
#
782+
#
750783
# vLLM Model Download Utilities
751-
#
784+
#
752785

753786
validate_and_create_pvc() {
754787
local kcmd="$1"
@@ -882,7 +915,7 @@ wait_for_download_job() {
882915
announce "🙀 Pod did not become Ready"
883916
llmdbench_execute_cmd "${kcmd} logs job/download-model -n ${namespace}" ${LLMDBENCH_CONTROL_DRY_RUN} ${LLMDBENCH_CONTROL_VERBOSE} 0 1 0
884917
exit 1
885-
fi
918+
fi
886919

887920
announce "⏳ Waiting up to ${timeout}s for job to complete..."
888921
llmdbench_execute_cmd "${kcmd} wait --for=condition=complete --timeout="${timeout}"s job/download-model -n ${namespace}" ${LLMDBENCH_CONTROL_DRY_RUN} ${LLMDBENCH_CONTROL_VERBOSE}

setup/presets/gaie/default.yaml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
apiVersion: inference.networking.x-k8s.io/v1alpha1
2+
kind: EndpointPickerConfig
3+
plugins:
4+
- type: low-queue-filter
5+
parameters:
6+
threshold: 128
7+
- type: lora-affinity-filter
8+
parameters:
9+
threshold: 0.999
10+
- type: least-queue-filter
11+
- type: least-kv-cache-filter
12+
- type: decision-tree-filter
13+
name: low-latency-filter
14+
parameters:
15+
current:
16+
pluginRef: low-queue-filter
17+
nextOnSuccess:
18+
decisionTree:
19+
current:
20+
pluginRef: lora-affinity-filter
21+
nextOnSuccessOrFailure:
22+
decisionTree:
23+
current:
24+
pluginRef: least-queue-filter
25+
nextOnSuccessOrFailure:
26+
decisionTree:
27+
current:
28+
pluginRef: least-kv-cache-filter
29+
nextOnFailure:
30+
decisionTree:
31+
current:
32+
pluginRef: least-queue-filter
33+
nextOnSuccessOrFailure:
34+
decisionTree:
35+
current:
36+
pluginRef: lora-affinity-filter
37+
nextOnSuccessOrFailure:
38+
decisionTree:
39+
current:
40+
pluginRef: least-kv-cache-filter
41+
- type: random-picker
42+
parameters:
43+
maxNumOfEndpoints: 1
44+
- type: single-profile-handler
45+
schedulingProfiles:
46+
- name: default
47+
plugins:
48+
- pluginRef: low-latency-filter
49+
- pluginRef: random-picker

setup/presets/gaie/pd.yaml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
apiVersion: inference.networking.x-k8s.io/v1alpha1
2+
kind: EndpointPickerConfig
3+
plugins:
4+
- type: prefill-header-handler
5+
- type: prefix-cache-scorer
6+
parameters:
7+
hashBlockSize: 5
8+
maxPrefixBlocksToMatch: 256
9+
lruCapacityPerServer: 31250
10+
- type: prefill-filter
11+
- type: decode-filter
12+
- type: max-score-picker
13+
- type: pd-profile-handler
14+
parameters:
15+
threshold: 10
16+
hashBlockSize: 5
17+
schedulingProfiles:
18+
- name: prefill
19+
plugins:
20+
- pluginRef: prefill-filter
21+
- pluginRef: max-score-picker
22+
- pluginRef: prefix-cache-scorer
23+
weight: 50
24+
- name: decode
25+
plugins:
26+
- pluginRef: decode-filter
27+
- pluginRef: max-score-picker
28+
- pluginRef: prefix-cache-scorer
29+
weight: 50

0 commit comments

Comments
 (0)