@@ -111,9 +111,21 @@ export PVC_SIZE="${PVC_SIZE:-40Gi}"
111111# CPU request per vLLM replica
112112export VLLM_CPU_RESOURCES=" ${VLLM_CPU_RESOURCES:- 10} "
113113
114+ # Memory request per vLLM replica
115+ export VLLM_MEMORY_RESOURCES=" ${VLLM_MEMORY_RESOURCES:- 40Gi} "
116+
117+ # GPU memory utilization (optional, default is null)
118+ export VLLM_GPU_MEMORY_UTILIZATION=" ${VLLM_GPU_MEMORY_UTILIZATION:- null} "
119+
114120# Number of vLLM replicas
115121export VLLM_REPLICA_COUNT=" ${VLLM_REPLICA_COUNT:- 3} "
116122
123+ # Tensor parallel size (optional, default is null)
124+ export VLLM_TENSOR_PARALLEL_SIZE=" ${VLLM_TENSOR_PARALLEL_SIZE:- null} "
125+
126+ # Number of GPUs per vLLM instance
127+ export VLLM_GPU_COUNT_PER_INSTANCE=" ${VLLM_GPU_COUNT_PER_INSTANCE:- 1} "
128+
117129# vLLM deployment name (derived from release + model)
118130export VLLM_DEPLOYMENT_NAME=" ${VLLM_HELM_RELEASE_NAME} -${MODEL_NAME_SAFE} "
119131
@@ -139,7 +151,7 @@ if [[ "$CLEAN" == "true" ]]; then
139151 # Delete inference scheduler and gateway resources.
140152 kustomize build deploy/environments/dev/kubernetes-kgateway | envsubst | kubectl -n " ${NAMESPACE} " delete --ignore-not-found=true -f -
141153 # Delete vllm resources.
142- helm uninstall vllm --namespace ${NAMESPACE}
154+ helm uninstall vllm --namespace ${NAMESPACE} --ignore-not-found
143155 exit 0
144156fi
145157
@@ -163,6 +175,11 @@ helm upgrade --install "$VLLM_HELM_RELEASE_NAME" "$VLLM_CHART_DIR" \
163175 --set vllm.model.label=" $MODEL_NAME_SAFE " \
164176 --set vllm.replicaCount=" $VLLM_REPLICA_COUNT " \
165177 --set vllm.resources.requests.cpu=" $VLLM_CPU_RESOURCES " \
178+ --set vllm.resources.requests.memory=" $VLLM_MEMORY_RESOURCES " \
179+ --set vllm.resources.requests." nvidia\.com/gpu" =" $VLLM_GPU_COUNT_PER_INSTANCE " \
180+ --set vllm.resources.limits." nvidia\.com/gpu" =" $VLLM_GPU_COUNT_PER_INSTANCE " \
181+ --set vllm.gpuMemoryUtilization=" ${VLLM_GPU_MEMORY_UTILIZATION} " \
182+ --set vllm.tensorParallelSize=" ${VLLM_TENSOR_PARALLEL_SIZE} " \
166183 --set persistence.enabled=true \
167184 --set persistence.size=" $PVC_SIZE " \
168185 --set redis.nameSuffix=" $REDIS_DEPLOYMENT_NAME " \
0 commit comments