Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion deploy/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ CONTROLLER_INSTANCE=${CONTROLLER_INSTANCE:-""}
# llm-d Configuration
LLM_D_OWNER=${LLM_D_OWNER:-"llm-d"}
LLM_D_PROJECT=${LLM_D_PROJECT:-"llm-d"}
-LLM_D_RELEASE=${LLM_D_RELEASE:-"v0.3.0"}
+LLM_D_RELEASE=${LLM_D_RELEASE:-"main"}
LLM_D_MODELSERVICE_NAME=${LLM_D_MODELSERVICE_NAME:-"ms-$WELL_LIT_PATH_NAME-llm-d-modelservice"}
LLM_D_EPP_NAME=${LLM_D_EPP_NAME:-"gaie-$WELL_LIT_PATH_NAME-epp"}
CLIENT_PREREQ_DIR=${CLIENT_PREREQ_DIR:-"$WVA_PROJECT/$LLM_D_PROJECT/guides/prereq/client-setup"}
Expand Down
25 changes: 25 additions & 0 deletions deploy/lib/infra_llmd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,31 @@ deploy_llm_d_infrastructure() {
log_info "Skipping image patch: llm-d-inference-scheduler already using $LLM_D_INFERENCE_SCHEDULER_IMG"
fi

# Align EPP --pool-group with the GA InferencePool API group.
# The inferencepool Helm chart v1.4.0 creates InferencePool in
# inference.networking.k8s.io/v1, but the llm-d-inference-scheduler
# image defaults --pool-group to inference.networking.x-k8s.io.
local TARGET_POOL_GROUP="inference.networking.k8s.io"
# Already-aligned check: fetch the EPP deployment as JSON and test whether any
# element of the FIRST container's args is exactly equal to the target group.
# `jq -e` turns the boolean result into the exit status; all output is discarded.
# The comparison is exact equality on a whole arg, so the single-token form
# "--pool-group=<group>" does not match here and falls through to the else
# branch, which rewrites it into the two-token form.
# NOTE(review): because stdout/stderr go to /dev/null, a failing kubectl/jq
# (e.g. deployment missing) presumably also lands in the else branch — confirm
# that is the intended behavior.
if kubectl get deployment "$LLM_D_EPP_NAME" -n "$LLMD_NS" -o json \
| jq -e --arg group "$TARGET_POOL_GROUP" \
'.spec.template.spec.containers[0].args | any(. == $group)' &>/dev/null; then
log_info "EPP --pool-group already set to $TARGET_POOL_GROUP"
else
log_info "Patching EPP --pool-group to $TARGET_POOL_GROUP (GA InferencePool API)"
# Declared separately from the assignment below.
local NEW_ARGS
# Build the replacement args array as compact JSON (-c). The jq filter walks
# the current args by index and keeps an element only if it is:
#   - not the bare "--pool-group" flag,
#   - not the element immediately following a bare "--pool-group" (its value),
#   - not a "--pool-group=<value>" single-token form.
# It then appends a fresh pair: "--pool-group", <target group>.
NEW_ARGS=$(kubectl get deployment "$LLM_D_EPP_NAME" -n "$LLMD_NS" -o json \
| jq -c --arg group "$TARGET_POOL_GROUP" '
.spec.template.spec.containers[0].args as $a |
[range(0; $a | length) | . as $i | $a[$i] |
select(. != "--pool-group") |
select($i == 0 or $a[$i - 1] != "--pool-group") |
select(startswith("--pool-group=") | not)
] + ["--pool-group", $group]')
# JSON Patch "replace" swaps the whole args array of container index 0 with
# the array computed above ($NEW_ARGS is spliced in as raw JSON).
# NOTE(review): NEW_ARGS is not validated before use, and log_success below is
# not conditioned on the patch result; whether a failure aborts earlier depends
# on the script's errexit/pipefail settings, which are not visible here — confirm.
kubectl patch deployment "$LLM_D_EPP_NAME" -n "$LLMD_NS" --type='json' \
-p="[{\"op\": \"replace\", \"path\": \"/spec/template/spec/containers/0/args\", \"value\": $NEW_ARGS}]"
log_success "EPP --pool-group patched to $TARGET_POOL_GROUP"
fi

# Enable flowControl feature gate in the EPP ConfigMap
if kubectl get configmap "$LLM_D_EPP_NAME" -n "$LLMD_NS" &> /dev/null; then
# Check if flowControl is already enabled
Expand Down
Loading