Skip to content

Commit a8ebf33

Browse files
committed
fix: API bump
- old test was based on v1alpha2 of GIE for infpool; new default is v1 for infpool. Signed-off-by: Wen Zhou <wenzhou@redhat.com>
1 parent c3e76ce commit a8ebf33

2 files changed

Lines changed: 46 additions & 6 deletions

File tree

deploy/install.sh

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,18 @@ VALUES_FILE=${VALUES_FILE:-"$WVA_PROJECT/charts/workload-variant-autoscaler/valu
4343
# Controller instance identifier for multi-controller isolation (optional)
4444
# When set, adds controller_instance label to metrics and HPA selectors
4545
CONTROLLER_INSTANCE=${CONTROLLER_INSTANCE:-""}
46+
# InferencePool API group
47+
# inference.networking.k8s.io for v1 (the new default)
48+
# inference.networking.x-k8s.io for v1alpha2
49+
POOL_GROUP=${POOL_GROUP:-"inference.networking.k8s.io"}
50+
if [ "$POOL_GROUP" = "inference.networking.k8s.io" ]; then
51+
POOL_VERSION="v1"
52+
elif [ "$POOL_GROUP" = "inference.networking.x-k8s.io" ]; then
53+
POOL_VERSION="v1alpha2"
54+
else
55+
log_error "Unknown POOL_GROUP: $POOL_GROUP (expected inference.networking.k8s.io or inference.networking.x-k8s.io)"
56+
exit 1
57+
fi
4658

4759
# llm-d Configuration
4860
LLM_D_OWNER=${LLM_D_OWNER:-"llm-d"}
@@ -612,18 +624,39 @@ deploy_second_model_infrastructure() {
612624

613625
# Create second InferencePool with different selector
614626
log_info "Creating second InferencePool: $POOL_NAME_2"
615-
cat <<EOF | kubectl apply -n $LLMD_NS -f -
616-
apiVersion: inference.networking.x-k8s.io/v1alpha2
627+
if [ "$POOL_VERSION" = "v1" ]; then
628+
cat <<EOF | kubectl apply -n $LLMD_NS -f -
629+
apiVersion: ${POOL_GROUP}/${POOL_VERSION}
630+
kind: InferencePool
631+
metadata:
632+
name: $POOL_NAME_2
633+
spec:
634+
selector:
635+
matchLabels:
636+
llm-d.ai/model-pool: "$MODEL_LABEL_2"
637+
llm-d.ai/inference-serving: "true"
638+
targetPorts:
639+
- number: 8000
640+
endpointPickerRef:
641+
name: ${POOL_NAME_2}-epp
642+
port:
643+
number: 9002
644+
EOF
645+
else
646+
cat <<EOF | kubectl apply -n $LLMD_NS -f -
647+
apiVersion: ${POOL_GROUP}/${POOL_VERSION}
617648
kind: InferencePool
618649
metadata:
619650
name: $POOL_NAME_2
620651
spec:
621652
targetPortNumber: 8000
622653
selector:
623654
llm-d.ai/model-pool: "$MODEL_LABEL_2"
655+
llm-d.ai/inference-serving: "true"
624656
extensionRef:
625657
name: ${POOL_NAME_2}-epp
626658
EOF
659+
fi
627660

628661
# Create EPP deployment for second pool
629662
log_info "Creating EPP deployment for second pool"
@@ -707,12 +740,14 @@ spec:
707740
matchLabels:
708741
app: ${MS_NAME_2}-decode
709742
llm-d.ai/model-pool: "$MODEL_LABEL_2"
743+
llm-d.ai/inference-serving: "true"
710744
template:
711745
metadata:
712746
labels:
713747
app: ${MS_NAME_2}-decode
714748
llm-d.ai/model-pool: "$MODEL_LABEL_2"
715749
llm-d.ai/model: "${MODEL_ID_2_SANITIZED}"
750+
llm-d.ai/inference-serving: "true"
716751
spec:
717752
containers:
718753
- name: vllm
@@ -764,7 +799,7 @@ spec:
764799
EOF
765800

766801
# Create InferenceModel for second model (maps model name to pool)
767-
# Note: InferenceModel CRD may not be available in all environments
802+
# TODO: InferenceModel only exists in inference.networking.x-k8s.io/v1alpha2; should use InferenceModelRewrite instead
768803
if kubectl get crd inferencemodels.inference.networking.x-k8s.io &>/dev/null; then
769804
log_info "Creating InferenceModel for second model"
770805
cat <<EOF | kubectl apply -n $LLMD_NS -f -
@@ -833,7 +868,11 @@ deploy_llm_d_infrastructure() {
833868

834869
if [ ! -d "$LLM_D_PROJECT" ]; then
835870
log_info "Cloning $LLM_D_PROJECT repository (release: $LLM_D_RELEASE)"
836-
git clone -b $LLM_D_RELEASE -- https://github.com/$LLM_D_OWNER/$LLM_D_PROJECT.git $LLM_D_PROJECT &> /dev/null
871+
if ! git clone -b "$LLM_D_RELEASE" -- https://github.com/$LLM_D_OWNER/$LLM_D_PROJECT.git "$LLM_D_PROJECT" 2>&1 | grep -v "Cloning into"; then
872+
log_error "Failed to clone $LLM_D_PROJECT repository (release: $LLM_D_RELEASE)"
873+
return 1
874+
fi
875+
log_success "Successfully cloned $LLM_D_PROJECT repository (release: $LLM_D_RELEASE)"
837876
fi
838877

839878
# Check for HF_TOKEN (use dummy for emulated deployments)
@@ -1073,6 +1112,7 @@ deploy_llm_d_infrastructure() {
10731112

10741113
# Deploy InferenceObjective for GIE queuing when flow control is enabled (scale-from-zero / e2e).
10751114
# Enables gateway-level queuing so inference_extension_flow_control_queue_size is populated.
1115+
# Note: InferenceObjective only exists in inference.networking.x-k8s.io v1alpha2
10761116
if [ "$ENABLE_SCALE_TO_ZERO" == "true" ] || [ "$E2E_TESTS_ENABLED" == "true" ]; then
10771117
if kubectl get crd inferenceobjectives.inference.networking.x-k8s.io &>/dev/null; then
10781118
local infobj_file="${WVA_PROJECT}/deploy/inference-objective-e2e.yaml"

internal/utils/pool/pool.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,9 @@ func GetPoolGKNN(poolGroup string) (common.GKNN, error) {
154154
poolNamespace = "default"
155155
)
156156

157-
// Default to v1alpha2 group if empty
157+
// Default to the v1 group if empty (llm-d already defaults to v1 rather than v1alpha2)
158158
if poolGroup == "" {
159-
poolGroup = PoolGroupV1Alpha2
159+
poolGroup = PoolGroupV1
160160
}
161161

162162
// Validate poolGroup against valid values

0 commit comments

Comments
 (0)