Skip to content

Commit b0010c6

Browse files
authored
LeaderWorkerSet Support (llm-d#910)
* Add shuynh2017 to the OWNERS list * lws support2 * address comments * fix merge * update doc * fix test error * install lws on openshift * fix e2e test * fix bug: missing a change for scaletarget * fix e2e test * fix lws test to use same pool as deployment * address comments * remove flaky from test * double go test timeout * skip multi-var saturation for lws just like for deployment * bump timeout to 60m * mark Scale-From-Zero tests as flaky - never worked locally with main branch code * add missing rbac * fix merge issues * fix import * add scale-from-zero LWS * add 1 leader, 0 worker tests, clean env before run
1 parent 78bc8c9 commit b0010c6

57 files changed

Lines changed: 6491 additions & 518 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

charts/workload-variant-autoscaler/templates/hpa.yaml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,18 @@ metadata:
88
{{- include "workload-variant-autoscaler.labels" . | nindent 4 }}
99
spec:
1010
scaleTargetRef:
11+
{{- if eq .Values.llmd.scaleTargetKind "LeaderWorkerSet" }}
12+
apiVersion: leaderworkerset.x-k8s.io/v1
13+
kind: LeaderWorkerSet
14+
{{- else }}
1115
apiVersion: apps/v1
1216
kind: Deployment
13-
name: {{ printf "%s-decode" .Values.llmd.modelName }}
17+
{{- end }}
18+
{{- if .Values.llmd.scaleTargetName }}
19+
name: {{ .Values.llmd.scaleTargetName }}
20+
{{- else }}
21+
name: {{ .Values.llmd.deploymentName | default (printf "%s-decode" .Values.llmd.modelName) }}
22+
{{- end }}
1423
minReplicas: {{ .Values.hpa.minReplicas }}
1524
maxReplicas: {{ .Values.hpa.maxReplicas }}
1625
behavior:

charts/workload-variant-autoscaler/templates/rbac/role.yaml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,31 @@ rules:
7575
- patch
7676
- update
7777
- watch
78+
- apiGroups:
79+
- apps
80+
resources:
81+
- statefulsets
82+
verbs:
83+
- get
84+
- list
85+
- watch
86+
- apiGroups:
87+
- leaderworkerset.x-k8s.io
88+
resources:
89+
- leaderworkersets
90+
verbs:
91+
- get
92+
- list
93+
- patch
94+
- update
95+
- watch
96+
- apiGroups:
97+
- leaderworkerset.x-k8s.io
98+
resources:
99+
- leaderworkersets/scale
100+
verbs:
101+
- get
102+
- update
78103
- apiGroups:
79104
- apps
80105
resources:

charts/workload-variant-autoscaler/templates/variantautoscaling.yaml

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,22 @@ metadata:
1616
# This is essentially static input to the optimizer
1717
spec:
1818
# ScaleTargetRef references the target resource to scale (similar to HPA)
19-
# TODO: Support templating for scaleTargetRef to enable managing groups of deployments
2019
scaleTargetRef:
20+
{{- if eq .Values.llmd.scaleTargetKind "LeaderWorkerSet" }}
21+
apiVersion: leaderworkerset.x-k8s.io/v1
22+
kind: LeaderWorkerSet
23+
{{- else }}
2124
apiVersion: apps/v1
2225
kind: Deployment
26+
{{- end }}
27+
{{- if .Values.llmd.scaleTargetName }}
28+
name: {{ .Values.llmd.scaleTargetName }}
29+
{{- else }}
2330
name: {{ .Values.llmd.deploymentName | default (printf "%s-decode" .Values.llmd.modelName) }}
31+
{{- end }}
32+
2433
# OpenAI API compatible name of the model
2534
modelID: {{ .Values.llmd.modelID | quote }}
2635
# Cost per replica for this variant (used in saturation analysis)
2736
variantCost: {{ .Values.va.variantCost | default "10.0" | quote }}
28-
{{- end }}
37+
{{- end }}

charts/workload-variant-autoscaler/values-dev.yaml

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,20 @@ llmd:
5959
namespace: llm-d-autoscaler
6060
modelName: ms-workload-autoscaler-llm-d-modelservice
6161
modelID: "Qwen/Qwen3-0.6B"
62+
63+
# scaleTargetKind: Optional. If not specified or empty, it will be set to "Deployment".
64+
# Valid values are "Deployment", "LeaderWorkerSet" - case sensitive.
65+
#scaleTargetKind: "Deployment"
66+
67+
# scaleTargetName: Optional. Name of the scale target resource. If unset, the chart falls back to llmd.deploymentName, then to "<modelName>-decode".
68+
# For "Deployment": name of the Deployment.
69+
# For "LeaderWorkerSet": name of the LeaderWorkerSet.
70+
#scaleTargetName:
71+
6272
va:
6373
enabled: true
6474
# accelerator: Optional. If not specified, it will be auto-discovered
65-
# from target deployment. If specified, it will be used as fall-back value if it can't
75+
# from scale target. If specified, it will be used as fall-back value if it can't
6676
# be discovered.
6777
accelerator: H100
6878
sloTpot: 10

charts/workload-variant-autoscaler/values.yaml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,19 @@ llmd:
8888
modelName: ms-workload-autoscaler-llm-d-modelservice
8989
modelID: "Qwen/Qwen3-0.6B"
9090

91+
# scaleTargetKind: Optional. If not specified or empty, it will be set to "Deployment".
92+
# Valid values are "Deployment", "LeaderWorkerSet" - case sensitive.
93+
#scaleTargetKind: "Deployment"
94+
95+
# scaleTargetName: Optional. Name of the scale target resource. If unset, the chart falls back to llmd.deploymentName, then to "<modelName>-decode".
96+
# For "Deployment": name of the Deployment.
97+
# For "LeaderWorkerSet": name of the LeaderWorkerSet.
98+
#scaleTargetName:
99+
91100
va:
92101
enabled: true
93102
# accelerator: Optional. If not specified, it will be auto-discovered
94-
# from target deployment. If specified, it will be used as fall-back value if it can't
103+
# from scale target. If specified, it will be used as fall-back value if it can't
95104
# be discovered.
96105
accelerator: H100
97106
# Cost per replica in arbitrary units (higher = more expensive to scale)

cmd/main.go

Lines changed: 60 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,13 @@ import (
3030
// to ensure that exec-entrypoint and run can make use of them.
3131
_ "k8s.io/client-go/plugin/pkg/client/auth"
3232

33+
"github.com/go-logr/logr"
3334
flag "github.com/spf13/pflag"
3435
"k8s.io/apimachinery/pkg/runtime"
3536
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
37+
"k8s.io/client-go/discovery"
3638
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
39+
"k8s.io/client-go/rest"
3740
ctrl "sigs.k8s.io/controller-runtime"
3841
"sigs.k8s.io/controller-runtime/pkg/cache"
3942
"sigs.k8s.io/controller-runtime/pkg/certwatcher"
@@ -48,6 +51,7 @@ import (
4851
"github.com/llm-d/llm-d-workload-variant-autoscaler/internal/collector/source"
4952
"github.com/llm-d/llm-d-workload-variant-autoscaler/internal/collector/source/prometheus"
5053
"github.com/llm-d/llm-d-workload-variant-autoscaler/internal/config"
54+
"github.com/llm-d/llm-d-workload-variant-autoscaler/internal/constants"
5155
"github.com/llm-d/llm-d-workload-variant-autoscaler/internal/controller"
5256
"github.com/llm-d/llm-d-workload-variant-autoscaler/internal/controller/indexers"
5357
"github.com/llm-d/llm-d-workload-variant-autoscaler/internal/datastore"
@@ -63,6 +67,7 @@ import (
6367
crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
6468
inferencePoolV1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
6569
inferencePoolV1alpha2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
70+
lwsv1 "sigs.k8s.io/lws/api/leaderworkerset/v1"
6671
//+kubebuilder:scaffold:imports
6772
)
6873

@@ -76,9 +81,44 @@ func init() {
7681
utilruntime.Must(promoperator.AddToScheme(scheme))
7782
utilruntime.Must(inferencePoolV1.Install(scheme))
7883
utilruntime.Must(inferencePoolV1alpha2.Install(scheme))
84+
// Note: LeaderWorkerSet scheme is added conditionally in main() after checking if CRD exists
7985
//+kubebuilder:scaffold:scheme
8086
}
8187

88+
// checkLeaderWorkerSetCRD checks if the LeaderWorkerSet CRD is installed in the cluster
89+
// TODO: this is checked once at start up for now. We should handle LWS installed after controller starts.
90+
func checkLeaderWorkerSetCRD(restConfig *rest.Config, logger logr.Logger) bool {
91+
discoveryClient, err := discovery.NewDiscoveryClientForConfig(restConfig)
92+
if err != nil {
93+
logger.Error(err, "failed to create discovery client for CRD detection - assuming LWS not installed")
94+
return false
95+
}
96+
97+
// Check if leaderworkersets.leaderworkerset.x-k8s.io CRD exists
98+
_, apiLists, err := discoveryClient.ServerGroupsAndResources()
99+
if err != nil {
100+
// Partial errors are common (e.g., unavailable API services), so check if we got any results
101+
if apiLists == nil {
102+
logger.Error(err, "failed to discover API resources - assuming LWS not installed")
103+
return false
104+
}
105+
// Log but continue with partial results
106+
logger.V(1).Info("partial error discovering API resources (this is usually fine)", "error", err)
107+
}
108+
109+
for _, apiList := range apiLists {
110+
if apiList.GroupVersion == constants.LeaderWorkerSetAPIVersion {
111+
for _, resource := range apiList.APIResources {
112+
if resource.Kind == constants.LeaderWorkerSetKind {
113+
return true
114+
}
115+
}
116+
}
117+
}
118+
119+
return false
120+
}
121+
82122
// nolint:gocyclo
83123
func main() {
84124
// Command-line flags
@@ -153,6 +193,18 @@ func main() {
153193
}
154194
setupLog.Info("Configuration loaded successfully")
155195

196+
// Conditionally add LeaderWorkerSet scheme if CRD exists
197+
lwsEnabled := checkLeaderWorkerSetCRD(restConfig, setupLog)
198+
if lwsEnabled {
199+
if err := lwsv1.AddToScheme(scheme); err != nil {
200+
setupLog.Error(err, "failed to add LeaderWorkerSet scheme")
201+
os.Exit(1)
202+
}
203+
setupLog.Info("LeaderWorkerSet CRD detected - support enabled")
204+
} else {
205+
setupLog.Info("LeaderWorkerSet CRD not found - support disabled (Deployment-only mode)")
206+
}
207+
156208
// if the enable-http2 flag is false (the default), http/2 should be disabled
157209
// due to its vulnerabilities. More specifically, disabling http/2 will
158210
// prevent from being vulnerable to the HTTP/2 Stream Cancellation and
@@ -425,13 +477,14 @@ func main() {
425477
}
426478

427479
// Create the reconciler with unified Config and datastore
428-
reconciler := &controller.VariantAutoscalingReconciler{
429-
Client: mgr.GetClient(),
430-
Scheme: mgr.GetScheme(),
431-
Recorder: mgr.GetEventRecorderFor("workload-variant-autoscaler-controller-manager"),
432-
Config: cfg, // Pass unified Config to reconciler
433-
Datastore: ds, // Pass datastore for namespace tracking
434-
}
480+
reconciler := controller.NewVariantAutoscalingReconciler(
481+
mgr.GetClient(),
482+
mgr.GetScheme(),
483+
mgr.GetEventRecorderFor("workload-variant-autoscaler-controller-manager"),
484+
cfg,
485+
ds,
486+
lwsEnabled,
487+
)
435488

436489
// Setup the controller with the manager
437490
if err = reconciler.SetupWithManager(mgr); err != nil {

config/rbac/role.yaml

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ rules:
1717
- ""
1818
resources:
1919
- configmaps/status
20-
- secrets
2120
verbs:
2221
- get
2322
- apiGroups:
@@ -32,6 +31,7 @@ rules:
3231
resources:
3332
- namespaces
3433
- pods
34+
- secrets
3535
- services
3636
verbs:
3737
- get
@@ -69,6 +69,7 @@ rules:
6969
- apps
7070
resources:
7171
- replicasets
72+
- statefulsets
7273
verbs:
7374
- get
7475
- list
@@ -82,6 +83,23 @@ rules:
8283
- get
8384
- list
8485
- watch
86+
- apiGroups:
87+
- leaderworkerset.x-k8s.io
88+
resources:
89+
- leaderworkersets
90+
verbs:
91+
- get
92+
- list
93+
- patch
94+
- update
95+
- watch
96+
- apiGroups:
97+
- leaderworkerset.x-k8s.io
98+
resources:
99+
- leaderworkersets/scale
100+
verbs:
101+
- get
102+
- update
85103
- apiGroups:
86104
- llmd.ai
87105
resources:
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Example: VariantAutoscaling with LeaderWorkerSet (LWS) as scale target
2+
# Ensure a LeaderWorkerSet named vllm-lws exists in the llm-d-sim namespace (e.g. created by the kind-emulator setup or the e2e tests).
3+
#
4+
apiVersion: llmd.ai/v1alpha1
5+
kind: VariantAutoscaling
6+
metadata:
7+
name: sample-deployment
8+
namespace: llm-d-sim
9+
labels:
10+
inference.optimization/acceleratorName: A100
11+
spec:
12+
scaleTargetRef:
13+
apiVersion: leaderworkerset.x-k8s.io/v1
14+
kind: LeaderWorkerSet
15+
name: vllm-lws
16+
variantCost: "10.0"
17+
modelID: "meta/llama-3.1-70b"

deploy/kind-emulator/install.sh

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,6 @@ load_image() {
203203

204204
# Load the image into the KIND cluster
205205
kind load docker-image "$WVA_IMAGE_REPO:$WVA_IMAGE_TAG" --name "$CLUSTER_NAME"
206-
207206
log_success "Image '$WVA_IMAGE_REPO:$WVA_IMAGE_TAG' loaded into KIND cluster '$CLUSTER_NAME'"
208207
}
209208

@@ -295,6 +294,14 @@ deploy_wva_prerequisites() {
295294
VALUES_FILE="${WVA_PROJECT}/charts/workload-variant-autoscaler/values.yaml"
296295
fi
297296

297+
CHART_VERSION=0.8.0
298+
log_info "Installing LeaderWorkerSet version $CHART_VERSION into lws-system namespace"
299+
helm upgrade -i lws oci://registry.k8s.io/lws/charts/lws \
300+
--version=$CHART_VERSION \
301+
--namespace lws-system \
302+
--create-namespace \
303+
--wait --timeout 300s
304+
298305
log_success "WVA prerequisites complete"
299306
}
300307

@@ -363,4 +370,4 @@ delete_kind_cluster() {
363370
else
364371
log_warning "KIND cluster '${CLUSTER_NAME}' not found"
365372
fi
366-
}
373+
}

deploy/openshift/install.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,14 @@ deploy_wva_prerequisites() {
201201
local cert_subject=$(openssl x509 -in "$PROM_CA_CERT_PATH" -noout -subject 2>/dev/null | sed 's/subject=//' || echo "unknown")
202202
log_info "Certificate subject: $cert_subject"
203203
fi
204+
205+
CHART_VERSION=0.8.0
206+
log_info "Installing LeaderWorkerSet version $CHART_VERSION into lws-system namespace"
207+
helm upgrade -i lws oci://registry.k8s.io/lws/charts/lws \
208+
--version=$CHART_VERSION \
209+
--namespace lws-system \
210+
--create-namespace \
211+
--wait --timeout 300s
204212

205213
log_success "WVA prerequisites deployed"
206214
}

0 commit comments

Comments
 (0)