mamy-CS
diff --git a/‎charts/workload-variant-autoscaler/templates/hpa.yaml‎
Lines changed: 10 additions & 1 deletion b/‎charts/workload-variant-autoscaler/templates/hpa.yaml‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎charts/workload-variant-autoscaler/templates/rbac/role.yaml‎
Lines changed: 25 additions & 0 deletions b/‎charts/workload-variant-autoscaler/templates/rbac/role.yaml‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎charts/workload-variant-autoscaler/templates/variantautoscaling.yaml‎
Lines changed: 11 additions & 2 deletions b/‎charts/workload-variant-autoscaler/templates/variantautoscaling.yaml‎
Lines changed: 11 additions & 2 deletions
diff --git a/‎charts/workload-variant-autoscaler/values-dev.yaml‎
Lines changed: 11 additions & 1 deletion b/‎charts/workload-variant-autoscaler/values-dev.yaml‎
Lines changed: 11 additions & 1 deletion
diff --git a/‎charts/workload-variant-autoscaler/values.yaml‎
Lines changed: 10 additions & 1 deletion b/‎charts/workload-variant-autoscaler/values.yaml‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎cmd/main.go‎
Lines changed: 60 additions & 7 deletions b/‎cmd/main.go‎
Lines changed: 60 additions & 7 deletions
diff --git a/‎config/rbac/role.yaml‎
Lines changed: 19 additions & 1 deletion b/‎config/rbac/role.yaml‎
Lines changed: 19 additions & 1 deletion
diff --git a/‎config/samples/variantautoscaling-with-lws.yaml‎
Lines changed: 17 additions & 0 deletions b/‎config/samples/variantautoscaling-with-lws.yaml‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎deploy/kind-emulator/install.sh‎
Lines changed: 9 additions & 2 deletions b/‎deploy/kind-emulator/install.sh‎
Lines changed: 9 additions & 2 deletions
diff --git a/‎deploy/openshift/install.sh‎
Lines changed: 8 additions & 0 deletions b/‎deploy/openshift/install.sh‎
Lines changed: 8 additions & 0 deletions
@@ -8,9 +8,18 @@ metadata:
     {{- include "workload-variant-autoscaler.labels" . | nindent 4 }}
 spec:
   scaleTargetRef:
+    {{- /* scaleTargetKind is optional: default it to "Deployment" before comparing so `eq` always receives two strings, even when the value is unset (nil). */}}
+    {{- if eq (.Values.llmd.scaleTargetKind | default "Deployment") "LeaderWorkerSet" }}
+    apiVersion: leaderworkerset.x-k8s.io/v1
+    kind: LeaderWorkerSet
+    {{- else }}
     apiVersion: apps/v1
     kind: Deployment
-    name: {{ printf "%s-decode" .Values.llmd.modelName }}
+    {{- end }}
+    {{- /* Name resolution order: scaleTargetName, then deploymentName, then "<modelName>-decode". Quoted so the rendered value is always a YAML string. */}}
+    {{- if .Values.llmd.scaleTargetName }}
+    name: {{ .Values.llmd.scaleTargetName | quote }}
+    {{- else }}
+    name: {{ .Values.llmd.deploymentName | default (printf "%s-decode" .Values.llmd.modelName) | quote }}
+    {{- end }}
   minReplicas: {{ .Values.hpa.minReplicas }}
   maxReplicas: {{ .Values.hpa.maxReplicas }}
   behavior:
 
@@ -75,6 +75,31 @@ rules:
   - patch
   - update
   - watch
+- apiGroups:
+  - apps
+  resources:
+  - statefulsets
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - leaderworkerset.x-k8s.io
+  resources:
+  - leaderworkersets
+  verbs:
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - leaderworkerset.x-k8s.io
+  resources:
+  - leaderworkersets/scale
+  verbs:
+  - get
+  - update
 - apiGroups:
   - apps
   resources:
 
@@ -16,13 +16,22 @@ metadata:
 # This is essentially static input to the optimizer
 spec:
   # ScaleTargetRef references the target resource to scale (similar to HPA)
-  # TODO: Support templating for scaleTargetRef to enable managing groups of deployments
   scaleTargetRef:
+    {{- /* scaleTargetKind is optional: default it to "Deployment" before comparing so `eq` always receives two strings, even when the value is unset (nil). */}}
+    {{- if eq (.Values.llmd.scaleTargetKind | default "Deployment") "LeaderWorkerSet" }}
+    apiVersion: leaderworkerset.x-k8s.io/v1
+    kind: LeaderWorkerSet
+    {{- else }}
     apiVersion: apps/v1
     kind: Deployment
+    {{- end }}
+    {{- /* An explicit scaleTargetName wins; otherwise fall back to deploymentName, then "<modelName>-decode". */}}
+    {{- if .Values.llmd.scaleTargetName }}
+    name: {{ .Values.llmd.scaleTargetName | quote }}
+    {{- else }}
     name: {{ .Values.llmd.deploymentName | default (printf "%s-decode" .Values.llmd.modelName) }}
+    {{- end }}
+
   # OpenAI API compatible name of the model
   modelID: {{ .Values.llmd.modelID | quote }}
   # Cost per replica for this variant (used in saturation analysis)
   variantCost: {{ .Values.va.variantCost | default "10.0" | quote }}
-{{- end }}
+{{- end }}
@@ -59,10 +59,20 @@ llmd:
   namespace: llm-d-autoscaler
   modelName: ms-workload-autoscaler-llm-d-modelservice
   modelID: "Qwen/Qwen3-0.6B"
+
+  # scaleTargetKind: Optional. If not specified or empty, it defaults to "Deployment".
+  # Valid values are "Deployment" and "LeaderWorkerSet" (case sensitive); any other
+  # value is rendered as a Deployment scale target.
+  #scaleTargetKind: "Deployment"
+  
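+  # scaleTargetName: Optional. Name of the scale target resource.
+  # For "Deployment": name of the Deployment.
+  # For "LeaderWorkerSet": name of the LeaderWorkerSet.
+  # If not specified or empty, llmd.deploymentName is used, falling back to "<modelName>-decode".
+  #scaleTargetName: ""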
+  
 va:
   enabled: true
   # accelerator: Optional. If not specified, it will be auto-discovered
-  # from target deployment. If specified, it will be used as fall-back value if it can't 
+  # from scale target. If specified, it will be used as fall-back value if it can't 
   # be discovered.
   accelerator: H100
   sloTpot: 10
 
@@ -88,10 +88,19 @@ llmd:
   modelName: ms-workload-autoscaler-llm-d-modelservice
   modelID: "Qwen/Qwen3-0.6B"
 
+  # scaleTargetKind: Optional. If not specified or empty, it defaults to "Deployment".
+  # Valid values are "Deployment" and "LeaderWorkerSet" (case sensitive); any other
+  # value is rendered as a Deployment scale target.
+  #scaleTargetKind: "Deployment"
+  
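+  # scaleTargetName: Optional. Name of the scale target resource.
+  # For "Deployment": name of the Deployment.
+  # For "LeaderWorkerSet": name of the LeaderWorkerSet.
+  # If not specified or empty, llmd.deploymentName is used, falling back to "<modelName>-decode".
+  #scaleTargetName: ""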
+
 va:
   enabled: true
   # accelerator: Optional. If not specified, it will be auto-discovered
-  # from target deployment. If specified, it will be used as fall-back value if it can't 
+  # from scale target. If specified, it will be used as fall-back value if it can't 
   # be discovered.
   accelerator: H100
   # Cost per replica in arbitrary units (higher = more expensive to scale)
 
@@ -30,10 +30,13 @@ import (
 	// to ensure that exec-entrypoint and run can make use of them.
 	_ "k8s.io/client-go/plugin/pkg/client/auth"
 
+	"github.com/go-logr/logr"
 	flag "github.com/spf13/pflag"
 	"k8s.io/apimachinery/pkg/runtime"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+	"k8s.io/client-go/discovery"
 	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
+	"k8s.io/client-go/rest"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/cache"
 	"sigs.k8s.io/controller-runtime/pkg/certwatcher"
@@ -48,6 +51,7 @@ import (
 	"github.com/llm-d/llm-d-workload-variant-autoscaler/internal/collector/source"
 	"github.com/llm-d/llm-d-workload-variant-autoscaler/internal/collector/source/prometheus"
 	"github.com/llm-d/llm-d-workload-variant-autoscaler/internal/config"
+	"github.com/llm-d/llm-d-workload-variant-autoscaler/internal/constants"
 	"github.com/llm-d/llm-d-workload-variant-autoscaler/internal/controller"
 	"github.com/llm-d/llm-d-workload-variant-autoscaler/internal/controller/indexers"
 	"github.com/llm-d/llm-d-workload-variant-autoscaler/internal/datastore"
@@ -63,6 +67,7 @@ import (
 	crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
 	inferencePoolV1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	inferencePoolV1alpha2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
+	lwsv1 "sigs.k8s.io/lws/api/leaderworkerset/v1"
 	//+kubebuilder:scaffold:imports
 )
 
@@ -76,9 +81,44 @@ func init() {
 	utilruntime.Must(promoperator.AddToScheme(scheme))
 	utilruntime.Must(inferencePoolV1.Install(scheme))
 	utilruntime.Must(inferencePoolV1alpha2.Install(scheme))
+	// Note: LeaderWorkerSet scheme is added conditionally in main() after checking if CRD exists
 	//+kubebuilder:scaffold:scheme
 }
 
+// checkLeaderWorkerSetCRD reports whether the LeaderWorkerSet API is served by the cluster.
+//
+// It asks the discovery endpoint for the single group/version named by
+// constants.LeaderWorkerSetAPIVersion and looks for a resource whose Kind equals
+// constants.LeaderWorkerSetKind. Querying only that group/version avoids walking every
+// API group on the cluster and keeps the result independent of unrelated aggregated
+// APIs that may be failing discovery.
+//
+// Any failure (discovery client creation, or the group/version not being served or not
+// reachable) is logged and treated as "LWS not installed", so the function never aborts
+// controller start-up.
+//
+// TODO: this is checked once at start up for now. We should handle LWS installed after controller starts.
+func checkLeaderWorkerSetCRD(restConfig *rest.Config, logger logr.Logger) bool {
+	discoveryClient, err := discovery.NewDiscoveryClientForConfig(restConfig)
+	if err != nil {
+		logger.Error(err, "failed to create discovery client for CRD detection - assuming LWS not installed")
+		return false
+	}
+
+	// A group/version that is not served yields an error here; that is the expected
+	// outcome on clusters without LWS, so it is reported at info level rather than as an error.
+	resourceList, err := discoveryClient.ServerResourcesForGroupVersion(constants.LeaderWorkerSetAPIVersion)
+	if err != nil || resourceList == nil {
+		logger.Info("LeaderWorkerSet API not discoverable - assuming LWS not installed",
+			"groupVersion", constants.LeaderWorkerSetAPIVersion, "error", err)
+		return false
+	}
+
+	for _, resource := range resourceList.APIResources {
+		if resource.Kind == constants.LeaderWorkerSetKind {
+			return true
+		}
+	}
+
+	return false
+}
+
 // nolint:gocyclo
 func main() {
 	// Command-line flags
@@ -153,6 +193,18 @@ func main() {
 	}
 	setupLog.Info("Configuration loaded successfully")
 
+	// Conditionally add LeaderWorkerSet scheme if CRD exists
+	lwsEnabled := checkLeaderWorkerSetCRD(restConfig, setupLog)
+	if lwsEnabled {
+		if err := lwsv1.AddToScheme(scheme); err != nil {
+			setupLog.Error(err, "failed to add LeaderWorkerSet scheme")
+			os.Exit(1)
+		}
+		setupLog.Info("LeaderWorkerSet CRD detected - support enabled")
+	} else {
+		setupLog.Info("LeaderWorkerSet CRD not found - support disabled (Deployment-only mode)")
+	}
+
 	// if the enable-http2 flag is false (the default), http/2 should be disabled
 	// due to its vulnerabilities. More specifically, disabling http/2 will
 	// prevent from being vulnerable to the HTTP/2 Stream Cancellation and
@@ -425,13 +477,14 @@ func main() {
 	}
 
 	// Create the reconciler with unified Config and datastore
-	reconciler := &controller.VariantAutoscalingReconciler{
-		Client:    mgr.GetClient(),
-		Scheme:    mgr.GetScheme(),
-		Recorder:  mgr.GetEventRecorderFor("workload-variant-autoscaler-controller-manager"),
-		Config:    cfg, // Pass unified Config to reconciler
-		Datastore: ds,  // Pass datastore for namespace tracking
-	}
+	reconciler := controller.NewVariantAutoscalingReconciler(
+		mgr.GetClient(),
+		mgr.GetScheme(),
+		mgr.GetEventRecorderFor("workload-variant-autoscaler-controller-manager"),
+		cfg,
+		ds,
+		lwsEnabled,
+	)
 
 	// Setup the controller with the manager
 	if err = reconciler.SetupWithManager(mgr); err != nil {
 
@@ -17,7 +17,6 @@ rules:
   - ""
   resources:
   - configmaps/status
-  - secrets
   verbs:
   - get
 - apiGroups:
@@ -32,6 +31,7 @@ rules:
   resources:
   - namespaces
   - pods
+  - secrets
   - services
   verbs:
   - get
@@ -69,6 +69,7 @@ rules:
   - apps
   resources:
   - replicasets
+  - statefulsets
   verbs:
   - get
   - list
@@ -82,6 +83,23 @@ rules:
   - get
   - list
   - watch
+- apiGroups:
+  - leaderworkerset.x-k8s.io
+  resources:
+  - leaderworkersets
+  verbs:
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - leaderworkerset.x-k8s.io
+  resources:
+  - leaderworkersets/scale
+  verbs:
+  - get
+  - update
 - apiGroups:
   - llmd.ai
   resources:
 
@@ -0,0 +1,17 @@
+# Example: VariantAutoscaling with LeaderWorkerSet (LWS) as scale target
+# Ensure a LeaderWorkerSet named vllm-lws exists in llm-d-sim (e.g. from kind-emulator or e2e).
+# 
+apiVersion: llmd.ai/v1alpha1
+kind: VariantAutoscaling
+metadata:
+  name: sample-deployment
+  namespace: llm-d-sim
+  labels:
+    inference.optimization/acceleratorName: A100
+spec:
+  scaleTargetRef:
+    apiVersion: leaderworkerset.x-k8s.io/v1
+    kind: LeaderWorkerSet
+    name: vllm-lws
+  variantCost: "10.0"
+  modelID: "meta/llama-3.1-70b"
@@ -203,7 +203,6 @@ load_image() {
 
     # Load the image into the KIND cluster
     kind load docker-image "$WVA_IMAGE_REPO:$WVA_IMAGE_TAG" --name "$CLUSTER_NAME"
-    
     log_success "Image '$WVA_IMAGE_REPO:$WVA_IMAGE_TAG' loaded into KIND cluster '$CLUSTER_NAME'"
 }
 
@@ -295,6 +294,14 @@ deploy_wva_prerequisites() {
         VALUES_FILE="${WVA_PROJECT}/charts/workload-variant-autoscaler/values.yaml"
     fi
 
+    # Install the LeaderWorkerSet controller so LWS scale targets are available.
+    # The chart version is pinned but can be overridden through LWS_CHART_VERSION;
+    # the variable is function-local so it cannot clobber an unrelated global.
+    local lws_chart_version="${LWS_CHART_VERSION:-0.8.0}"
+    log_info "Installing LeaderWorkerSet version $lws_chart_version into lws-system namespace"
+    helm upgrade -i lws oci://registry.k8s.io/lws/charts/lws \
+        --version="$lws_chart_version" \
+        --namespace lws-system \
+        --create-namespace \
+        --wait --timeout 300s
+
     log_success "WVA prerequisites complete"
 }
 
@@ -363,4 +370,4 @@ delete_kind_cluster() {
     else
         log_warning "KIND cluster '${CLUSTER_NAME}' not found"
     fi
-}
+}
@@ -201,6 +201,14 @@ deploy_wva_prerequisites() {
         local cert_subject=$(openssl x509 -in "$PROM_CA_CERT_PATH" -noout -subject 2>/dev/null | sed 's/subject=//' || echo "unknown")
         log_info "Certificate subject: $cert_subject"
     fi
+
+    # Install the LeaderWorkerSet controller so LWS scale targets are available.
+    # The chart version is pinned but can be overridden through LWS_CHART_VERSION;
+    # the variable is function-local so it cannot clobber an unrelated global.
+    local lws_chart_version="${LWS_CHART_VERSION:-0.8.0}"
+    log_info "Installing LeaderWorkerSet version $lws_chart_version into lws-system namespace"
+    helm upgrade -i lws oci://registry.k8s.io/lws/charts/lws \
+        --version="$lws_chart_version" \
+        --namespace lws-system \
+        --create-namespace \
+        --wait --timeout 300s
 
     log_success "WVA prerequisites deployed"
 }