envoyproxy · altaiezior · Nov 2, 2025 · Nov 2, 2025 · Nov 3, 2025 · Nov 3, 2025
@@ -11,15 +11,17 @@ import gwapiv1 "sigs.k8s.io/gateway-api/apis/v1"
 // +union
 //
 // +kubebuilder:validation:XValidation:rule="self.type == 'ConsistentHash' ? has(self.consistentHash) : !has(self.consistentHash)",message="If LoadBalancer type is consistentHash, consistentHash field needs to be set."
-// +kubebuilder:validation:XValidation:rule="self.type in ['Random', 'ConsistentHash'] ? !has(self.slowStart) : true ",message="Currently SlowStart is only supported for RoundRobin and LeastRequest load balancers."
-// +kubebuilder:validation:XValidation:rule="self.type == 'ConsistentHash' ? !has(self.zoneAware) : true ",message="Currently ZoneAware is only supported for LeastRequest, Random, and RoundRobin load balancers."
+// +kubebuilder:validation:XValidation:rule="self.type == 'BackendUtilization' ? has(self.backendUtilization) : !has(self.backendUtilization)",message="If LoadBalancer type is BackendUtilization, backendUtilization field needs to be set."
+// +kubebuilder:validation:XValidation:rule="self.type in ['Random', 'ConsistentHash'] ? !has(self.slowStart) : true ",message="Currently SlowStart is only supported for RoundRobin, LeastRequest, and BackendUtilization load balancers."
+// +kubebuilder:validation:XValidation:rule="self.type in ['ConsistentHash', 'BackendUtilization'] ? !has(self.zoneAware) : true ",message="Currently ZoneAware is only supported for LeastRequest, Random, and RoundRobin load balancers."
 type LoadBalancer struct {
 	// Type decides the type of Load Balancer policy.
 	// Valid LoadBalancerType values are
 	// "ConsistentHash",
 	// "LeastRequest",
 	// "Random",
-	// "RoundRobin".
+	// "RoundRobin",
+	// "BackendUtilization".
 	//
 	// +unionDiscriminator
 	Type LoadBalancerType `json:"type"`
@@ -29,6 +31,12 @@ type LoadBalancer struct {
 	// +optional
 	ConsistentHash *ConsistentHash `json:"consistentHash,omitempty"`
 
+	// BackendUtilization defines the configuration when the load balancer type is
+	// set to BackendUtilization.
+	//
+	// +optional
+	BackendUtilization *BackendUtilization `json:"backendUtilization,omitempty"`
+
 	// EndpointOverride defines the configuration for endpoint override.
 	// When specified, the load balancer will attempt to route requests to endpoints
 	// based on the override information extracted from request headers or metadata.
@@ -39,7 +47,7 @@ type LoadBalancer struct {
 
 	// SlowStart defines the configuration related to the slow start load balancer policy.
 	// If set, during slow start window, traffic sent to the newly added hosts will gradually increase.
-	// Currently this is only supported for RoundRobin and LeastRequest load balancers
+	// Supported for RoundRobin, LeastRequest, and BackendUtilization load balancers.
 	//
 	// +optional
 	SlowStart *SlowStart `json:"slowStart,omitempty"`
@@ -51,7 +59,7 @@ type LoadBalancer struct {
 }
 
 // LoadBalancerType specifies the types of LoadBalancer.
-// +kubebuilder:validation:Enum=ConsistentHash;LeastRequest;Random;RoundRobin
+// +kubebuilder:validation:Enum=ConsistentHash;LeastRequest;Random;RoundRobin;BackendUtilization
 type LoadBalancerType string
 
 const (
@@ -63,6 +71,8 @@ const (
 	RandomLoadBalancerType LoadBalancerType = "Random"
 	// RoundRobinLoadBalancerType load balancer policy.
 	RoundRobinLoadBalancerType LoadBalancerType = "RoundRobin"
+	// BackendUtilizationLoadBalancerType load balancer policy.
+	BackendUtilizationLoadBalancerType LoadBalancerType = "BackendUtilization"
 )
 
 // ConsistentHash defines the configuration related to the consistent hash
@@ -148,6 +158,63 @@ type Cookie struct {
 	Attributes map[string]string `json:"attributes,omitempty"`
 }
 
+// BackendUtilization defines configuration for Envoy's Backend Utilization policy.
+// It uses Open Resource Cost Application (ORCA) load metrics reported by endpoints to make load balancing decisions.
+// These metrics are typically sent by the backend service in response headers or trailers.
+//
+// The backend should report these metrics in header/trailer as one of the following formats:
+// - Binary: `endpoint-load-metrics-bin` with base64-encoded serialized `OrcaLoadReport` proto.
+// - JSON: `endpoint-load-metrics` with JSON-encoded `OrcaLoadReport` proto, e.g., `JSON {"cpu_utilization": 0.3}`.
+// - TEXT: `endpoint-load-metrics` with comma-separated key-value pairs, e.g., `TEXT cpu=0.3,mem=0.8`.
+//
+// Envoy forwards these ORCA response headers/trailers from the upstream service to the downstream client unless they are removed.
+// If the downstream client also uses this information for load balancing, it might lead to unexpected behavior.
+// To avoid this, `removeResponseHeaders` (default true) strips the load report headers before sending the response to the client; if you set it to false, use the `HTTPRoute` or `BackendTrafficPolicy` to remove them instead.
+//
+// See Envoy proto: envoy.extensions.load_balancing_policies.client_side_weighted_round_robin.v3.ClientSideWeightedRoundRobin
+// See ORCA Load Report proto: xds.data.orca.v3.OrcaLoadReport
+type BackendUtilization struct {
+	// A given endpoint must report load metrics continuously for at least this long before the endpoint weight will be used.
+	// Default is 10s.
+	// +optional
+	BlackoutPeriod *gwapiv1.Duration `json:"blackoutPeriod,omitempty"`
+
+	// If a given endpoint has not reported load metrics in this long, stop using the reported weight. Defaults to 3m.
+	// +optional
+	WeightExpirationPeriod *gwapiv1.Duration `json:"weightExpirationPeriod,omitempty"`
+
+	// How often endpoint weights are recalculated. Values less than 100ms are capped at 100ms. Default 1s.
+	// +optional
+	WeightUpdatePeriod *gwapiv1.Duration `json:"weightUpdatePeriod,omitempty"`
+
+	// ErrorUtilizationPenaltyPercent adjusts endpoint weights based on the error rate (eps/qps).
+	// This is expressed as a percentage-based integer where 100 represents 1.0, 150 represents 1.5, etc.
+	//
+	// For example:
+	// - 100 => 1.0x
+	// - 120 => 1.2x
+	// - 200 => 2.0x
+	//
+	// Note: In the internal IR/XDS configuration this value is converted back to a
+	// floating point multiplier (value / 100.0).
+	//
+	// Must be non-negative.
+	// +kubebuilder:validation:Minimum=0
+	// +optional
+	ErrorUtilizationPenaltyPercent *uint32 `json:"errorUtilizationPenaltyPercent,omitempty"`
+
+	// Metric names used to compute utilization if application_utilization is not set.
+	// For map fields in ORCA proto, use the form "<map_field>.<key>", e.g., "named_metrics.foo".
+	// +optional
+	MetricNamesForComputingUtilization []string `json:"metricNamesForComputingUtilization,omitempty"`
+
+	// RemoveResponseHeaders removes the ORCA load report headers/trailers before sending the response to the client.
+	// Defaults to true.
+	// +optional
+	// +kubebuilder:default=true
+	RemoveResponseHeaders *bool `json:"removeResponseHeaders,omitempty"`
+}
+
 // ConsistentHashType defines the type of input to hash on.
 // +kubebuilder:validation:Enum=SourceIP;Header;Headers;Cookie;QueryParams
 type ConsistentHashType string

@@ -706,6 +706,59 @@ spec:
                   LoadBalancer policy to apply when routing traffic from the gateway to
                   the backend endpoints. Defaults to `LeastRequest`.
                 properties:
+                  backendUtilization:
+                    description: |-
+                      BackendUtilization defines the configuration when the load balancer type is
+                      set to BackendUtilization.
+                    properties:
+                      blackoutPeriod:
+                        description: |-
+                          A given endpoint must report load metrics continuously for at least this long before the endpoint weight will be used.
+                          Default is 10s.
+                        pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$
+                        type: string
+                      errorUtilizationPenaltyPercent:
+                        description: |-
+                          ErrorUtilizationPenaltyPercent adjusts endpoint weights based on the error rate (eps/qps).
+                          This is expressed as a percentage-based integer where 100 represents 1.0, 150 represents 1.5, etc.
+
+                          For example:
+                          - 100 => 1.0x
+                          - 120 => 1.2x
+                          - 200 => 2.0x
+
+                          Note: In the internal IR/XDS configuration this value is converted back to a
+                          floating point multiplier (value / 100.0).
+
+                          Must be non-negative.
+                        format: int32
+                        minimum: 0
+                        type: integer
+                      metricNamesForComputingUtilization:
+                        description: |-
+                          Metric names used to compute utilization if application_utilization is not set.
+                          For map fields in ORCA proto, use the form "<map_field>.<key>", e.g., "named_metrics.foo".
+                        items:
+                          type: string
+                        type: array
+                      removeResponseHeaders:
+                        default: true
+                        description: |-
+                          RemoveResponseHeaders removes the ORCA load report headers/trailers before sending the response to the client.
+                          Defaults to true.
+                        type: boolean
+                      weightExpirationPeriod:
+                        description: If a given endpoint has not reported load metrics
+                          in this long, stop using the reported weight. Defaults to
+                          3m.
+                        pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$
+                        type: string
+                      weightUpdatePeriod:
+                        description: How often endpoint weights are recalculated.
+                          Values less than 100ms are capped at 100ms. Default 1s.
+                        pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$
+                        type: string
+                    type: object
                   consistentHash:
                     description: |-
                       ConsistentHash defines the configuration when the load balancer type is
@@ -852,7 +905,7 @@ spec:
                     description: |-
                       SlowStart defines the configuration related to the slow start load balancer policy.
                       If set, during slow start window, traffic sent to the newly added hosts will gradually increase.
-                      Currently this is only supported for RoundRobin and LeastRequest load balancers
+                      Supported for RoundRobin, LeastRequest, and BackendUtilization load balancers.
                     properties:
                       window:
                         description: |-
@@ -872,12 +925,14 @@ spec:
                       "ConsistentHash",
                       "LeastRequest",
                       "Random",
-                      "RoundRobin".
+                      "RoundRobin",
+                      "BackendUtilization".
                     enum:
                     - ConsistentHash
                     - LeastRequest
                     - Random
                     - RoundRobin
+                    - BackendUtilization
                     type: string
                   zoneAware:
                     description: ZoneAware defines the configuration related to the
@@ -923,14 +978,18 @@ spec:
                     field needs to be set.
                   rule: 'self.type == ''ConsistentHash'' ? has(self.consistentHash)
                     : !has(self.consistentHash)'
-                - message: Currently SlowStart is only supported for RoundRobin and
-                    LeastRequest load balancers.
+                - message: If LoadBalancer type is BackendUtilization, backendUtilization
+                    field needs to be set.
+                  rule: 'self.type == ''BackendUtilization'' ? has(self.backendUtilization)
+                    : !has(self.backendUtilization)'
+                - message: Currently SlowStart is only supported for RoundRobin, LeastRequest,
+                    and BackendUtilization load balancers.
                   rule: 'self.type in [''Random'', ''ConsistentHash''] ? !has(self.slowStart)
                     : true '
                 - message: Currently ZoneAware is only supported for LeastRequest,
                     Random, and RoundRobin load balancers.
-                  rule: 'self.type == ''ConsistentHash'' ? !has(self.zoneAware) :
-                    true '
+                  rule: 'self.type in [''ConsistentHash'', ''BackendUtilization'']
+                    ? !has(self.zoneAware) : true '
               mergeType:
                 description: |-
                   MergeType determines how this configuration is merged with existing BackendTrafficPolicy