Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
971c3f1
Add support for ClientSideWeightedRoundRobin load balancer policy in …
Nov 2, 2025
a9d01f0
Adds / Updates test cases for client wrr
Nov 2, 2025
63e8b2f
Fix: gen-check ci, coverage-check
Nov 3, 2025
522534b
Remove `enableOOBLoadReport` and `oobReportingPeriod` fields from gat…
Nov 3, 2025
4512f9d
Remove `enableOOBLoadReport` and `oobReportingPeriod` fields from Cli…
Nov 3, 2025
dd5d285
Update: Change `ErrorUtilizationPenalty` type from float to integer a…
Nov 16, 2025
1101c1d
Merge branch 'main' into client-wrr
Nov 26, 2025
14d1d59
Merge branch 'main' into client-wrr
jukie Nov 27, 2025
8bce886
Update: Refactor `ErrorUtilizationPenalty` to use integer type with p…
Dec 7, 2025
0d65b01
Merge remote-tracking branch 'upstream/main' into client-wrr
Dec 7, 2025
a479e67
Refactor: Replace `ptr.To(metav1.Duration)` with `ir.MetaV1DurationPt…
Dec 7, 2025
777a1c1
Update: Add support for SlowStart configuration for ClientSideWeighte…
Dec 7, 2025
b88903c
Merge remote-tracking branch 'upstream/main' into client-wrr
Jan 20, 2026
234cffa
Update: Replace `ClientSideWeightedRoundRobin` load balancer type wit…
Jan 21, 2026
d012b11
Merge branch 'main' into client-wrr
Jan 21, 2026
44bfe7b
Update: Replace `ClientSideWeightedRoundRobin` references with `Backe…
Jan 21, 2026
7edc401
Merge branch 'main' into client-wrr
Jan 21, 2026
84a0942
Merge branch 'main' into client-wrr
Jan 21, 2026
afb690d
Update: Replace `lb-backend-bu` references with `lb-backend-utilizati…
Jan 21, 2026
3785264
Remove: Delete obsolete BackendUtilization OOB and penalty load balan…
Jan 21, 2026
6321ec4
Merge branch 'main' into client-wrr
jukie Jan 21, 2026
05016c4
Merge branch 'main' into client-wrr
Jan 28, 2026
7d7528f
Update: Add detailed documentation and configuration examples for Bac…
Jan 29, 2026
870c52c
Merge branch 'main' into client-wrr
Jan 29, 2026
1fa3ff3
Merge remote-tracking branch 'upstream/main' into client-wrr
Feb 1, 2026
c3e06cb
Update: Rename `ErrorUtilizationPenalty` to `ErrorUtilizationPenaltyP…
altaiezior Feb 2, 2026
9c9dc20
Update: Add support for `removeResponseHeaders` in `BackendUtilizatio…
altaiezior Feb 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 72 additions & 5 deletions api/v1alpha1/loadbalancer_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,17 @@ import gwapiv1 "sigs.k8s.io/gateway-api/apis/v1"
// +union
//
// +kubebuilder:validation:XValidation:rule="self.type == 'ConsistentHash' ? has(self.consistentHash) : !has(self.consistentHash)",message="If LoadBalancer type is consistentHash, consistentHash field needs to be set."
// +kubebuilder:validation:XValidation:rule="self.type in ['Random', 'ConsistentHash'] ? !has(self.slowStart) : true ",message="Currently SlowStart is only supported for RoundRobin and LeastRequest load balancers."
// +kubebuilder:validation:XValidation:rule="self.type == 'ConsistentHash' ? !has(self.zoneAware) : true ",message="Currently ZoneAware is only supported for LeastRequest, Random, and RoundRobin load balancers."
// +kubebuilder:validation:XValidation:rule="self.type == 'BackendUtilization' ? has(self.backendUtilization) : !has(self.backendUtilization)",message="If LoadBalancer type is BackendUtilization, backendUtilization field needs to be set."
// +kubebuilder:validation:XValidation:rule="self.type in ['Random', 'ConsistentHash'] ? !has(self.slowStart) : true ",message="Currently SlowStart is only supported for RoundRobin, LeastRequest, and BackendUtilization load balancers."
// +kubebuilder:validation:XValidation:rule="self.type in ['ConsistentHash', 'BackendUtilization'] ? !has(self.zoneAware) : true ",message="Currently ZoneAware is only supported for LeastRequest, Random, and RoundRobin load balancers."
type LoadBalancer struct {
// Type decides the type of Load Balancer policy.
// Valid LoadBalancerType values are
// "ConsistentHash",
// "LeastRequest",
// "Random",
// "RoundRobin".
// "RoundRobin",
// "BackendUtilization".
//
// +unionDiscriminator
Type LoadBalancerType `json:"type"`
Expand All @@ -29,6 +31,12 @@ type LoadBalancer struct {
// +optional
ConsistentHash *ConsistentHash `json:"consistentHash,omitempty"`

// BackendUtilization defines the configuration when the load balancer type is
// set to BackendUtilization.
//
// +optional
BackendUtilization *BackendUtilization `json:"backendUtilization,omitempty"`

// EndpointOverride defines the configuration for endpoint override.
// When specified, the load balancer will attempt to route requests to endpoints
// based on the override information extracted from request headers or metadata.
Expand All @@ -39,7 +47,7 @@ type LoadBalancer struct {

// SlowStart defines the configuration related to the slow start load balancer policy.
// If set, during slow start window, traffic sent to the newly added hosts will gradually increase.
// Currently this is only supported for RoundRobin and LeastRequest load balancers
// Supported for RoundRobin, LeastRequest, and BackendUtilization load balancers.
//
// +optional
SlowStart *SlowStart `json:"slowStart,omitempty"`
Expand All @@ -51,7 +59,7 @@ type LoadBalancer struct {
}

// LoadBalancerType specifies the types of LoadBalancer.
// +kubebuilder:validation:Enum=ConsistentHash;LeastRequest;Random;RoundRobin
// +kubebuilder:validation:Enum=ConsistentHash;LeastRequest;Random;RoundRobin;BackendUtilization
type LoadBalancerType string

const (
Expand All @@ -63,6 +71,8 @@ const (
RandomLoadBalancerType LoadBalancerType = "Random"
// RoundRobinLoadBalancerType load balancer policy.
RoundRobinLoadBalancerType LoadBalancerType = "RoundRobin"
// BackendUtilizationLoadBalancerType load balancer policy.
BackendUtilizationLoadBalancerType LoadBalancerType = "BackendUtilization"
)

// ConsistentHash defines the configuration related to the consistent hash
Expand Down Expand Up @@ -148,6 +158,63 @@ type Cookie struct {
Attributes map[string]string `json:"attributes,omitempty"`
}

// BackendUtilization defines configuration for Envoy's Backend Utilization policy.
// It uses Open Resource Cost Application (ORCA) load metrics reported by endpoints to make load balancing decisions.
// These metrics are typically sent by the backend service in response headers or trailers.
//
// The backend should report these metrics in header/trailer as one of the following formats:
// - Binary: `endpoint-load-metrics-bin` with base64-encoded serialized `OrcaLoadReport` proto.
// - JSON: `endpoint-load-metrics` with JSON-encoded `OrcaLoadReport` proto, e.g., `JSON {"cpu_utilization": 0.3}`.
// - TEXT: `endpoint-load-metrics` with comma-separated key-value pairs, e.g., `TEXT cpu=0.3,mem=0.8`.
//
// Envoy itself forwards these ORCA response headers/trailers from the upstream service to the downstream client.
// If the downstream client also uses this information for load balancing, it might lead to unexpected behavior.
// To avoid this, leave `removeResponseHeaders` enabled (the default), or use the `HTTPRoute` or `BackendTrafficPolicy` to remove the load report headers before sending the response to the client.
//
// See Envoy proto: envoy.extensions.load_balancing_policies.client_side_weighted_round_robin.v3.ClientSideWeightedRoundRobin
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tbh https://www.envoyproxy.io/docs/envoy/latest/api-v3/extensions/load_balancing_policies/client_side_weighted_round_robin/v3/client_side_weighted_round_robin.proto is not descriptive enough, and doesn't mention how this needs to be instrumented in the upstream; are there other links that can be used?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah the docs aren't great... for example out-of-band reporting isn't supported at all yet reading the docs would indicate otherwise.

We could contribute some docs changes upstream but agreed that including a reference to how endpoint-load-metrics and endpoint-load-metrics-bin are instrumented would be useful here.

Copy link
Author

@altaiezior altaiezior Dec 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@arkodg what changes would you suggest me to add here?

This is a somewhat more detailed document: https://docs.cloud.google.com/load-balancing/docs/https/applb-custom-metrics, although it can also be confusing and is GCP-specific.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do any docs exist that explain how a backend / server should be enhanced to send the appropriate trailers in the response?
looking for a more end-user-facing doc for the feature (which is described in https://www.youtube.com/watch?v=lfv_Oj1BLn0)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a Google doc, though I will have to find it; nothing has been stated officially as far as I am aware.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should consider adding one if it doesn't exist
cc @AndresGuedez

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ack, this is good feedback on the docs gaps.

I'll work with @efimki on this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks @AndresGuedez ! Happy to be the beta tester

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is specific to google but some of their descriptions on ORCA/custom metrics may be helpful for the EG API fields - https://docs.cloud.google.com/load-balancing/docs/https/applb-custom-metrics#configure-custom-metrics

That's not an explicit ask to make changes but give this a read.

// See ORCA Load Report proto: xds.data.orca.v3.OrcaLoadReport (xds/data/orca/v3/orca_load_report.proto)
type BackendUtilization struct {
// A given endpoint must report load metrics continuously for at least this long before the endpoint weight will be used.
// Default is 10s.
// +optional
BlackoutPeriod *gwapiv1.Duration `json:"blackoutPeriod,omitempty"`

// If a given endpoint has not reported load metrics in this long, stop using the reported weight. Defaults to 3m.
// +optional
WeightExpirationPeriod *gwapiv1.Duration `json:"weightExpirationPeriod,omitempty"`

// How often endpoint weights are recalculated. Values less than 100ms are capped at 100ms. Default 1s.
// +optional
WeightUpdatePeriod *gwapiv1.Duration `json:"weightUpdatePeriod,omitempty"`

// ErrorUtilizationPenaltyPercent adjusts endpoint weights based on the error rate (eps/qps).
// This is expressed as a percentage-based integer where 100 represents 1.0, 150 represents 1.5, etc.
//
// For example:
// - 100 => 1.0x
// - 120 => 1.2x
// - 200 => 2.0x
//
// Note: In the internal IR/XDS configuration this value is converted back to a
// floating point multiplier (value / 100.0).
//
// Must be non-negative.
// +kubebuilder:validation:Minimum=0
// +optional
ErrorUtilizationPenaltyPercent *uint32 `json:"errorUtilizationPenaltyPercent,omitempty"`

// Metric names used to compute utilization if application_utilization is not set.
// For map fields in ORCA proto, use the form "<map_field>.<key>", e.g., "named_metrics.foo".
// +optional
MetricNamesForComputingUtilization []string `json:"metricNamesForComputingUtilization,omitempty"`

// RemoveResponseHeaders removes the ORCA load report headers/trailers before sending the response to the client.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we are removing by default, should we change the field name to KeepResponseHeaders so default value is false @jukie

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure let me know if I have to make the changes, I also forgot to reframe the documentation here

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah prefer KeepResponseHeaders which is false by default

// Defaults to true.
// +optional
// +kubebuilder:default=true
RemoveResponseHeaders *bool `json:"removeResponseHeaders,omitempty"`
}

// ConsistentHashType defines the type of input to hash on.
// +kubebuilder:validation:Enum=SourceIP;Header;Headers;Cookie;QueryParams
type ConsistentHashType string
Expand Down
50 changes: 50 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,59 @@ spec:
LoadBalancer policy to apply when routing traffic from the gateway to
the backend endpoints. Defaults to `LeastRequest`.
properties:
backendUtilization:
description: |-
BackendUtilization defines the configuration when the load balancer type is
set to BackendUtilization.
properties:
blackoutPeriod:
description: |-
A given endpoint must report load metrics continuously for at least this long before the endpoint weight will be used.
Default is 10s.
pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$
type: string
errorUtilizationPenaltyPercent:
description: |-
ErrorUtilizationPenaltyPercent adjusts endpoint weights based on the error rate (eps/qps).
This is expressed as a percentage-based integer where 100 represents 1.0, 150 represents 1.5, etc.
For example:
- 100 => 1.0x
- 120 => 1.2x
- 200 => 2.0x
Note: In the internal IR/XDS configuration this value is converted back to a
floating point multiplier (value / 100.0).
Must be non-negative.
format: int32
minimum: 0
type: integer
metricNamesForComputingUtilization:
description: |-
Metric names used to compute utilization if application_utilization is not set.
For map fields in ORCA proto, use the form "<map_field>.<key>", e.g., "named_metrics.foo".
items:
type: string
type: array
removeResponseHeaders:
default: true
description: |-
RemoveResponseHeaders removes the ORCA load report headers/trailers before sending the response to the client.
Defaults to true.
type: boolean
weightExpirationPeriod:
description: If a given endpoint has not reported load metrics
in this long, stop using the reported weight. Defaults to
3m.
pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$
type: string
weightUpdatePeriod:
description: How often endpoint weights are recalculated.
Values less than 100ms are capped at 100ms. Default 1s.
pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$
type: string
type: object
consistentHash:
description: |-
ConsistentHash defines the configuration when the load balancer type is
Expand Down Expand Up @@ -852,7 +905,7 @@ spec:
description: |-
SlowStart defines the configuration related to the slow start load balancer policy.
If set, during slow start window, traffic sent to the newly added hosts will gradually increase.
Currently this is only supported for RoundRobin and LeastRequest load balancers
Supported for RoundRobin, LeastRequest, and BackendUtilization load balancers.
properties:
window:
description: |-
Expand All @@ -872,12 +925,14 @@ spec:
"ConsistentHash",
"LeastRequest",
"Random",
"RoundRobin".
"RoundRobin",
"BackendUtilization".
enum:
- ConsistentHash
- LeastRequest
- Random
- RoundRobin
- BackendUtilization
type: string
zoneAware:
description: ZoneAware defines the configuration related to the
Expand Down Expand Up @@ -923,14 +978,18 @@ spec:
field needs to be set.
rule: 'self.type == ''ConsistentHash'' ? has(self.consistentHash)
: !has(self.consistentHash)'
- message: Currently SlowStart is only supported for RoundRobin and
LeastRequest load balancers.
- message: If LoadBalancer type is BackendUtilization, backendUtilization
field needs to be set.
rule: 'self.type == ''BackendUtilization'' ? has(self.backendUtilization)
: !has(self.backendUtilization)'
- message: Currently SlowStart is only supported for RoundRobin, LeastRequest,
and BackendUtilization load balancers.
rule: 'self.type in [''Random'', ''ConsistentHash''] ? !has(self.slowStart)
: true '
- message: Currently ZoneAware is only supported for LeastRequest,
Random, and RoundRobin load balancers.
rule: 'self.type == ''ConsistentHash'' ? !has(self.zoneAware) :
true '
rule: 'self.type in [''ConsistentHash'', ''BackendUtilization'']
? !has(self.zoneAware) : true '
mergeType:
description: |-
MergeType determines how this configuration is merged with existing BackendTrafficPolicy
Expand Down
Loading
Loading