feat(infra): Add rateLimitHpa support in EnvoyGateway API (#4983)

keithfz · web-flow · commit d4a075647630 · 2025-01-06T14:12:49.000-08:00
* add hpa support for rate limit in EnvoyGateway API

Signed-off-by: keithfz <kzeto4@gmail.com>
diff --git a/api/v1alpha1/envoygateway_helpers.go b/api/v1alpha1/envoygateway_helpers.go
@@ -235,6 +235,10 @@ func (r *EnvoyGatewayProvider) GetEnvoyGatewayKubeProvider() *EnvoyGatewayKubern
 
 	r.Kubernetes.RateLimitDeployment.defaultKubernetesDeploymentSpec(DefaultRateLimitImage)
 
+	if r.Kubernetes.RateLimitHpa != nil {
+		r.Kubernetes.RateLimitHpa.setDefault()
+	}
+
 	if r.Kubernetes.ShutdownManager == nil {
 		r.Kubernetes.ShutdownManager = &ShutdownManager{Image: ptr.To(DefaultShutdownManagerImage)}
 	}
diff --git a/api/v1alpha1/envoygateway_types.go b/api/v1alpha1/envoygateway_types.go
@@ -202,6 +202,12 @@ type EnvoyGatewayKubernetesProvider struct {
 	// +optional
 	RateLimitDeployment *KubernetesDeploymentSpec `json:"rateLimitDeployment,omitempty"`
 
+	// RateLimitHpa defines the Horizontal Pod Autoscaler settings for Envoy ratelimit Deployment.
+	// If the HPA is set, Replicas field from RateLimitDeployment will be ignored.
+	//
+	// +optional
+	RateLimitHpa *KubernetesHorizontalPodAutoscalerSpec `json:"rateLimitHpa,omitempty"`
+
 	// Watch holds configuration of which input resources should be watched and reconciled.
 	// +optional
 	Watch *KubernetesWatchMode `json:"watch,omitempty"`
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
diff --git a/internal/infrastructure/kubernetes/ratelimit/resource_provider.go b/internal/infrastructure/kubernetes/ratelimit/resource_provider.go
@@ -42,6 +42,7 @@ type ResourceRender struct {
 
 	rateLimit           *egv1a1.RateLimit
 	rateLimitDeployment *egv1a1.KubernetesDeploymentSpec
+	rateLimitHpa        *egv1a1.KubernetesHorizontalPodAutoscalerSpec
 
 	// ownerReferenceUID store the uid of its owner reference.
 	ownerReferenceUID map[string]types.UID
@@ -53,6 +54,7 @@ func NewResourceRender(ns string, gateway *egv1a1.EnvoyGateway, ownerReferenceUI
 		Namespace:           ns,
 		rateLimit:           gateway.RateLimit,
 		rateLimitDeployment: gateway.GetEnvoyGatewayProvider().GetEnvoyGatewayKubeProvider().RateLimitDeployment,
+		rateLimitHpa:        gateway.GetEnvoyGatewayProvider().GetEnvoyGatewayKubeProvider().RateLimitHpa,
 		ownerReferenceUID:   ownerReferenceUID,
 	}
 }
@@ -288,6 +290,11 @@ func (r *ResourceRender) Deployment() (*appsv1.Deployment, error) {
 		}
 	}
 
+	// omit the deployment replicas if HPA is being set
+	if r.rateLimitHpa != nil {
+		deployment.Spec.Replicas = nil
+	}
+
 	// apply merge patch to deployment
 	var err error
 	if deployment, err = r.rateLimitDeployment.ApplyMergePatch(deployment); err != nil {
@@ -309,11 +316,50 @@ func (r *ResourceRender) DaemonSet() (*appsv1.DaemonSet, error) {
 
 // HorizontalPodAutoscalerSpec returns the `HorizontalPodAutoscaler` sets spec.
 func (r *ResourceRender) HorizontalPodAutoscalerSpec() (*egv1a1.KubernetesHorizontalPodAutoscalerSpec, error) {
-	return nil, nil
+	return r.rateLimitHpa, nil
 }
 
+// HorizontalPodAutoscaler builds the autoscaling/v2 HorizontalPodAutoscaler that
+// scales the ratelimit Deployment. It returns (nil, nil) when no HPA settings are
+// configured, and a nil object together with the error if the spec lookup or the
+// merge patch fails.
 func (r *ResourceRender) HorizontalPodAutoscaler() (*autoscalingv2.HorizontalPodAutoscaler, error) {
-	return nil, nil
+	hpaConfig, err := r.HorizontalPodAutoscalerSpec()
+	if err != nil {
+		return nil, err
+	}
+	if hpaConfig == nil {
+		return nil, nil
+	}
+
+	// The scale target defaults to the render's own name; a name set on the
+	// ratelimit Deployment spec takes precedence. The nil guard on the spec is
+	// defensive so a render built without one cannot panic here.
+	targetName := r.Name()
+	if r.rateLimitDeployment != nil && r.rateLimitDeployment.Name != nil {
+		targetName = *r.rateLimitDeployment.Name
+	}
+
+	hpa := &autoscalingv2.HorizontalPodAutoscaler{
+		TypeMeta: metav1.TypeMeta{
+			APIVersion: "autoscaling/v2",
+			Kind:       "HorizontalPodAutoscaler",
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: r.Namespace,
+			Name:      r.Name(),
+			Labels:    rateLimitLabels(),
+		},
+		Spec: autoscalingv2.HorizontalPodAutoscalerSpec{
+			ScaleTargetRef: autoscalingv2.CrossVersionObjectReference{
+				APIVersion: "apps/v1",
+				Kind:       "Deployment",
+				Name:       targetName,
+			},
+			MinReplicas: hpaConfig.MinReplicas,
+			// MaxReplicas falls back to 1 when the spec leaves it unset.
+			MaxReplicas: ptr.Deref(hpaConfig.MaxReplicas, 1),
+			Metrics:     hpaConfig.Metrics,
+			Behavior:    hpaConfig.Behavior,
+		},
+	}
+
+	// Apply the merge patch carried by the HPA spec on top of the generated object.
+	if hpa, err = hpaConfig.ApplyMergePatch(hpa); err != nil {
+		return nil, err
+	}
+
+	return hpa, nil
 }
 
 // PodDisruptionBudgetSpec returns the `PodDisruptionBudget` sets spec.
diff --git a/internal/infrastructure/kubernetes/ratelimit/resource_provider_test.go b/internal/infrastructure/kubernetes/ratelimit/resource_provider_test.go
@@ -15,6 +15,7 @@ import (
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	appsv1 "k8s.io/api/apps/v1"
+	autoscalingv2 "k8s.io/api/autoscaling/v2"
 	corev1 "k8s.io/api/core/v1"
 	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
@@ -765,6 +766,123 @@ func loadDeployment(caseName string) (*appsv1.Deployment, error) {
 	return deployment, nil
 }
 
+// TestHorizontalPodAutoscaler renders the ratelimit HPA for each configuration
+// below and compares the result with the golden file testdata/hpa/<caseName>.yaml.
+// When the overrideTestData flag is set, the golden file is rewritten instead.
+func TestHorizontalPodAutoscaler(t *testing.T) {
+	cfg, err := config.New()
+	require.NoError(t, err)
+
+	// redisRateLimit returns a fresh copy of the Redis-backed settings used by every case.
+	redisRateLimit := func() *egv1a1.RateLimit {
+		return &egv1a1.RateLimit{
+			Backend: egv1a1.RateLimitDatabaseBackend{
+				Type:  egv1a1.RedisBackendType,
+				Redis: &egv1a1.RateLimitRedisSettings{URL: "redis.redis.svc:6379"},
+			},
+		}
+	}
+
+	// utilizationMetric describes a resource metric with an average-utilization target.
+	utilizationMetric := func(name corev1.ResourceName, percent int32) autoscalingv2.MetricSpec {
+		return autoscalingv2.MetricSpec{
+			Type: autoscalingv2.ResourceMetricSourceType,
+			Resource: &autoscalingv2.ResourceMetricSource{
+				Name: name,
+				Target: autoscalingv2.MetricTarget{
+					Type:               autoscalingv2.UtilizationMetricType,
+					AverageUtilization: ptr.To(percent),
+				},
+			},
+		}
+	}
+
+	testCases := []struct {
+		caseName            string
+		rateLimit           *egv1a1.RateLimit
+		rateLimitHpa        *egv1a1.KubernetesHorizontalPodAutoscalerSpec
+		rateLimitDeployment *egv1a1.KubernetesDeploymentSpec
+	}{
+		{
+			caseName:     "default",
+			rateLimit:    redisRateLimit(),
+			rateLimitHpa: &egv1a1.KubernetesHorizontalPodAutoscalerSpec{},
+		},
+		{
+			caseName:  "custom",
+			rateLimit: redisRateLimit(),
+			rateLimitHpa: &egv1a1.KubernetesHorizontalPodAutoscalerSpec{
+				MinReplicas: ptr.To[int32](5),
+				MaxReplicas: ptr.To[int32](10),
+				Metrics: []autoscalingv2.MetricSpec{
+					utilizationMetric(corev1.ResourceCPU, 60),
+					utilizationMetric(corev1.ResourceMemory, 70),
+				},
+			},
+		},
+		{
+			caseName:            "with-deployment-name",
+			rateLimit:           redisRateLimit(),
+			rateLimitHpa:        &egv1a1.KubernetesHorizontalPodAutoscalerSpec{},
+			rateLimitDeployment: &egv1a1.KubernetesDeploymentSpec{Name: ptr.To("foo")},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.caseName, func(t *testing.T) {
+			// Point the shared config at this case's ratelimit and provider settings.
+			cfg.EnvoyGateway.RateLimit = tc.rateLimit
+			cfg.EnvoyGateway.Provider = &egv1a1.EnvoyGatewayProvider{
+				Type: egv1a1.ProviderTypeKubernetes,
+				Kubernetes: &egv1a1.EnvoyGatewayKubernetesProvider{
+					RateLimitHpa:        tc.rateLimitHpa,
+					RateLimitDeployment: tc.rateLimitDeployment,
+				},
+			}
+
+			render := NewResourceRender(cfg.Namespace, cfg.EnvoyGateway, ownerReferenceUID)
+			got, err := render.HorizontalPodAutoscaler()
+			require.NoError(t, err)
+
+			// Regenerate the golden file and stop when asked to override test data.
+			if *overrideTestData {
+				goldenPath := fmt.Sprintf("testdata/hpa/%s.yaml", tc.caseName)
+				raw, err := yaml.Marshal(got)
+				require.NoError(t, err)
+				// nolint:gosec
+				err = os.WriteFile(goldenPath, raw, 0o644)
+				require.NoError(t, err)
+				return
+			}
+
+			want, err := loadHpa(tc.caseName)
+			require.NoError(t, err)
+
+			assert.Equal(t, want, got)
+		})
+	}
+}
+
+// loadHpa reads the golden HorizontalPodAutoscaler for caseName from
+// testdata/hpa/<caseName>.yaml and decodes it. It returns a nil object and the
+// error if the file cannot be read or does not decode.
+func loadHpa(caseName string) (*autoscalingv2.HorizontalPodAutoscaler, error) {
+	hpaYAML, err := os.ReadFile(fmt.Sprintf("testdata/hpa/%s.yaml", caseName))
+	if err != nil {
+		return nil, err
+	}
+
+	hpa := &autoscalingv2.HorizontalPodAutoscaler{}
+	// Report a malformed golden file instead of silently comparing against a
+	// zero or partially decoded object.
+	if err := yaml.Unmarshal(hpaYAML, hpa); err != nil {
+		return nil, err
+	}
+	return hpa, nil
+}
+
 func TestGetServiceURL(t *testing.T) {
 	got := GetServiceURL("envoy-gateway-system", "example-cluster.local")
 	assert.Equal(t, "grpc://envoy-ratelimit.envoy-gateway-system.svc.example-cluster.local:8081", got)
diff --git a/internal/infrastructure/kubernetes/ratelimit/testdata/hpa/custom.yaml b/internal/infrastructure/kubernetes/ratelimit/testdata/hpa/custom.yaml
@@ -0,0 +1,33 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  creationTimestamp: null
+  labels:
+    app.kubernetes.io/component: ratelimit
+    app.kubernetes.io/managed-by: envoy-gateway
+    app.kubernetes.io/name: envoy-ratelimit
+  name: envoy-ratelimit
+  namespace: envoy-gateway-system
+spec:
+  maxReplicas: 10
+  metrics:
+  - resource:
+      name: cpu
+      target:
+        averageUtilization: 60
+        type: Utilization
+    type: Resource
+  - resource:
+      name: memory
+      target:
+        averageUtilization: 70
+        type: Utilization
+    type: Resource
+  minReplicas: 5
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: envoy-ratelimit
+status:
+  currentMetrics: null
+  desiredReplicas: 0
diff --git a/internal/infrastructure/kubernetes/ratelimit/testdata/hpa/default.yaml b/internal/infrastructure/kubernetes/ratelimit/testdata/hpa/default.yaml
@@ -0,0 +1,26 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  creationTimestamp: null
+  labels:
+    app.kubernetes.io/component: ratelimit
+    app.kubernetes.io/managed-by: envoy-gateway
+    app.kubernetes.io/name: envoy-ratelimit
+  name: envoy-ratelimit
+  namespace: envoy-gateway-system
+spec:
+  maxReplicas: 1
+  metrics:
+  - resource:
+      name: cpu
+      target:
+        averageUtilization: 80
+        type: Utilization
+    type: Resource
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: envoy-ratelimit
+status:
+  currentMetrics: null
+  desiredReplicas: 0
diff --git a/internal/infrastructure/kubernetes/ratelimit/testdata/hpa/with-deployment-name.yaml b/internal/infrastructure/kubernetes/ratelimit/testdata/hpa/with-deployment-name.yaml
@@ -0,0 +1,26 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  creationTimestamp: null
+  labels:
+    app.kubernetes.io/component: ratelimit
+    app.kubernetes.io/managed-by: envoy-gateway
+    app.kubernetes.io/name: envoy-ratelimit
+  name: envoy-ratelimit
+  namespace: envoy-gateway-system
+spec:
+  maxReplicas: 1
+  metrics:
+  - resource:
+      name: cpu
+      target:
+        averageUtilization: 80
+        type: Utilization
+    type: Resource
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: foo
+status:
+  currentMetrics: null
+  desiredReplicas: 0
diff --git a/release-notes/current.yaml b/release-notes/current.yaml
@@ -17,6 +17,7 @@ new features: |
   Added support for trusted CIDRs in the ClientIPDetectionSettings API
   Added support for sending attributes to external processor in EnvoyExtensionPolicy API
   Added support for patching EnvoyProxy.spec.provider.kubernetes.envoyHpa and EnvoyProxy.spec.provider.kubernetes.envoyPDB
+  Added support for defining rateLimitHpa in EnvoyGateway API
 
 # Fixes for bugs identified in previous versions.
 bug fixes: |
diff --git a/site/content/en/latest/api/extension_types.md b/site/content/en/latest/api/extension_types.md
@@ -1132,6 +1132,7 @@ _Appears in:_
 | Field | Type | Required | Description |
 | ---   | ---  | ---      | ---         |
 | `rateLimitDeployment` | _[KubernetesDeploymentSpec](#kubernetesdeploymentspec)_ |  false  | RateLimitDeployment defines the desired state of the Envoy ratelimit deployment resource.<br />If unspecified, default settings for the managed Envoy ratelimit deployment resource<br />are applied. |
+| `rateLimitHpa` | _[KubernetesHorizontalPodAutoscalerSpec](#kuberneteshorizontalpodautoscalerspec)_ |  false  | RateLimitHpa defines the Horizontal Pod Autoscaler settings for Envoy ratelimit Deployment.<br />If the HPA is set, Replicas field from RateLimitDeployment will be ignored. |
 | `watch` | _[KubernetesWatchMode](#kuberneteswatchmode)_ |  false  | Watch holds configuration of which input resources should be watched and reconciled. |
 | `deploy` | _[KubernetesDeployMode](#kubernetesdeploymode)_ |  false  | Deploy holds configuration of how output managed resources such as the Envoy Proxy data plane<br />should be deployed |
 | `overwriteControlPlaneCerts` | _boolean_ |  false  | OverwriteControlPlaneCerts updates the secrets containing the control plane certs, when set. |
@@ -2521,6 +2522,7 @@ Envoy Gateway will revert back to this value every time reconciliation occurs.
 See k8s.io.autoscaling.v2.HorizontalPodAutoScalerSpec.
 
 _Appears in:_
+- [EnvoyGatewayKubernetesProvider](#envoygatewaykubernetesprovider)
 - [EnvoyProxyKubernetesProvider](#envoyproxykubernetesprovider)
 
 | Field | Type | Required | Description |
diff --git a/site/content/zh/latest/api/extension_types.md b/site/content/zh/latest/api/extension_types.md
@@ -1132,6 +1132,7 @@ _Appears in:_
 | Field | Type | Required | Description |
 | ---   | ---  | ---      | ---         |
 | `rateLimitDeployment` | _[KubernetesDeploymentSpec](#kubernetesdeploymentspec)_ |  false  | RateLimitDeployment defines the desired state of the Envoy ratelimit deployment resource.<br />If unspecified, default settings for the managed Envoy ratelimit deployment resource<br />are applied. |
+| `rateLimitHpa` | _[KubernetesHorizontalPodAutoscalerSpec](#kuberneteshorizontalpodautoscalerspec)_ |  false  | RateLimitHpa defines the Horizontal Pod Autoscaler settings for Envoy ratelimit Deployment.<br />If the HPA is set, Replicas field from RateLimitDeployment will be ignored. |
 | `watch` | _[KubernetesWatchMode](#kuberneteswatchmode)_ |  false  | Watch holds configuration of which input resources should be watched and reconciled. |
 | `deploy` | _[KubernetesDeployMode](#kubernetesdeploymode)_ |  false  | Deploy holds configuration of how output managed resources such as the Envoy Proxy data plane<br />should be deployed |
 | `overwriteControlPlaneCerts` | _boolean_ |  false  | OverwriteControlPlaneCerts updates the secrets containing the control plane certs, when set. |
@@ -2521,6 +2522,7 @@ Envoy Gateway will revert back to this value every time reconciliation occurs.
 See k8s.io.autoscaling.v2.HorizontalPodAutoScalerSpec.
 
 _Appears in:_
+- [EnvoyGatewayKubernetesProvider](#envoygatewaykubernetesprovider)
 - [EnvoyProxyKubernetesProvider](#envoyproxykubernetesprovider)
 
 | Field | Type | Required | Description |

Original file line number	Diff line number	Diff line change
`@@ -235,6 +235,10 @@ func (r *EnvoyGatewayProvider) GetEnvoyGatewayKubeProvider() *EnvoyGatewayKubern`
`235`	`235`
`236`	`236`	`r.Kubernetes.RateLimitDeployment.defaultKubernetesDeploymentSpec(DefaultRateLimitImage)`
`237`	`237`
	`238`	`+ if r.Kubernetes.RateLimitHpa != nil {`
	`239`	`+ r.Kubernetes.RateLimitHpa.setDefault()`
	`240`	`+ }`
	`241`	`+`
`238`	`242`	`if r.Kubernetes.ShutdownManager == nil {`
`239`	`243`	`r.Kubernetes.ShutdownManager = &ShutdownManager{Image: ptr.To(DefaultShutdownManagerImage)}`
`240`	`244`	`}`