diff --git a/castai/resource_workload_scaling_policy.go b/castai/resource_workload_scaling_policy.go
index 527a31fe..25c72dcf 100644
--- a/castai/resource_workload_scaling_policy.go
+++ b/castai/resource_workload_scaling_policy.go
@@ -36,6 +36,10 @@ const (
maxExponentValue = 1.
minExponentValue = 0.
defaultApplyType = "IMMEDIATE"
+
+ // CPU stall defaults
+ defaultCPUStallMinPressuredPodPct = 50.0
+ defaultCPUStallThresholdPct = 10.0
)
const (
@@ -66,6 +70,10 @@ const (
FieldApplyThresholdStrategyDefaultAdaptiveType = "DEFAULT_ADAPTIVE"
FieldApplyThresholdStrategyCustomAdaptiveType = "CUSTOM_ADAPTIVE"
FieldAssignmentRules = "assignment_rules"
+ FieldAnomalyDetection = "anomaly_detection"
+ FieldAnomalyDetectionCpuPressure = "cpu_pressure"
+ FieldCpuStallThresholdPercentage = "cpu_stall_threshold_percentage"
+ FieldMinPressuredPodPercentage = "min_pressured_pod_percentage"
)
const (
@@ -346,6 +354,41 @@ It can be either:
},
},
},
+ FieldAnomalyDetection: {
+ Type: schema.TypeList,
+ Optional: true,
+ MaxItems: 1,
+ Description: "Defines anomaly detection settings for the scaling policy.",
+ DiffSuppressFunc: func(k, old, new string, d *schema.ResourceData) bool {
+ return suppressAnomalyDetectionDefaultValueDiff(old, new, d)
+ },
+ Elem: &schema.Resource{
+ Schema: map[string]*schema.Schema{
+ FieldAnomalyDetectionCpuPressure: {
+ Type: schema.TypeList,
+ Optional: true,
+ MaxItems: 1,
+ Description: "Configures CPU pressure anomaly detection thresholds.",
+ Elem: &schema.Resource{
+ Schema: map[string]*schema.Schema{
+ FieldCpuStallThresholdPercentage: {
+ Type: schema.TypeFloat,
+ Required: true,
+ Description: "Percentage of time (0-100) that a pod must experience CPU pressure to be considered under pressure.",
+ ValidateDiagFunc: validation.ToDiagFunc(validation.FloatBetween(0, 100)),
+ },
+ FieldMinPressuredPodPercentage: {
+ Type: schema.TypeFloat,
+ Required: true,
+ Description: "Percentage (0-100) of pods that must be experiencing pressure for the detector to trigger.",
+ ValidateDiagFunc: validation.ToDiagFunc(validation.FloatBetween(0, 100)),
+ },
+ },
+ },
+ },
+ },
+ },
+ },
},
Timeouts: &schema.ResourceTimeout{
Create: schema.DefaultTimeout(createTimeout),
@@ -638,6 +681,8 @@ func resourceWorkloadScalingPolicyCreate(ctx context.Context, d *schema.Resource
req.RecommendationPolicies.Jvm = toJvm(toSection(d, FieldJVM))
+ req.RecommendationPolicies.AnomalyDetection = toAnomalyDetection(toSection(d, FieldAnomalyDetection))
+
req.RecommendationPolicies.ExcludedContainers = toExcludedContainers(d)
ar, err := toAssignmentRules(toSection(d, FieldAssignmentRules))
@@ -781,7 +826,9 @@ func fetchScalingPolicy(ctx context.Context, d *schema.ResourceData, meta any) (
if err := d.Set(FieldJVM, toJvmMap(sp.RecommendationPolicies.Jvm)); err != nil {
return nil, fmt.Errorf("setting jvm: %w", err)
}
-
+ if err := d.Set(FieldAnomalyDetection, toAnomalyDetectionMap(sp.RecommendationPolicies.AnomalyDetection)); err != nil {
+ return nil, fmt.Errorf("setting anomaly detection: %w", err)
+ }
if err := d.Set(FieldAssignmentRules, toAssignmentRulesMap(getResourceFrom(d, FieldAssignmentRules), sp.AssignmentRules)); err != nil {
return nil, fmt.Errorf("setting assignment rules: %w", err)
}
@@ -820,6 +867,7 @@ func updateScalingPolicy(ctx context.Context, d *schema.ResourceData, meta any)
FieldPredictiveScaling,
FieldRolloutBehavior,
FieldJVM,
+ FieldAnomalyDetection,
FieldExcludedContainers,
) {
tflog.Info(ctx, "scaling policy up to date")
@@ -857,6 +905,7 @@ func updateScalingPolicy(ctx context.Context, d *schema.ResourceData, meta any)
PredictiveScaling: toPredictiveScaling(toSection(d, FieldPredictiveScaling)),
RolloutBehavior: toRolloutBehavior(toSection(d, FieldRolloutBehavior)),
Jvm: toJvm(toSection(d, FieldJVM)),
+ AnomalyDetection: toAnomalyDetection(toSection(d, FieldAnomalyDetection)),
ExcludedContainers: toExcludedContainers(d),
},
}
@@ -1118,6 +1167,17 @@ func suppressMemoryEventApplyTypeDefaultValueDiff(oldValue, newValue string, d *
return oldValue == newValue
}
+// suppressAnomalyDetectionDefaultValueDiff reports whether a diff on the anomaly_detection block can be ignored.
+func suppressAnomalyDetectionDefaultValueDiff(oldValue, newValue string, d *schema.ResourceData) bool {
+	if !isEmpty(newValue) {
+		return oldValue == newValue
+	}
+	// The new value is empty: suppress only when both CPU pressure values read from d equal the defaults.
+	prefix := FieldAnomalyDetection + ".0." + FieldAnomalyDetectionCpuPressure + ".0."
+	stall, pods := d.Get(prefix+FieldCpuStallThresholdPercentage), d.Get(prefix+FieldMinPressuredPodPercentage)
+	return stall == defaultCPUStallThresholdPct && pods == defaultCPUStallMinPressuredPodPct
+}
+
func isEmpty(value string) bool {
return value == "" || value == "0"
}
@@ -1492,6 +1552,40 @@ func toRolloutBehaviorMap(s *sdk.WorkloadoptimizationV1RolloutBehaviorSettings)
return []map[string]any{m}
}
+// toAnomalyDetection maps the anomaly_detection section onto SDK settings; an empty section yields nil.
+func toAnomalyDetection(m map[string]any) *sdk.WorkloadoptimizationV1AnomalyDetectionSettings {
+	if len(m) == 0 {
+		return nil
+	}
+	pressure := getFirstElem(m, FieldAnomalyDetectionCpuPressure)
+	if pressure == nil {
+		return &sdk.WorkloadoptimizationV1AnomalyDetectionSettings{}
+	}
+	// Both values are declared as schema.TypeFloat in the schema, hence the unchecked float64 assertions.
+	stall, pods := pressure[FieldCpuStallThresholdPercentage].(float64), pressure[FieldMinPressuredPodPercentage].(float64)
+	cpu := &sdk.WorkloadoptimizationV1CPUPressureSettings{CpuStallThresholdPercentage: stall, MinPressuredPodPercentage: pods}
+	return &sdk.WorkloadoptimizationV1AnomalyDetectionSettings{CpuPressure: cpu}
+}
+
+// toAnomalyDetectionMap flattens SDK anomaly detection settings into the single-element
+// list form used for the anomaly_detection block. It returns nil when s is nil or carries
+// no CPU pressure settings, which is the only nested block this function maps.
+func toAnomalyDetectionMap(s *sdk.WorkloadoptimizationV1AnomalyDetectionSettings) []map[string]any {
+	if s == nil || s.CpuPressure == nil {
+		return nil
+	}
+
+	// Inner cpu_pressure block: one element holding both percentages.
+	cpu := s.CpuPressure
+	pressure := map[string]any{
+		FieldCpuStallThresholdPercentage: cpu.CpuStallThresholdPercentage,
+		FieldMinPressuredPodPercentage:   cpu.MinPressuredPodPercentage,
+	}
+	return []map[string]any{
+		{FieldAnomalyDetectionCpuPressure: []map[string]any{pressure}},
+	}
+}
+
func toJvm(m map[string]any) *sdk.WorkloadoptimizationV1JVMSettings {
if len(m) == 0 {
return nil
diff --git a/castai/resource_workload_scaling_policy_test.go b/castai/resource_workload_scaling_policy_test.go
index d444cf0d..6d260a72 100644
--- a/castai/resource_workload_scaling_policy_test.go
+++ b/castai/resource_workload_scaling_policy_test.go
@@ -124,6 +124,8 @@ func TestAccGKE_ResourceWorkloadScalingPolicy(t *testing.T) {
// Requires workload-autoscaler from v0.35.3
resource.TestCheckResourceAttr(resourceName, "rollout_behavior.0.type", "NO_DISRUPTION"),
resource.TestCheckResourceAttr(resourceName, "jvm.0.memory.0.optimization", "true"),
+ resource.TestCheckResourceAttr(resourceName, "anomaly_detection.0.cpu_pressure.0.cpu_stall_threshold_percentage", "50"),
+ resource.TestCheckResourceAttr(resourceName, "anomaly_detection.0.cpu_pressure.0.min_pressured_pod_percentage", "30"),
),
},
},
@@ -393,6 +395,12 @@ func scalingPolicyConfigUpdated(clusterName, projectID, name string) string {
confidence {
threshold = 0.6
}
+ anomaly_detection {
+ cpu_pressure {
+ cpu_stall_threshold_percentage = 50
+ min_pressured_pod_percentage = 30
+ }
+ }
jvm {
memory {
optimization = true
@@ -837,6 +845,82 @@ func Test_toRolloutBehaviorMap(t *testing.T) {
}
}
+// Test_toAnomalyDetection checks the conversion of the anomaly_detection section map into
+// SDK settings, both for an empty section and for one carrying a cpu_pressure block.
+func Test_toAnomalyDetection(t *testing.T) {
+	cases := map[string]struct {
+		input map[string]any
+		want  *sdk.WorkloadoptimizationV1AnomalyDetectionSettings
+	}{
+		"should return nil on empty map": {
+			input: map[string]any{},
+			want:  nil,
+		},
+		"should return anomaly detection settings with cpu_pressure": {
+			input: map[string]any{
+				FieldAnomalyDetectionCpuPressure: []any{
+					map[string]any{
+						FieldCpuStallThresholdPercentage: float64(50),
+						FieldMinPressuredPodPercentage:   float64(30),
+					},
+				},
+			},
+			want: &sdk.WorkloadoptimizationV1AnomalyDetectionSettings{
+				CpuPressure: &sdk.WorkloadoptimizationV1CPUPressureSettings{
+					CpuStallThresholdPercentage: 50,
+					MinPressuredPodPercentage:   30,
+				},
+			},
+		},
+	}
+	for name, tc := range cases {
+		t.Run(name, func(t *testing.T) {
+			require.Equal(t, tc.want, toAnomalyDetection(tc.input))
+		})
+	}
+}
+
+// Test_toAnomalyDetectionMap checks the flattening of SDK anomaly detection settings into
+// the list-of-maps form for nil input, populated CPU pressure settings and empty settings.
+func Test_toAnomalyDetectionMap(t *testing.T) {
+	cases := map[string]struct {
+		input *sdk.WorkloadoptimizationV1AnomalyDetectionSettings
+		want  []map[string]any
+	}{
+		"should return nil for nil input": {
+			input: nil,
+			want:  nil,
+		},
+		"should return anomaly detection map with cpu_pressure": {
+			input: &sdk.WorkloadoptimizationV1AnomalyDetectionSettings{
+				CpuPressure: &sdk.WorkloadoptimizationV1CPUPressureSettings{
+					CpuStallThresholdPercentage: 50,
+					MinPressuredPodPercentage:   30,
+				},
+			},
+			want: []map[string]any{
+				{
+					FieldAnomalyDetectionCpuPressure: []map[string]any{
+						{
+							FieldCpuStallThresholdPercentage: float64(50),
+							FieldMinPressuredPodPercentage:   float64(30),
+						},
+					},
+				},
+			},
+		},
+		"should return nil for empty settings": {
+			input: &sdk.WorkloadoptimizationV1AnomalyDetectionSettings{},
+			want:  nil,
+		},
+	}
+	for name, tc := range cases {
+		t.Run(name, func(t *testing.T) {
+			require.Equal(t, tc.want, toAnomalyDetectionMap(tc.input))
+		})
+	}
+}
+
func Test_toJvm(t *testing.T) {
tests := map[string]struct {
args map[string]any
diff --git a/docs/resources/workload_scaling_policy.md b/docs/resources/workload_scaling_policy.md
index 2a3371cc..afa572af 100644
--- a/docs/resources/workload_scaling_policy.md
+++ b/docs/resources/workload_scaling_policy.md
@@ -88,6 +88,12 @@ resource "castai_workload_scaling_policy" "services" {
rollout_behavior {
type = "NO_DISRUPTION"
}
+ anomaly_detection {
+ cpu_pressure {
+ cpu_stall_threshold_percentage = 50
+ min_pressured_pod_percentage = 30
+ }
+ }
jvm {
memory {
optimization = true
@@ -115,6 +121,7 @@ resource "castai_workload_scaling_policy" "services" {
### Optional
+- `anomaly_detection` (Block List, Max: 1) Defines anomaly detection settings for the scaling policy. (see [below for nested schema](#nestedblock--anomaly_detection))
- `anti_affinity` (Block List, Max: 1) (see [below for nested schema](#nestedblock--anti_affinity))
- `assignment_rules` (Block List) Allows defining conditions for automatically assigning workloads to this scaling policy. (see [below for nested schema](#nestedblock--assignment_rules))
- `confidence` (Block List, Max: 1) Defines the confidence settings for applying recommendations. (see [below for nested schema](#nestedblock--confidence))
@@ -248,6 +255,23 @@ Optional:
+
+### Nested Schema for `anomaly_detection`
+
+Optional:
+
+- `cpu_pressure` (Block List, Max: 1) Configures CPU pressure anomaly detection thresholds. (see [below for nested schema](#nestedblock--anomaly_detection--cpu_pressure))
+
+
+### Nested Schema for `anomaly_detection.cpu_pressure`
+
+Required:
+
+- `cpu_stall_threshold_percentage` (Number) Percentage of time (0-100) that a pod must experience CPU pressure to be considered under pressure.
+- `min_pressured_pod_percentage` (Number) Percentage (0-100) of pods that must be experiencing pressure for the detector to trigger.
+
+
+
### Nested Schema for `anti_affinity`
diff --git a/examples/resources/castai_workload_scaling_policy/resource.tf b/examples/resources/castai_workload_scaling_policy/resource.tf
index bda0b5a4..c0c2c2aa 100644
--- a/examples/resources/castai_workload_scaling_policy/resource.tf
+++ b/examples/resources/castai_workload_scaling_policy/resource.tf
@@ -71,6 +71,12 @@ resource "castai_workload_scaling_policy" "services" {
rollout_behavior {
type = "NO_DISRUPTION"
}
+ anomaly_detection {
+ cpu_pressure {
+ cpu_stall_threshold_percentage = 50
+ min_pressured_pod_percentage = 30
+ }
+ }
jvm {
memory {
optimization = true