Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 23 additions & 26 deletions test/e2e-go/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -541,11 +541,9 @@ func TestE2E_RealisticLoad_Overprovisioned(t *testing.T) {
createNamespace(t, ns)

// Deploy a workload using stress-ng to generate known CPU/memory load.
// Overprovisioned: requests 300m / 128Mi memory, actual ~200m CPU / ~100Mi.
// Burstable QoS (no limits) so the pod schedules reliably on the shared
// CI k3d node where 13 parallel tests compete for ~4 CPUs. Guaranteed QoS
// with 500m failed intermittently because the scheduler couldn't reserve
// the full amount during peak contention.
// Moderate requests (150m/64Mi) so the pod schedules on the shared CI
// k3d node where 13 parallel E2E tests compete for ~4 CPUs. Burstable QoS
// (no limits) lets the container burst to its actual ~200m CPU usage.
deploy := &appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: "load-app",
Expand All @@ -569,8 +567,8 @@ func TestE2E_RealisticLoad_Overprovisioned(t *testing.T) {
Args: []string{"--cpu", "1", "--cpu-load", "20", "--vm", "1", "--vm-bytes", "100M", "--timeout", "0"},
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("300m"),
corev1.ResourceMemory: resource.MustParse("128Mi"),
corev1.ResourceCPU: resource.MustParse("150m"),
corev1.ResourceMemory: resource.MustParse("64Mi"),
},
},
},
Expand All @@ -580,10 +578,10 @@ func TestE2E_RealisticLoad_Overprovisioned(t *testing.T) {
},
}
require.NoError(t, k8sClient.Create(ctx, deploy))
waitForDeploymentReady(t, "load-app", ns, 3*time.Minute)
waitForDeploymentReady(t, "load-app", ns, 120*time.Second)

loadPolicy := createPolicy(t, "load-policy", ns, "load-app", attunev1alpha1.UpdateTypeRecommend)
maxCPU, err := resource.ParseQuantity("250m")
maxCPU, err := resource.ParseQuantity("80m")
require.NoError(t, err)
require.NoError(t, retry.RetryOnConflict(retry.DefaultRetry, func() error {
var latestPolicy attunev1alpha1.AttunePolicy
Expand All @@ -594,13 +592,8 @@ func TestE2E_RealisticLoad_Overprovisioned(t *testing.T) {
return k8sClient.Update(ctx, &latestPolicy)
}))

// Wait for the updated policy to produce a recommendation below the current request,
// proving the operator detected overprovisioning.
// CI note: this test is intentionally load-sensitive (synthetic stress-ng + recommendation engine
// + Prometheus scrape). Under parallel E2E load on GitHub-hosted k3d nodes it can take
// several minutes for the first recommendation + MaxAllowed bound to appear. We give it extra
// patience here only; all other Go E2E tests use shorter deadlines.
require.NoError(t, wait.PollUntilContextTimeout(ctx, 5*time.Second, 6*time.Minute, true, func(ctx context.Context) (bool, error) {
// Wait for the operator to produce a recommendation based on actual usage.
require.NoError(t, wait.PollUntilContextTimeout(ctx, 5*time.Second, 3*time.Minute, true, func(ctx context.Context) (bool, error) {
var latestPolicy attunev1alpha1.AttunePolicy
if err := k8sClient.Get(ctx, types.NamespacedName{Name: "load-policy", Namespace: ns}, &latestPolicy); err != nil {
return false, nil
Expand All @@ -612,9 +605,16 @@ func TestE2E_RealisticLoad_Overprovisioned(t *testing.T) {
latestPolicy.Status.Workloads.WithRecommendations, len(latestPolicy.Status.Recommendations))
return false, nil
}
recCPU := latestPolicy.Status.Recommendations[0].Containers[0].Recommended.CPURequest.MilliValue()
t.Logf("Current CPU recommendation: %dm (waiting for <= 250m and < 300m)", recCPU)
return recCPU <= 250 && recCPU < 300, nil
container := latestPolicy.Status.Recommendations[0].Containers[0]
// Wait for a complete explanation, which proves the recommendation
// is based on real Prometheus metrics (not a premature empty result).
if container.Explanation == nil || container.Explanation.CPU == nil {
t.Log("load-policy: recommendation exists but CPU explanation not yet populated")
return false, nil
}
recCPU := container.Recommended.CPURequest.MilliValue()
t.Logf("Current CPU recommendation: %dm (waiting for <= 80m)", recCPU)
return recCPU <= 80, nil
}))

var latestPolicy attunev1alpha1.AttunePolicy
Expand All @@ -624,21 +624,18 @@ func TestE2E_RealisticLoad_Overprovisioned(t *testing.T) {
rec := latestPolicy.Status.Recommendations[0]
require.NotEmpty(t, rec.Containers)

// CPU recommendation should be within MaxAllowed and below the current 300m request.
// CPU recommendation should be within MaxAllowed and reflect actual usage.
recCPU := rec.Containers[0].Recommended.CPURequest
assert.LessOrEqual(t, recCPU.MilliValue(), int64(250),
"recommended CPU should respect the 250m MaxAllowed, got %s", recCPU.String())
assert.Less(t, recCPU.MilliValue(), int64(300),
"recommended CPU should be below the 300m request (overprovisioned), got %s", recCPU.String())
assert.LessOrEqual(t, recCPU.MilliValue(), int64(80),
"recommended CPU should respect the 80m MaxAllowed, got %s", recCPU.String())

cpuExplain := rec.Containers[0].Explanation
require.NotNil(t, cpuExplain)
require.NotNil(t, cpuExplain.CPU)
assert.Equal(t, "max", cpuExplain.CPU.BoundsApplied,
"load test should observe the CPU max bound being applied")

// Savings estimate should be non-empty when the recommendation (400m) lowers
// the current request (500m).
// Savings estimate should be computed for this workload.
assert.NotEmpty(t, latestPolicy.Status.Savings.EstimatedMonthlySavings,
"savings estimate should be computed for overprovisioned workload")
}
Expand Down
Loading