diff --git a/cluster-autoscaler/e2e/cluster_size_autoscaling.go b/cluster-autoscaler/e2e/cluster_size_autoscaling.go index d3df35c655af..cd8f35c9fc50 100644 --- a/cluster-autoscaler/e2e/cluster_size_autoscaling.go +++ b/cluster-autoscaler/e2e/cluster_size_autoscaling.go @@ -26,6 +26,7 @@ import ( yaml "go.yaml.in/yaml/v2" v1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" + resourcev1 "k8s.io/api/resource/v1" schedulingv1 "k8s.io/api/scheduling/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -73,6 +74,9 @@ const ( highPriorityClassName = "high-priority" nonExistingBypassedSchedulerName = "non-existing-bypassed-scheduler" + + draDeviceCapacity = 4 // Devices per node + draDeviceClassName = "gpu" ) // Test assumes that the cluster has a minimum number of nodes at the start of the test. @@ -97,7 +101,7 @@ var _ = SIGDescribe("Cluster size autoscaling", framework.WithSlow(), framework. var nodeCount int var memAllocatableMb int - ginkgo.BeforeEach(func(ctx context.Context) { + setupAutoscalingTest := func(ctx context.Context) { c = f.ClientSet _, err := c.CoreV1().ConfigMaps("kube-system").Get(ctx, "cluster-autoscaler-status", metav1.GetOptions{}) if err != nil { @@ -122,331 +126,432 @@ var _ = SIGDescribe("Cluster size autoscaling", framework.WithSlow(), framework. mem := nodes.Items[0].Status.Allocatable[v1.ResourceMemory] memAllocatableMb = int((&mem).Value() / 1024 / 1024) - // As the last deferred cleanup ensure that the state is restored. - // AfterEach does not allow for this because it runs before other deferred - // cleanups happen, and they are blocking cluster restoring its initial size. - ginkgo.DeferCleanup(func(ctx context.Context) { - ginkgo.By("Restoring the state after test") - framework.ExpectNoError(WaitForClusterSizeFunc(ctx, c, func(size int) bool { return size == nodeCount }, scaleDownTimeout)) - nodes, err := c.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) - framework.ExpectNoError(err) + } + + cleanupAutoscalingTest := func(ctx context.Context) { + ginkgo.By("Restoring the state after test") + framework.ExpectNoError(WaitForClusterSizeFunc(ctx, c, func(size int) bool { return size == nodeCount }, scaleDownTimeout)) + nodes, err := c.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + framework.ExpectNoError(err) + + s := time.Now() + makeSchedulableLoop: + for start := time.Now(); time.Since(start) < makeSchedulableTimeout; time.Sleep(makeSchedulableDelay) { + var criticalAddonsOnlyErrorType *CriticalAddonsOnlyError + for _, n := range nodes.Items { + err = makeNodeSchedulable(ctx, c, &n, true) + if err != nil && errors.As(err, &criticalAddonsOnlyErrorType) { + continue makeSchedulableLoop + } else if err != nil { + klog.Infof("Error during cleanup: %v", err) + } + } + break + } + klog.Infof("Made nodes schedulable again in %v", time.Since(s).String()) + } - s := time.Now() - makeSchedulableLoop: - for start := time.Now(); time.Since(start) < makeSchedulableTimeout; time.Sleep(makeSchedulableDelay) { - var criticalAddonsOnlyErrorType *CriticalAddonsOnlyError - for _, n := range nodes.Items { - err = makeNodeSchedulable(ctx, c, &n, true) - if err != nil && errors.As(err, &criticalAddonsOnlyErrorType) { - continue makeSchedulableLoop - } else if err != nil { - klog.Infof("Error during cleanup: %v", err) + f.Context("Standard Autoscaling", func() { + ginkgo.BeforeEach(func(ctx context.Context) { + setupAutoscalingTest(ctx) + ginkgo.DeferCleanup(cleanupAutoscalingTest) + }) + + f.It("shouldn't increase cluster size 
if pending pod is too large", feature.ClusterSizeAutoscalingScaleUp, func(ctx context.Context) { + ginkgo.By("Creating unschedulable pod") + ReserveMemory(ctx, f, "memory-reservation", 1, int(1.1*float64(memAllocatableMb)), false, defaultTimeout) + ginkgo.DeferCleanup(e2erc.DeleteRCAndWaitForGC, f.ClientSet, f.Namespace.Name, "memory-reservation") + + ginkgo.By("Waiting for scale up hoping it won't happen") + // Verify that the appropriate event was generated + eventFound := false + EventsLoop: + for start := time.Now(); time.Since(start) < scaleUpTimeout; time.Sleep(20 * time.Second) { + ginkgo.By("Waiting for NotTriggerScaleUp event") + events, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{}) + framework.ExpectNoError(err) + + for _, e := range events.Items { + if e.InvolvedObject.Kind == "Pod" && e.Reason == "NotTriggerScaleUp" { + ginkgo.By("NotTriggerScaleUp event found") + eventFound = true + break EventsLoop } } - break } - klog.Infof("Made nodes schedulable again in %v", time.Since(s).String()) + if !eventFound { + framework.Failf("Expected event with kind 'Pod' and reason 'NotTriggerScaleUp' not found.") + } + // Verify that cluster size is not changed + framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, + func(size int) bool { return size <= nodeCount }, time.Second)) }) - }) - f.It("shouldn't increase cluster size if pending pod is too large", feature.ClusterSizeAutoscalingScaleUp, func(ctx context.Context) { - ginkgo.By("Creating unschedulable pod") - ReserveMemory(ctx, f, "memory-reservation", 1, int(1.1*float64(memAllocatableMb)), false, defaultTimeout) - ginkgo.DeferCleanup(e2erc.DeleteRCAndWaitForGC, f.ClientSet, f.Namespace.Name, "memory-reservation") + simpleScaleUpTest := func(ctx context.Context, unready int) { + ReserveMemory(ctx, f, "memory-reservation", 100, nodeCount*memAllocatableMb, false, 1*time.Second) + ginkgo.DeferCleanup(e2erc.DeleteRCAndWaitForGC, f.ClientSet, f.Namespace.Name, "memory-reservation") - ginkgo.By("Waiting for scale up hoping it won't happen") - // Verify that the appropriate event was generated - eventFound := false - EventsLoop: - for start := time.Now(); time.Since(start) < scaleUpTimeout; time.Sleep(20 * time.Second) { - ginkgo.By("Waiting for NotTriggerScaleUp event") - events, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{}) + // Verify that cluster size is increased + framework.ExpectNoError(WaitForClusterSizeFuncWithUnready(ctx, f.ClientSet, + func(size int) bool { return size >= nodeCount+1 }, scaleUpTimeout, unready)) + framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) + } + + f.It("should increase cluster size if pending pods are small", feature.ClusterSizeAutoscalingScaleUp, func(ctx context.Context) { + simpleScaleUpTest(ctx, 0) + }) + + f.It("shouldn't trigger additional scale-ups during processing scale-up", feature.ClusterSizeAutoscalingScaleUp, func(ctx context.Context) { + e2eskipper.Skipf("Test is flaky and disabled for now") + // Wait for the situation to stabilize - CA should be running and have up-to-date node readiness info. 
+ status, err := waitForScaleUpStatus(ctx, c, func(s *scaleUpStatus) bool { + return s.ready == s.target && s.ready <= nodeCount + }, scaleUpTriggerTimeout) framework.ExpectNoError(err) - for _, e := range events.Items { - if e.InvolvedObject.Kind == "Pod" && e.Reason == "NotTriggerScaleUp" { - ginkgo.By("NotTriggerScaleUp event found") - eventFound = true - break EventsLoop - } - } - } - if !eventFound { - framework.Failf("Expected event with kind 'Pod' and reason 'NotTriggerScaleUp' not found.") - } - // Verify that cluster size is not changed - framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, - func(size int) bool { return size <= nodeCount }, time.Second)) - }) + unmanagedNodes := nodeCount - status.ready - simpleScaleUpTest := func(ctx context.Context, unready int) { - ReserveMemory(ctx, f, "memory-reservation", 100, nodeCount*memAllocatableMb, false, 1*time.Second) - ginkgo.DeferCleanup(e2erc.DeleteRCAndWaitForGC, f.ClientSet, f.Namespace.Name, "memory-reservation") + ginkgo.By("Schedule more pods than can fit and wait for cluster to scale-up") + ReserveMemory(ctx, f, "memory-reservation", 100, nodeCount*memAllocatableMb, false, 1*time.Second) + ginkgo.DeferCleanup(e2erc.DeleteRCAndWaitForGC, f.ClientSet, f.Namespace.Name, "memory-reservation") - // Verify that cluster size is increased - framework.ExpectNoError(WaitForClusterSizeFuncWithUnready(ctx, f.ClientSet, - func(size int) bool { return size >= nodeCount+1 }, scaleUpTimeout, unready)) - framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) - } + status, err = waitForScaleUpStatus(ctx, c, func(s *scaleUpStatus) bool { + return s.status == caOngoingScaleUpStatus + }, scaleUpTriggerTimeout) + framework.ExpectNoError(err) + target := status.target + framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) - f.It("should increase cluster size if pending pods are small", feature.ClusterSizeAutoscalingScaleUp, func(ctx context.Context) { - simpleScaleUpTest(ctx, 0) - }) + ginkgo.By("Expect no more scale-up to be happening after all pods are scheduled") - f.It("shouldn't trigger additional scale-ups during processing scale-up", feature.ClusterSizeAutoscalingScaleUp, func(ctx context.Context) { - e2eskipper.Skipf("Test is flaky and disabled for now") - // Wait for the situation to stabilize - CA should be running and have up-to-date node readiness info. 
- status, err := waitForScaleUpStatus(ctx, c, func(s *scaleUpStatus) bool { - return s.ready == s.target && s.ready <= nodeCount - }, scaleUpTriggerTimeout) - framework.ExpectNoError(err) + // wait for a while until scale-up finishes; we cannot read CA status immediately + // after pods are scheduled as status config map is updated by CA once every loop iteration + status, err = waitForScaleUpStatus(ctx, c, func(s *scaleUpStatus) bool { + return s.status == caNoScaleUpStatus + }, 2*freshStatusLimit) + framework.ExpectNoError(err) - unmanagedNodes := nodeCount - status.ready + if status.target != target { + klog.Warningf("Final number of nodes (%v) does not match initial scale-up target (%v).", status.target, target) + } + gomega.Expect(status.timestamp.Add(freshStatusLimit)).To(gomega.BeTemporally(">=", time.Now())) + gomega.Expect(status.status).To(gomega.Equal(caNoScaleUpStatus)) + gomega.Expect(status.ready).To(gomega.Equal(status.target)) + nodes, err := e2enode.GetReadySchedulableNodes(ctx, f.ClientSet) + framework.ExpectNoError(err) + gomega.Expect(nodes.Items).To(gomega.HaveLen(status.target + unmanagedNodes)) + }) - ginkgo.By("Schedule more pods than can fit and wait for cluster to scale-up") - ReserveMemory(ctx, f, "memory-reservation", 100, nodeCount*memAllocatableMb, false, 1*time.Second) - ginkgo.DeferCleanup(e2erc.DeleteRCAndWaitForGC, f.ClientSet, f.Namespace.Name, "memory-reservation") + f.It("should increase cluster size if pods are pending due to host port conflict", feature.ClusterSizeAutoscalingScaleUp, func(ctx context.Context) { + scheduling.CreateHostPortPods(ctx, f, "host-port", nodeCount+2, false) + ginkgo.DeferCleanup(e2erc.DeleteRCAndWaitForGC, f.ClientSet, f.Namespace.Name, "host-port") - status, err = waitForScaleUpStatus(ctx, c, func(s *scaleUpStatus) bool { - return s.status == caOngoingScaleUpStatus - }, scaleUpTriggerTimeout) - framework.ExpectNoError(err) - target := status.target - framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) + framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, + func(size int) bool { return size >= nodeCount+2 }, scaleUpTimeout)) + framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) + }) - ginkgo.By("Expect no more scale-up to be happening after all pods are scheduled") + f.It("should increase cluster size if pods are pending due to pod anti-affinity", feature.ClusterSizeAutoscalingScaleUp, func(ctx context.Context) { + pods := nodeCount + newPods := 2 + labels := map[string]string{ + "anti-affinity": "yes", + } + ginkgo.By("starting a pod with anti-affinity on each node") + framework.ExpectNoError(runAntiAffinityPods(ctx, f, f.Namespace.Name, pods, "anti-affinity-pod", labels, labels)) + ginkgo.DeferCleanup(e2erc.DeleteRCAndWaitForGC, f.ClientSet, f.Namespace.Name, "anti-affinity-pod") + framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) - // wait for a while until scale-up finishes; we cannot read CA status immediately - // after pods are scheduled as status config map is updated by CA once every loop iteration - status, err = waitForScaleUpStatus(ctx, c, func(s *scaleUpStatus) bool { - return s.status == caNoScaleUpStatus - }, 2*freshStatusLimit) - framework.ExpectNoError(err) + ginkgo.By("scheduling extra pods with anti-affinity to existing ones") + framework.ExpectNoError(runAntiAffinityPods(ctx, f, f.Namespace.Name, newPods, "extra-pod", labels, labels)) + ginkgo.DeferCleanup(e2erc.DeleteRCAndWaitForGC, f.ClientSet, f.Namespace.Name, "extra-pod") - if 
status.target != target { - klog.Warningf("Final number of nodes (%v) does not match initial scale-up target (%v).", status.target, target) - } - gomega.Expect(status.timestamp.Add(freshStatusLimit)).To(gomega.BeTemporally(">=", time.Now())) - gomega.Expect(status.status).To(gomega.Equal(caNoScaleUpStatus)) - gomega.Expect(status.ready).To(gomega.Equal(status.target)) - nodes, err := e2enode.GetReadySchedulableNodes(ctx, f.ClientSet) - framework.ExpectNoError(err) - gomega.Expect(nodes.Items).To(gomega.HaveLen(status.target + unmanagedNodes)) - }) + framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) + framework.ExpectNoError(WaitForClusterSizeFunc(ctx, c, func(size int) bool { return size == nodeCount+newPods }, scaleUpTimeout)) + }) - f.It("should increase cluster size if pods are pending due to host port conflict", feature.ClusterSizeAutoscalingScaleUp, func(ctx context.Context) { - scheduling.CreateHostPortPods(ctx, f, "host-port", nodeCount+2, false) - ginkgo.DeferCleanup(e2erc.DeleteRCAndWaitForGC, f.ClientSet, f.Namespace.Name, "host-port") + f.It("should increase cluster size if pod requesting EmptyDir volume is pending", feature.ClusterSizeAutoscalingScaleUp, func(ctx context.Context) { + ginkgo.By("creating pods") + pods := nodeCount + newPods := 1 + labels := map[string]string{ + "anti-affinity": "yes", + } + framework.ExpectNoError(runAntiAffinityPods(ctx, f, f.Namespace.Name, pods, "anti-affinity-pod", labels, labels)) + ginkgo.DeferCleanup(e2erc.DeleteRCAndWaitForGC, f.ClientSet, f.Namespace.Name, "anti-affinity-pod") - framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, - func(size int) bool { return size >= nodeCount+2 }, scaleUpTimeout)) - framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) - }) + ginkgo.By("waiting for all pods before triggering scale up") + framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) - f.It("should increase cluster size if pods are pending due to pod anti-affinity", feature.ClusterSizeAutoscalingScaleUp, func(ctx context.Context) { - pods := nodeCount - newPods := 2 - labels := map[string]string{ - "anti-affinity": "yes", - } - ginkgo.By("starting a pod with anti-affinity on each node") - framework.ExpectNoError(runAntiAffinityPods(ctx, f, f.Namespace.Name, pods, "anti-affinity-pod", labels, labels)) - ginkgo.DeferCleanup(e2erc.DeleteRCAndWaitForGC, f.ClientSet, f.Namespace.Name, "anti-affinity-pod") - framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) + ginkgo.By("creating a pod requesting EmptyDir") + framework.ExpectNoError(runVolumeAntiAffinityPods(ctx, f, f.Namespace.Name, newPods, "extra-pod", labels, labels, emptyDirVolumes)) + ginkgo.DeferCleanup(e2erc.DeleteRCAndWaitForGC, f.ClientSet, f.Namespace.Name, "extra-pod") - ginkgo.By("scheduling extra pods with anti-affinity to existing ones") - framework.ExpectNoError(runAntiAffinityPods(ctx, f, f.Namespace.Name, newPods, "extra-pod", labels, labels)) - ginkgo.DeferCleanup(e2erc.DeleteRCAndWaitForGC, f.ClientSet, f.Namespace.Name, "extra-pod") + framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) + framework.ExpectNoError(WaitForClusterSizeFunc(ctx, c, func(size int) bool { return size == nodeCount+newPods }, scaleUpTimeout)) + }) - framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) - framework.ExpectNoError(WaitForClusterSizeFunc(ctx, c, func(size int) bool { return size == nodeCount+newPods }, scaleUpTimeout)) - }) + f.It("should correctly scale down after a node is not 
needed", feature.ClusterSizeAutoscalingScaleDown, func(ctx context.Context) { + ginkgo.By("Increase cluster size") + cleanupFunc := increaseClusterSize(ctx, f, c, nodeCount+2) - f.It("should increase cluster size if pod requesting EmptyDir volume is pending", feature.ClusterSizeAutoscalingScaleUp, func(ctx context.Context) { - ginkgo.By("creating pods") - pods := nodeCount - newPods := 1 - labels := map[string]string{ - "anti-affinity": "yes", - } - framework.ExpectNoError(runAntiAffinityPods(ctx, f, f.Namespace.Name, pods, "anti-affinity-pod", labels, labels)) - ginkgo.DeferCleanup(e2erc.DeleteRCAndWaitForGC, f.ClientSet, f.Namespace.Name, "anti-affinity-pod") + ginkgo.By("Remove the RC to make nodes not needed any more") + framework.ExpectNoError(cleanupFunc()) - ginkgo.By("waiting for all pods before triggering scale up") - framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) + ginkgo.By("Some uneeded nodes should be removed") + framework.ExpectNoError(WaitForClusterSizeFuncWithUnready(ctx, f.ClientSet, + func(size int) bool { return size < nodeCount+2 }, scaleDownTimeout, 0)) + }) - ginkgo.By("creating a pod requesting EmptyDir") - framework.ExpectNoError(runVolumeAntiAffinityPods(ctx, f, f.Namespace.Name, newPods, "extra-pod", labels, labels, emptyDirVolumes)) - ginkgo.DeferCleanup(e2erc.DeleteRCAndWaitForGC, f.ClientSet, f.Namespace.Name, "extra-pod") + f.It("should be able to scale down when rescheduling a pod is required and pdb allows for it", feature.ClusterSizeAutoscalingScaleDown, func(ctx context.Context) { + runDrainTest(ctx, f, c, nodeCount, f.Namespace.Name, 1, 1, func(increasedSize int) { + ginkgo.By("Some node should be removed") + framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, + func(size int) bool { return size < increasedSize }, scaleDownTimeout)) + }) + }) - framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) - framework.ExpectNoError(WaitForClusterSizeFunc(ctx, c, func(size int) bool { return size == nodeCount+newPods }, scaleUpTimeout)) - }) + f.It("shouldn't be able to scale down when rescheduling a pod is required, but pdb doesn't allow drain", feature.ClusterSizeAutoscalingScaleDown, func(ctx context.Context) { + runDrainTest(ctx, f, c, nodeCount, f.Namespace.Name, 1, 0, func(increasedSize int) { + ginkgo.By("No nodes should be removed") + time.Sleep(scaleDownTimeout) + nodes, err := e2enode.GetReadySchedulableNodes(ctx, f.ClientSet) + framework.ExpectNoError(err) + gomega.Expect(nodes.Items).To(gomega.HaveLen(increasedSize)) + }) + }) - f.It("should correctly scale down after a node is not needed", feature.ClusterSizeAutoscalingScaleDown, func(ctx context.Context) { - ginkgo.By("Increase cluster size") - cleanupFunc := increaseClusterSize(ctx, f, c, nodeCount+2) + f.It("should be able to scale down by draining multiple pods one by one as dictated by pdb", feature.ClusterSizeAutoscalingScaleDown, func(ctx context.Context) { + runDrainTest(ctx, f, c, nodeCount, f.Namespace.Name, 2, 1, func(increasedSize int) { + ginkgo.By("Some node should be removed") + framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, + func(size int) bool { return size < increasedSize }, scaleDownTimeout)) + }) + }) - ginkgo.By("Remove the RC to make nodes not needed any more") - framework.ExpectNoError(cleanupFunc()) + f.It("should be able to scale down by draining system pods with pdb", feature.ClusterSizeAutoscalingScaleDown, func(ctx context.Context) { + runDrainTest(ctx, f, c, nodeCount, "kube-system", 2, 1, 
func(increasedSize int) { + ginkgo.By("Some node should be removed") + framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, + func(size int) bool { return size < increasedSize }, scaleDownTimeout)) + }) + }) - ginkgo.By("Some uneeded nodes should be removed") - framework.ExpectNoError(WaitForClusterSizeFuncWithUnready(ctx, f.ClientSet, - func(size int) bool { return size < nodeCount+2 }, scaleDownTimeout, 0)) - }) + f.It("shouldn't scale up when expendable pod is created", feature.ClusterSizeAutoscalingScaleUp, func(ctx context.Context) { + createPriorityClasses(ctx, f) + // Create nodesCountAfterResize+1 pods allocating 0.7 allocatable on present nodes. One more node will have to be created. + ginkgo.DeferCleanup(ReserveMemoryWithPriority, f, "memory-reservation", nodeCount+1, int(float64(nodeCount+1)*float64(0.7)*float64(memAllocatableMb)), false, time.Second, expendablePriorityClassName) + ginkgo.By(fmt.Sprintf("Waiting for scale up hoping it won't happen, sleep for %s", scaleUpTimeout.String())) + time.Sleep(scaleUpTimeout) + // Verify that cluster size is not changed + framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, + func(size int) bool { return size == nodeCount }, time.Second)) + }) - f.It("should be able to scale down when rescheduling a pod is required and pdb allows for it", feature.ClusterSizeAutoscalingScaleDown, func(ctx context.Context) { - runDrainTest(ctx, f, c, nodeCount, f.Namespace.Name, 1, 1, func(increasedSize int) { - ginkgo.By("Some node should be removed") + f.It("should scale up when non expendable pod is created", feature.ClusterSizeAutoscalingScaleUp, func(ctx context.Context) { + createPriorityClasses(ctx, f) + // Create nodesCountAfterResize+1 pods allocating 0.7 allocatable on present nodes. One more node will have to be created. + cleanupFunc := ReserveMemoryWithPriority(ctx, f, "memory-reservation", nodeCount+1, int(float64(nodeCount+1)*float64(0.7)*float64(memAllocatableMb)), true, scaleUpTimeout, highPriorityClassName) + defer func() { + framework.ExpectNoError(cleanupFunc()) + }() + // Verify that cluster size is increased framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, - func(size int) bool { return size < increasedSize }, scaleDownTimeout)) + func(size int) bool { return size > nodeCount }, time.Second)) })
+ cleanupFunc2 := ReserveMemoryWithPriority(ctx, f, "memory-reservation2", nodeCount, int(float64(nodeCount)*float64(0.7)*float64(memAllocatableMb)), true, defaultTimeout, highPriorityClassName) + defer func() { + framework.ExpectNoError(cleanupFunc2()) + }() + framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, + func(size int) bool { return size == nodeCount }, time.Second)) }) - }) - f.It("should be able to scale down by draining multiple pods one by one as dictated by pdb", feature.ClusterSizeAutoscalingScaleDown, func(ctx context.Context) { - runDrainTest(ctx, f, c, nodeCount, f.Namespace.Name, 2, 1, func(increasedSize int) { - ginkgo.By("Some node should be removed") + f.It("should scale down when expendable pod is running", feature.ClusterSizeAutoscalingScaleDown, func(ctx context.Context) { + createPriorityClasses(ctx, f) + increasedSize := nodeCount + 2 + cleanupIncreaseFunc := increaseClusterSize(ctx, f, c, increasedSize) + // Create increasedSize pods allocating 0.7 allocatable on present nodes - one pod per node. + cleanupFunc := ReserveMemoryWithPriority(ctx, f, "memory-reservation", increasedSize, int(float64(increasedSize)*float64(0.7)*float64(memAllocatableMb)), true, scaleUpTimeout, expendablePriorityClassName) + defer func() { + framework.ExpectNoError(cleanupFunc()) + }() + ginkgo.By("Remove pods that increased the cluster size") + framework.ExpectNoError(cleanupIncreaseFunc()) + ginkgo.By("Waiting for scale down") framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, - func(size int) bool { return size < increasedSize }, scaleDownTimeout)) + func(size int) bool { return size == nodeCount }, scaleDownTimeout)) }) - }) - f.It("should be able to scale down by draining system pods with pdb", feature.ClusterSizeAutoscalingScaleDown, func(ctx context.Context) { - runDrainTest(ctx, f, c, nodeCount, "kube-system", 2, 1, func(increasedSize int) { - ginkgo.By("Some node should be removed") + f.It("shouldn't scale down when non expendable pod is running", feature.ClusterSizeAutoscalingScaleDown, func(ctx context.Context) { + createPriorityClasses(ctx, f) + increasedSize := nodeCount + 2 + cleanupIncreased := increaseClusterSize(ctx, f, c, increasedSize) + // Create increasedSize pods allocating 0.7 allocatable on present nodes - one pod per node. + cleanupFunc := ReserveMemoryWithPriority(ctx, f, "memory-reservation", increasedSize, int(float64(increasedSize)*float64(0.7)*float64(memAllocatableMb)), true, scaleUpTimeout, highPriorityClassName) + defer func() { + framework.ExpectNoError(cleanupFunc()) + }() + framework.ExpectNoError(cleanupIncreased()) + ginkgo.By(fmt.Sprintf("Waiting for scale down hoping it won't happen, sleep for %s", scaleDownTimeout.String())) + time.Sleep(scaleDownTimeout) framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, - func(size int) bool { return size < increasedSize }, scaleDownTimeout)) + func(size int) bool { return size == increasedSize }, time.Second)) }) - }) - f.It("shouldn't scale up when expendable pod is created", feature.ClusterSizeAutoscalingScaleUp, func(ctx context.Context) { - createPriorityClasses(ctx, f) - // Create nodesCountAfterResize+1 pods allocating 0.7 allocatable on present nodes. One more node will have to be created. 
- ginkgo.DeferCleanup(ReserveMemoryWithPriority, f, "memory-reservation", nodeCount+1, int(float64(nodeCount+1)*float64(0.7)*float64(memAllocatableMb)), false, time.Second, expendablePriorityClassName) - ginkgo.By(fmt.Sprintf("Waiting for scale up hoping it won't happen, sleep for %s", scaleUpTimeout.String())) - time.Sleep(scaleUpTimeout) - // Verify that cluster size is not changed - framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, - func(size int) bool { return size == nodeCount }, time.Second)) - }) + f.It("should scale up when unprocessed pod is created and is going to be unschedulable", feature.ClusterScaleUpBypassScheduler, func(ctx context.Context) { + // 70% of allocatable memory of a single node * replica count, forcing a scale up in case of normal pods + replicaCount := 2 * nodeCount + reservedMemory := int(float64(replicaCount) * float64(0.7) * float64(memAllocatableMb)) + cleanupFunc := ReserveMemoryWithSchedulerName(ctx, f, "memory-reservation", replicaCount, reservedMemory, false, 1, nonExistingBypassedSchedulerName) + defer func() { + framework.ExpectNoError(cleanupFunc()) + }() + // Verify that cluster size is increased + ginkgo.By("Waiting for cluster scale-up") + sizeFunc := func(size int) bool { + // Softly checks scale-up since other types of machines can be added which would affect #nodes + return size > nodeCount + } + framework.ExpectNoError(WaitForClusterSizeFuncWithUnready(ctx, f.ClientSet, sizeFunc, scaleUpTimeout, 0)) + }) - f.It("should scale up when non expendable pod is created", feature.ClusterSizeAutoscalingScaleUp, func(ctx context.Context) { - createPriorityClasses(ctx, f) - // Create nodesCountAfterResize+1 pods allocating 0.7 allocatable on present nodes. One more node will have to be created. - cleanupFunc := ReserveMemoryWithPriority(ctx, f, "memory-reservation", nodeCount+1, int(float64(nodeCount+1)*float64(0.7)*float64(memAllocatableMb)), true, scaleUpTimeout, highPriorityClassName) - defer func() { - framework.ExpectNoError(cleanupFunc()) - }() - // Verify that cluster size is not changed - framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, - func(size int) bool { return size > nodeCount }, time.Second)) - }) + runScaleUpNotTriggeredUnprocessedPodTest := func(ctx context.Context, replicaCount int, reservedMemory int, schedulerName string) { + cleanupFunc := ReserveMemoryWithSchedulerName(ctx, f, "memory-reservation", replicaCount, reservedMemory, false, 1, schedulerName) + defer func() { + framework.ExpectNoError(cleanupFunc()) + }() + // Verify that cluster size is the same + ginkgo.By(fmt.Sprintf("Waiting for scale up hoping it won't happen, polling cluster size for %s", scaleUpTimeout.String())) + sizeFunc := func(size int) bool { + return size == nodeCount + } + gomega.Consistently(ctx, func() error { + return WaitForClusterSizeFunc(ctx, f.ClientSet, sizeFunc, time.Second) + }).WithTimeout(scaleUpTimeout).WithPolling(framework.Poll).ShouldNot(gomega.HaveOccurred()) + } - f.It("shouldn't scale up when expendable pod is preempted", feature.ClusterSizeAutoscalingScaleUp, func(ctx context.Context) { - createPriorityClasses(ctx, f) - // Create nodesCountAfterResize pods allocating 0.7 allocatable on present nodes - one pod per node. 
- cleanupFunc1 := ReserveMemoryWithPriority(ctx, f, "memory-reservation1", nodeCount, int(float64(nodeCount)*float64(0.7)*float64(memAllocatableMb)), true, defaultTimeout, expendablePriorityClassName) - defer func() { - framework.ExpectNoError(cleanupFunc1()) - }() - // Create nodesCountAfterResize pods allocating 0.7 allocatable on present nodes - one pod per node. Pods created here should preempt pods created above. - cleanupFunc2 := ReserveMemoryWithPriority(ctx, f, "memory-reservation2", nodeCount, int(float64(nodeCount)*float64(0.7)*float64(memAllocatableMb)), true, defaultTimeout, highPriorityClassName) - defer func() { - framework.ExpectNoError(cleanupFunc2()) - }() - framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, - func(size int) bool { return size == nodeCount }, time.Second)) - }) + f.It("shouldn't scale up when unprocessed pod is created and is going to be schedulable", feature.ClusterScaleUpBypassScheduler, func(ctx context.Context) { + // 50% of allocatable memory of a single node, so that no scale up would trigger in normal cases + replicaCount := 1 + reservedMemory := int(float64(0.5) * float64(memAllocatableMb)) + runScaleUpNotTriggeredUnprocessedPodTest(ctx, replicaCount, reservedMemory, nonExistingBypassedSchedulerName) + }) - f.It("should scale down when expendable pod is running", feature.ClusterSizeAutoscalingScaleDown, func(ctx context.Context) { - createPriorityClasses(ctx, f) - increasedSize := nodeCount + 2 - cleanupIncreaseFunc := increaseClusterSize(ctx, f, c, increasedSize) - // Create increasedSize pods allocating 0.7 allocatable on present nodes - one pod per node. - cleanupFunc := ReserveMemoryWithPriority(ctx, f, "memory-reservation", increasedSize, int(float64(increasedSize)*float64(0.7)*float64(memAllocatableMb)), true, scaleUpTimeout, expendablePriorityClassName) - defer func() { - framework.ExpectNoError(cleanupFunc()) - }() - ginkgo.By("Remove pods that increased the cluster size") - framework.ExpectNoError(cleanupIncreaseFunc()) - ginkgo.By("Waiting for scale down") - framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, - func(size int) bool { return size == nodeCount }, scaleDownTimeout)) + f.It("shouldn't scale up when unprocessed pod is created and scheduler is not specified to be bypassed", feature.ClusterScaleUpBypassScheduler, func(ctx context.Context) { + // 70% of allocatable memory of a single node * replica count, forcing a scale up in case of normal pods + replicaCount := 2 * nodeCount + reservedMemory := int(float64(replicaCount) * float64(0.7) * float64(memAllocatableMb)) + schedulerName := "non-existent-scheduler-" + f.UniqueName + runScaleUpNotTriggeredUnprocessedPodTest(ctx, replicaCount, reservedMemory, schedulerName) + }) }) - f.It("shouldn't scale down when non expendable pod is running", feature.ClusterSizeAutoscalingScaleDown, func(ctx context.Context) { - createPriorityClasses(ctx, f) - increasedSize := nodeCount + 2 - cleanupIncreased := increaseClusterSize(ctx, f, c, increasedSize) - // Create increasedSize pods allocating 0.7 allocatable on present nodes - one pod per node. 
- cleanupFunc := ReserveMemoryWithPriority(ctx, f, "memory-reservation", increasedSize, int(float64(increasedSize)*float64(0.7)*float64(memAllocatableMb)), true, scaleUpTimeout, highPriorityClassName) - defer func() { - framework.ExpectNoError(cleanupFunc()) - }() - framework.ExpectNoError(cleanupIncreased()) - ginkgo.By(fmt.Sprintf("Waiting for scale down hoping it won't happen, sleep for %s", scaleDownTimeout.String())) - time.Sleep(scaleDownTimeout) - framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, - func(size int) bool { return size == increasedSize }, time.Second)) - }) + f.Context("DRA Autoscaling", func() { + // DRA Autoscaling tests use /manifests/dra-driver.yaml manifests. + ginkgo.BeforeEach(func(ctx context.Context) { + _, err := f.ClientSet.ResourceV1().DeviceClasses().Get(ctx, draDeviceClassName, metav1.GetOptions{}) + if err != nil { + e2eskipper.Skipf("test expects DRA driver to be installed (DeviceClass %q not found)", draDeviceClassName) + } + setupAutoscalingTest(ctx) + ginkgo.DeferCleanup(cleanupAutoscalingTest) + }) - f.It("should scale up when unprocessed pod is created and is going to be unschedulable", feature.ClusterScaleUpBypassScheduler, func(ctx context.Context) { - // 70% of allocatable memory of a single node * replica count, forcing a scale up in case of normal pods - replicaCount := 2 * nodeCount - reservedMemory := int(float64(replicaCount) * float64(0.7) * float64(memAllocatableMb)) - cleanupFunc := ReserveMemoryWithSchedulerName(ctx, f, "memory-reservation", replicaCount, reservedMemory, false, 1, nonExistingBypassedSchedulerName) - defer func() { - framework.ExpectNoError(cleanupFunc()) - }() - // Verify that cluster size is increased - ginkgo.By("Waiting for cluster scale-up") - sizeFunc := func(size int) bool { - // Softly checks scale-up since other types of machines can be added which would affect #nodes - return size > nodeCount - } - framework.ExpectNoError(WaitForClusterSizeFuncWithUnready(ctx, f.ClientSet, sizeFunc, scaleUpTimeout, 0)) - }) + f.It("should scale up when pods request DRA resources", feature.ClusterSizeAutoscalingScaleUp, func(ctx context.Context) { + // Calculate replicas to force scale-up. 
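+ // Each node advertises draDeviceCapacity devices, so the existing nodeCount nodes can satisfy at most nodeCount*draDeviceCapacity single-device pods; one extra replica must stay pending until a new node is added.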
+ replicas := (nodeCount * draDeviceCapacity) + 1 - runScaleUpNotTriggeredUnprocessedPodTest := func(ctx context.Context, replicaCount int, reservedMemory int, schedulerName string) { - cleanupFunc := ReserveMemoryWithSchedulerName(ctx, f, "memory-reservation", replicaCount, reservedMemory, false, 1, schedulerName) - defer func() { - framework.ExpectNoError(cleanupFunc()) - }() - // Verify that cluster size is the same - ginkgo.By(fmt.Sprintf("Waiting for scale up hoping it won't happen, polling cluster size for %s", scaleUpTimeout.String())) - sizeFunc := func(size int) bool { - return size == nodeCount - } - gomega.Consistently(ctx, func() error { - return WaitForClusterSizeFunc(ctx, f.ClientSet, sizeFunc, time.Second) - }).WithTimeout(scaleUpTimeout).WithPolling(framework.Poll).ShouldNot(gomega.HaveOccurred()) - } + ginkgo.By(fmt.Sprintf("Creating %d pods requesting DRA devices to trigger scale-up", replicas)) - f.It("shouldn't scale up when unprocessed pod is created and is going to be schedulable", feature.ClusterScaleUpBypassScheduler, func(ctx context.Context) { - // 50% of allocatable memory of a single node, so that no scale up would trigger in normal cases - replicaCount := 1 - reservedMemory := int(float64(0.5) * float64(memAllocatableMb)) - runScaleUpNotTriggeredUnprocessedPodTest(ctx, replicaCount, reservedMemory, nonExistingBypassedSchedulerName) - }) + rcName := "dra-scale-up-rc" + cleanup := ReserveDRA(ctx, f, rcName, replicas, draDeviceClassName, 1, false) + ginkgo.DeferCleanup(cleanup) - f.It("shouldn't scale up when unprocessed pod is created and scheduler is not specified to be bypassed", feature.ClusterScaleUpBypassScheduler, func(ctx context.Context) { - // 70% of allocatable memory of a single node * replica count, forcing a scale up in case of normal pods - replicaCount := 2 * nodeCount - reservedMemory := int(float64(replicaCount) * float64(0.7) * float64(memAllocatableMb)) - schedulerName := "non-existent-scheduler-" + f.UniqueName - runScaleUpNotTriggeredUnprocessedPodTest(ctx, replicaCount, reservedMemory, schedulerName) - }) + // Wait for cluster to scale up + expectedNodes := nodeCount + 1 + ginkgo.By(fmt.Sprintf("Waiting for cluster size to reach %d", expectedNodes)) + framework.ExpectNoError(WaitForClusterSizeFunc(ctx, c, func(size int) bool { + return size == expectedNodes + }, scaleUpTimeout)) + + ginkgo.By("Waiting for all DRA pods to be ready") + framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) + }) + + f.It("shouldn't scale up if pod requests more DRA devices than node capacity", feature.ClusterSizeAutoscalingScaleUp, func(ctx context.Context) { + devicesPerPod := draDeviceCapacity + 1 + ginkgo.By(fmt.Sprintf("Creating pod requesting %d devices (capacity %d)", devicesPerPod, draDeviceCapacity)) + cleanup := ReserveDRA(ctx, f, "oversized-dra-pod", 1, draDeviceClassName, devicesPerPod, false) + ginkgo.DeferCleanup(cleanup) + + ginkgo.By("Waiting for NotTriggerScaleUp event") + eventFound := false + EventsLoop: + for start := time.Now(); time.Since(start) < scaleUpTimeout; time.Sleep(20 * time.Second) { + events, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{}) + framework.ExpectNoError(err) + + for _, e := range events.Items { + if e.InvolvedObject.Kind == "Pod" && e.Reason == "NotTriggerScaleUp" { + ginkgo.By("NotTriggerScaleUp event found") + eventFound = true + break EventsLoop + } + } + } + if !eventFound { + framework.Failf("Expected event with kind 'Pod' and reason 'NotTriggerScaleUp' not 
found.") + } + // Verify that cluster size is not changed. + framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, + func(size int) bool { return size == nodeCount }, time.Second)) + }) + + f.It("should correctly scale down with DRA node draining", feature.ClusterSizeAutoscalingScaleDown, func(ctx context.Context) { + increasedNodeCount := nodeCount + 1 + + // Scale cluster up by creating antiAffinity pods that request 1 device. + antiAffinityRcName := "dra-antiaffinity-rc" + cleanupAntiAffinityDRA := ReserveDRA(ctx, f, antiAffinityRcName, increasedNodeCount, draDeviceClassName, 1, true) + ginkgo.By(fmt.Sprintf("Creating %d DRA pods with anti-affinity to ensure one pod per node", increasedNodeCount)) + + // Expect a scale up. + framework.ExpectNoError(WaitForClusterSizeFunc(ctx, c, func(size int) bool { + return size == nodeCount+1 + }, scaleUpTimeout)) + + // Add increasedNodeCount pods, each one reserves 2 devices. + // Each node should have utilization of 75% DRA devices. + // They should consist of 1 antiAffinity pod and 1 pod with 2 devices. + rcName := "dra-rc" + devicesPerPod := 2 + cleanupDRA := ReserveDRA(ctx, f, rcName, increasedNodeCount, draDeviceClassName, devicesPerPod, false) + ginkgo.By(fmt.Sprintf("Creating %d DRA pods with 2 devices per pod", increasedNodeCount)) + ginkgo.DeferCleanup(cleanupDRA) + + ginkgo.By("Waiting for all DRA pods to be ready") + framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) + + ginkgo.By("Removing the pods that increased the cluster size") + framework.ExpectNoError(cleanupAntiAffinityDRA(ctx)) + + ginkgo.By("The unneeded node should be removed by draining the DRA pod") + framework.ExpectNoError(WaitForClusterSizeFunc(ctx, c, func(size int) bool { + return size == nodeCount + }, scaleDownTimeout)) + }) + }) }) func runDrainTest(ctx context.Context, f *framework.Framework, c clientset.Interface, nodeCount int, namespace string, podsPerNode, pdbSize int, verifyFunction func(int)) { @@ -1074,3 +1179,116 @@ func isNodeTainted(node *v1.Node) bool { } return false } + +// ReserveDRA creates a ReplicationController where pods request a DRA device. +// It uses an Ephemeral ResourceClaimTemplate defined in the PodSpec. 
+func ReserveDRA(ctx context.Context, f *framework.Framework, id string, replicas int, deviceClass string, devicesPerPod int, antiAffinity bool) func(context.Context) error { + ginkgo.By(fmt.Sprintf("Running RC %s with %d replicas requesting DRA device %s", id, replicas, deviceClass)) + + claimName := "test-claim" + + rc := &v1.ReplicationController{ + ObjectMeta: metav1.ObjectMeta{ + Name: id, + Namespace: f.Namespace.Name, + }, + Spec: v1.ReplicationControllerSpec{ + Replicas: func(i int) *int32 { x := int32(i); return &x }(replicas), + Selector: map[string]string{"name": id}, + Template: &v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"name": id}, + }, + Spec: v1.PodSpec{ + SecurityContext: &v1.PodSecurityContext{ + RunAsNonRoot: func(b bool) *bool { return &b }(true), + SeccompProfile: &v1.SeccompProfile{Type: v1.SeccompProfileTypeRuntimeDefault}, + RunAsUser: func(i int64) *int64 { return &i }(65534), // nobody + }, + Containers: []v1.Container{ + { + Name: "pause", + Image: imageutils.GetPauseImageName(), + Resources: v1.ResourceRequirements{ + Claims: []v1.ResourceClaim{ + {Name: claimName}, + }, + }, + SecurityContext: &v1.SecurityContext{ + AllowPrivilegeEscalation: func(b bool) *bool { return &b }(false), + Capabilities: &v1.Capabilities{ + Drop: []v1.Capability{"ALL"}, + }, + }, + }, + }, + ResourceClaims: []v1.PodResourceClaim{ + { + Name: claimName, + ResourceClaimTemplateName: &id, // We will create a template with the same name. + }, + }, + }, + }, + }, + } + + if antiAffinity { + rc.Spec.Template.Spec.Affinity = &v1.Affinity{ + PodAntiAffinity: &v1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"name": id}, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + } + } + + // Create the ResourceClaimTemplate first. + template := &resourcev1.ResourceClaimTemplate{ + ObjectMeta: metav1.ObjectMeta{ + Name: id, + Namespace: f.Namespace.Name, + }, + Spec: resourcev1.ResourceClaimTemplateSpec{ + Spec: resourcev1.ResourceClaimSpec{ + Devices: resourcev1.DeviceClaim{ + Requests: []resourcev1.DeviceRequest{ + { + Name: "req-1", + Exactly: &resourcev1.ExactDeviceRequest{ + DeviceClassName: deviceClass, + AllocationMode: resourcev1.DeviceAllocationModeExactCount, + Count: int64(devicesPerPod), + }, + }, + }, + }, + }, + }, + } + + _, err := f.ClientSet.ResourceV1().ResourceClaimTemplates(f.Namespace.Name).Create(ctx, template, metav1.CreateOptions{}) + framework.ExpectNoError(err, "Failed to create ResourceClaimTemplate") + + _, err = f.ClientSet.CoreV1().ReplicationControllers(f.Namespace.Name).Create(ctx, rc, metav1.CreateOptions{}) + framework.ExpectNoError(err, "Failed to create ReplicationController") + + return func(cleanupCtx context.Context) error { + // Clean up RC. + err1 := e2erc.DeleteRCAndWaitForGC(cleanupCtx, f.ClientSet, f.Namespace.Name, id) + if err1 != nil && !apierrors.IsNotFound(err1) { + return err1 + } + // Clean up Template. 
+ err2 := f.ClientSet.ResourceV1().ResourceClaimTemplates(f.Namespace.Name).Delete(cleanupCtx, id, metav1.DeleteOptions{}) + if err2 != nil && !apierrors.IsNotFound(err2) { + return err2 + } + return nil + } +} diff --git a/cluster-autoscaler/e2e/manifests/dra-driver.yaml b/cluster-autoscaler/e2e/manifests/dra-driver.yaml new file mode 100644 index 000000000000..d65e767c14c5 --- /dev/null +++ b/cluster-autoscaler/e2e/manifests/dra-driver.yaml @@ -0,0 +1,117 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: dra-example-driver-service-account + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: dra-example-driver-role + namespace: kube-system +rules: + - apiGroups: [ "" ] + resources: [ "nodes" ] + verbs: [ "get", "list", "watch" ] + - apiGroups: [ "resource.k8s.io" ] + resources: [ "resourceslices", "resourceclaims", "resourceclaimtemplates", "deviceclasses" ] + verbs: [ "get", "list", "watch", "create", "update", "patch", "delete" ] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: dra-example-driver-role-binding + namespace: kube-system +subjects: + - kind: ServiceAccount + name: dra-example-driver-service-account + namespace: kube-system +roleRef: + kind: ClusterRole + name: dra-example-driver-role + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: resource.k8s.io/v1 +kind: DeviceClass +metadata: + name: gpu +spec: + selectors: + - cel: + expression: "device.driver == 'gpu.example.com'" +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: dra-example-driver-kubeletplugin + namespace: kube-system + labels: + app.kubernetes.io/name: dra-example-driver + app.kubernetes.io/instance: dra-example-driver + app.kubernetes.io/component: kubeletplugin +spec: + selector: + matchLabels: + app.kubernetes.io/name: dra-example-driver + app.kubernetes.io/instance: dra-example-driver + app.kubernetes.io/component: kubeletplugin + template: + metadata: + labels: + app.kubernetes.io/name: dra-example-driver + app.kubernetes.io/instance: dra-example-driver + app.kubernetes.io/component: kubeletplugin + spec: + serviceAccountName: dra-example-driver-service-account + containers: + - name: plugin + securityContext: + privileged: true + image: registry.k8s.io/dra-example-driver/dra-example-driver:v0.2.1 + imagePullPolicy: IfNotPresent + command: [ "dra-example-kubeletplugin" ] + resources: + limits: + cpu: 100m + memory: 100Mi + requests: + cpu: 100m + memory: 100Mi + env: + - name: DRIVER_NAME + value: "gpu.example.com" + - name: DEVICE_PROFILE + value: "gpu" + - name: CDI_ROOT + value: /var/run/cdi + - name: KUBELET_REGISTRAR_DIRECTORY_PATH + value: "/var/lib/kubelet/plugins_registry" + - name: KUBELET_PLUGINS_DIRECTORY_PATH + value: "/var/lib/kubelet/plugins" + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: NUM_DEVICES + value: "4" + volumeMounts: + - name: plugins-registry + mountPath: "/var/lib/kubelet/plugins_registry" + - name: plugins + mountPath: "/var/lib/kubelet/plugins" + - name: cdi + mountPath: /var/run/cdi + volumes: + - name: plugins-registry + hostPath: + path: "/var/lib/kubelet/plugins_registry" + - name: plugins + hostPath: + path: "/var/lib/kubelet/plugins" + - name: cdi + hostPath: + path: /var/run/cdi diff --git a/cluster-autoscaler/hack/e2e/deploy-ca-on-gce-for-e2e.sh b/cluster-autoscaler/hack/e2e/deploy-ca-on-gce-for-e2e.sh index 2ef12e5a99a3..03345ee4d13b 
100755 --- a/cluster-autoscaler/hack/e2e/deploy-ca-on-gce-for-e2e.sh +++ b/cluster-autoscaler/hack/e2e/deploy-ca-on-gce-for-e2e.sh @@ -41,11 +41,15 @@ echo "Configuring registry authentication..." mkdir -p "${HOME}/.docker" gcloud auth configure-docker -q -echo "Building and pushing image..." -pushd "${CA_ROOT}" >/dev/null -make execute-release REGISTRY=${REGISTRY} TAG=${TAG} -IMAGE="${REGISTRY}/cluster-autoscaler:${TAG}" -popd >/dev/null +if [[ -z "${IMAGE:-}" ]]; then + echo "Building and pushing image..." + pushd "${CA_ROOT}" >/dev/null + make execute-release REGISTRY=${REGISTRY} TAG=${TAG} + IMAGE="${REGISTRY}/cluster-autoscaler:${TAG}" + popd >/dev/null +else + echo "Using existing image: ${IMAGE}" +fi # Deploy echo "Deploying to cluster..." diff --git a/cluster-autoscaler/hack/e2e/run-e2e.sh b/cluster-autoscaler/hack/e2e/run-e2e.sh index 679374adbf9c..b237d05b7f2e 100755 --- a/cluster-autoscaler/hack/e2e/run-e2e.sh +++ b/cluster-autoscaler/hack/e2e/run-e2e.sh @@ -36,5 +36,26 @@ while [[ $# -gt 0 ]]; do esac done +# Build and push image once to be used in both steps. +GIT_COMMIT="$(git describe --always --dirty --exclude '*')" +export TAG="dev-${GIT_COMMIT}-$(date +%s)" + +echo "### STEP 1: Standard Autoscaling tests ###" +${CA_ROOT}/hack/e2e/deploy-ca-on-gce-for-e2e.sh +${CA_ROOT}/hack/e2e/run-e2e-tests.sh "Standard Autoscaling" + +echo "### STEP 2: DRA Autoscaling tests ###" +echo "Removing Cluster Autoscaler to reset its state..." +kubectl delete deployment cluster-autoscaler -n kube-system + +echo "Installing DRA driver..." +kubectl apply -f "${CA_ROOT}/e2e/manifests/dra-driver.yaml" + +echo "Redeploying Cluster Autoscaler..." +# Reuse CA image. +REGISTRY="gcr.io/$(gcloud config get core/project)" +export IMAGE="${REGISTRY}/cluster-autoscaler:${TAG}" ${CA_ROOT}/hack/e2e/deploy-ca-on-gce-for-e2e.sh -${CA_ROOT}/hack/e2e/run-e2e-tests.sh "${REMAINING_ARGS[@]}" + +echo "Running DRA tests..." +${CA_ROOT}/hack/e2e/run-e2e-tests.sh "DRA Autoscaling"