Skip to content

Commit 0a64cd5

Browse files
shmuelk, lionelvillard
authored and committed
test: Scale up and down the model server during an end to end test (llm-d#354)
* Added a helper to scale up/down deployments
  Signed-off-by: Shmuel Kallner <kallner@il.ibm.com>
* Added a test in which the model server is scaled up and down
  Signed-off-by: Shmuel Kallner <kallner@il.ibm.com>
* Fixed typo
  Signed-off-by: Shmuel Kallner <kallner@il.ibm.com>
* Fixed lint issue
  Signed-off-by: Shmuel Kallner <kallner@il.ibm.com>
* Restored code commented out for debugging
  Signed-off-by: Shmuel Kallner <kallner@il.ibm.com>
* Updates due to review comments
  Signed-off-by: Shmuel Kallner <kallner@il.ibm.com>
* Added comments for clarification
  Signed-off-by: Shmuel Kallner <kallner@il.ibm.com>
* Changes due to refactoring done in prior PRs
  Signed-off-by: Shmuel Kallner <kallner@il.ibm.com>
---------
Signed-off-by: Shmuel Kallner <kallner@il.ibm.com>
1 parent 8577011 commit 0a64cd5

File tree

2 files changed

+102
-2
lines changed

2 files changed

+102
-2
lines changed

test/e2e/e2e_test.go

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,60 @@ var _ = ginkgo.Describe("Run end to end tests", ginkgo.Ordered, func() {
129129
testutils.DeleteObjects(testConfig, modelServers)
130130
})
131131
})
132+
133+
// Exercises request routing while the model server deployment is scaled up by
// one replica and then scaled back down by one replica.
ginkgo.When("Scaling up and down the model servers", func() {
134+
ginkgo.It("should distribute inference requests across all model servers", func() {
135+
// Initial state, verified by the expectations below: no prefill pods and
// exactly one decode pod.
modelServers := createModelServers(false, false, 1, 0, 0)
136+
137+
epp := createEndPointPicker(scaleConfig)
138+
139+
prefillPods, decodePods := getModelServerPods(podSelector, prefillSelector, decodeSelector)
140+
gomega.Expect(prefillPods).Should(gomega.BeEmpty())
141+
gomega.Expect(decodePods).Should(gomega.HaveLen(1))
142+
143+
// With a single decode pod every completion must be served by that pod.
// podHdr keeps the pod name of the last response for the comparison made
// after scaling up.
var nsHdr, podHdr string
144+
for range 5 {
145+
nsHdr, podHdr = runCompletion(simplePrompt, modelName)
146+
gomega.Expect(nsHdr).Should(gomega.Equal(nsName))
147+
gomega.Expect(podHdr).Should(gomega.Equal(decodePods[0]))
148+
}
149+
150+
// Scale up by one replica and expect a second decode pod to appear.
scaleDeployment(modelServers, 1)
151+
152+
scaledUpPrefillPods, scaledUpDecodePods := getModelServerPods(podSelector, prefillSelector, decodeSelector)
153+
gomega.Expect(scaledUpPrefillPods).Should(gomega.BeEmpty())
154+
gomega.Expect(scaledUpDecodePods).Should(gomega.HaveLen(2))
155+
156+
var scaledNsHdr, scaledPodHdr string
157+
// Run inference multiple times (at most 30) until one is scheduled on the new pod
158+
for range 30 {
159+
scaledNsHdr, scaledPodHdr = runCompletion(extraPrompt, modelName)
160+
gomega.Expect(scaledNsHdr).Should(gomega.Equal(nsName))
161+
gomega.Expect(scaledPodHdr).Should(gomega.BeElementOf(scaledUpDecodePods))
162+
if scaledPodHdr != podHdr {
163+
break
164+
}
165+
}
166+
// Fails when none of the attempts above was served by a pod other than
// the one that answered before the scale up.
gomega.Expect(scaledPodHdr).ShouldNot(gomega.Equal(podHdr))
167+
168+
// Scale back down by one replica; the single remaining decode pod must
// be one of the two pods observed after the scale up.
scaleDeployment(modelServers, -1)
169+
170+
scaledDownPrefillPods, scaledDownDecodePods := getModelServerPods(podSelector, prefillSelector, decodeSelector)
171+
gomega.Expect(scaledDownPrefillPods).Should(gomega.BeEmpty())
172+
gomega.Expect(scaledDownDecodePods).Should(gomega.HaveLen(1))
173+
gomega.Expect(scaledDownDecodePods[0]).Should(gomega.BeElementOf(scaledUpDecodePods))
174+
175+
// Run multiple times and ensure that they are scheduled on the remaining pod
176+
for range 5 {
177+
nsHdr, podHdr = runCompletion(simplePrompt, modelName)
178+
gomega.Expect(nsHdr).Should(gomega.Equal(nsName))
179+
gomega.Expect(podHdr).Should(gomega.Equal(scaledDownDecodePods[0]))
180+
}
181+
182+
// Remove the objects created by this spec.
testutils.DeleteObjects(testConfig, epp)
183+
testutils.DeleteObjects(testConfig, modelServers)
184+
})
185+
})
132186
})
133187

134188
// createModelServers creates the model server resources used for testing from the given filePaths.
@@ -341,3 +395,15 @@ schedulingProfiles:
341395
- pluginRef: precise-prefix-cache-scorer
342396
weight: 10
343397
`
398+
399+
// scaleConfig is the EPP (end point picker) configuration used by the model
// server scale up/down test. It declares the max-score-picker and
// single-profile-handler plugins and one scheduling profile, named default,
// that references max-score-picker.
400+
const scaleConfig = `apiVersion: inference.networking.x-k8s.io/v1alpha1
401+
kind: EndpointPickerConfig
402+
plugins:
403+
- type: max-score-picker
404+
- type: single-profile-handler
405+
schedulingProfiles:
406+
- name: default
407+
plugins:
408+
- pluginRef: max-score-picker
409+
`

test/e2e/utils_test.go

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,44 @@ import (
1111
"github.com/onsi/gomega/gexec"
1212
appsv1 "k8s.io/api/apps/v1"
1313
corev1 "k8s.io/api/core/v1"
14+
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1415
apilabels "k8s.io/apimachinery/pkg/labels"
1516
"k8s.io/apimachinery/pkg/types"
17+
"k8s.io/client-go/kubernetes"
1618
"sigs.k8s.io/controller-runtime/pkg/client"
19+
"sigs.k8s.io/controller-runtime/pkg/client/config"
1720
)
1821

22+
const (
23+
// deploymentKind is the lower-case kind that the part before the "/" of a
// "kind/name" object string is compared against to recognise deployments.
deploymentKind = "deployment"
24+
)
25+
26+
func scaleDeployment(objects []string, increment int) {
27+
k8sCfg := config.GetConfigOrDie()
28+
client, err := kubernetes.NewForConfig(k8sCfg)
29+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
30+
direction := "up"
31+
absIncrement := increment
32+
if increment < 0 {
33+
direction = "down"
34+
absIncrement = -increment
35+
}
36+
37+
for _, kindAndName := range objects {
38+
split := strings.Split(kindAndName, "/")
39+
if strings.ToLower(split[0]) == deploymentKind {
40+
ginkgo.By(fmt.Sprintf("Scaling the deployment %s %s by %d", split[1], direction, absIncrement))
41+
scale, err := client.AppsV1().Deployments(nsName).GetScale(testConfig.Context, split[1], v1.GetOptions{})
42+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
43+
44+
scale.Spec.Replicas += int32(increment)
45+
_, err = client.AppsV1().Deployments(nsName).UpdateScale(testConfig.Context, split[1], scale, v1.UpdateOptions{})
46+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
47+
}
48+
}
49+
podsInDeploymentsReady(objects)
50+
}
51+
1952
// getModelServerPods Returns the list of Prefill and Decode vLLM pods separately
2053
func getModelServerPods(podLabels, prefillLabels, decodeLabels map[string]string) ([]string, []string) {
2154
pods := getPods(podLabels)
@@ -73,11 +106,12 @@ func podsInDeploymentsReady(objects []string) {
73106
var deployment appsv1.Deployment
74107
helper := func(deploymentName string) bool {
75108
err := testConfig.K8sClient.Get(testConfig.Context, types.NamespacedName{Namespace: nsName, Name: deploymentName}, &deployment)
76-
return err == nil && deployment.Status.Replicas == deployment.Status.ReadyReplicas
109+
return err == nil && *deployment.Spec.Replicas == deployment.Status.Replicas &&
110+
deployment.Status.Replicas == deployment.Status.ReadyReplicas
77111
}
78112
for _, kindAndName := range objects {
79113
split := strings.Split(kindAndName, "/")
80-
if strings.ToLower(split[0]) == "deployment" {
114+
if strings.ToLower(split[0]) == deploymentKind {
81115
ginkgo.By(fmt.Sprintf("Waiting for pods of %s to be ready", split[1]))
82116
gomega.Eventually(helper, readyTimeout, interval).WithArguments(split[1]).Should(gomega.BeTrue())
83117
}

0 commit comments

Comments
 (0)