Skip to content

Commit 9b94f0c

Browse files
committed
fix e2e tests
1 parent e40d310 commit 9b94f0c

8 files changed

Lines changed: 39 additions & 12 deletions

File tree

Makefile

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ IMAGE_TAG_BASE ?= ghcr.io/llm-d
33
IMG_TAG ?= latest
44
IMG ?= $(IMAGE_TAG_BASE)/workload-variant-autoscaler:$(IMG_TAG)
55
KIND_ARGS ?= -t mix -n 3 -g 2 # Default: 3 nodes, 2 GPUs per node, mixed vendors
6-
CLUSTER_GPU_TYPE ?= mix
6+
CLUSTER_GPU_TYPE ?= nvidia-mix
77
CLUSTER_NODES ?= 3
88
CLUSTER_GPUS ?= 4
99
KUBECONFIG ?= $(HOME)/.kube/config
@@ -155,7 +155,7 @@ test-e2e: manifests generate fmt vet ## Run the e2e tests. Expected an isolated
155155
}
156156
$(eval FOCUS_ARGS := $(if $(FOCUS),-ginkgo.focus="$(FOCUS)",))
157157
$(eval SKIP_ARGS := $(if $(SKIP),-ginkgo.skip="$(SKIP)",))
158-
export COLLECTOR_V2=1 KUBECONFIG=$(KUBECONFIG) K8S_EXPECTED_VERSION=$(K8S_VERSION) && go test ./test/e2e-saturation-based/ -timeout 50m -v -ginkgo.v $(FOCUS_ARGS) $(SKIP_ARGS)
158+
export COLLECTOR_V2=1 KUBECONFIG=$(KUBECONFIG) K8S_EXPECTED_VERSION=$(K8S_VERSION) && go test ./test/e2e-saturation-based/ -timeout 60m -v -ginkgo.v $(FOCUS_ARGS) $(SKIP_ARGS)
159159

160160
# E2E tests on OpenShift cluster
161161
# Supports KUBECONFIG or in-cluster authentication (for self-hosted runners).

internal/engines/pipeline/default_limiter_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ func (m *mockTypeAllocator) Remaining() int {
9898

9999
// mockAlgorithm implements AllocationAlgorithm for testing
100100
type mockAlgorithm struct {
101-
name string
101+
name string
102102
allocateFunc func(ctx context.Context, decisions []*interfaces.VariantDecision, allocator ResourceAllocator) error
103103
}
104104

internal/engines/pipeline/greedy_saturation_algorithm_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ var _ = Describe("GreedyBySaturation", func() {
162162
Expect(err).NotTo(HaveOccurred())
163163

164164
// Only 3 GPUs available, 2 GPUs per replica = 1 replica can be added
165-
Expect(decisions[0].GPUsAllocated).To(Equal(2)) // Only full replicas count
165+
Expect(decisions[0].GPUsAllocated).To(Equal(2)) // Only full replicas count
166166
Expect(decisions[0].TargetReplicas).To(Equal(2)) // 1 + 1 replica
167167
Expect(decisions[0].WasLimited).To(BeTrue())
168168
})

test/e2e-saturation-based/e2e_saturation_suite_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ var (
5757
const (
5858
maximumAvailableGPUs = 4
5959
numNodes = 3
60-
gpuTypes = "mix"
60+
gpuTypes = "nvidia-mix"
6161

6262
kindClusterName = "wva-gpu-cluster"
6363
)

test/e2e-saturation-based/e2e_scale_from_zero_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ var _ = Describe("Test workload-variant-autoscaler - Scale-From-Zero Feature", O
7373
appLabel = name
7474
namespace = llmDNamespace
7575
port = 8000
76-
modelName = llamaModelId + "-sfz"
76+
modelName = llamaModelId
7777
gatewayService = "infra-sim-inference-gateway"
7878

7979
// Start with 0 replicas to test scale-from-zero

test/e2e-saturation-based/e2e_scale_to_zero_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -283,8 +283,8 @@ retention_period: %s`, modelName, retentionPeriodShort),
283283
namespace,
284284
fmt.Sprintf("http://%s:%d", gatewayName, 80),
285285
modelName,
286-
loadRatePerSecond,
287-
maxExecutionTimeSec,
286+
5, // Reduced rate (was loadRatePerSecond=8)
287+
10, // Drastically reduced duration to prevent queue backlog (was 60s)
288288
inputTokens,
289289
outputTokens,
290290
k8sClient,

test/utils/e2eutils.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ func IsCertManagerCRDsInstalled() bool {
283283

284284
// LoadImageToKindClusterWithName loads a local docker image to the kind cluster
285285
func LoadImageToKindClusterWithName(name string, maxGPUs int) error {
286-
cluster, err := CheckIfClusterExistsOrCreate(maxGPUs)
286+
cluster, err := CheckIfClusterExistsOrCreate(maxGPUs, "mix")
287287
if err != nil {
288288
return err
289289
}
@@ -293,7 +293,7 @@ func LoadImageToKindClusterWithName(name string, maxGPUs int) error {
293293
return err
294294
}
295295

296-
func CheckIfClusterExistsOrCreate(maxGPUs int) (string, error) {
296+
func CheckIfClusterExistsOrCreate(maxGPUs int, gpuType string) (string, error) {
297297
// Check if the kind cluster exists
298298
existsCmd := exec.Command("kind", "get", "clusters")
299299
output, err := Run(existsCmd)
@@ -312,7 +312,7 @@ func CheckIfClusterExistsOrCreate(maxGPUs int) (string, error) {
312312
// Create the kind cluster if it doesn't exist
313313
expectedVersion := os.Getenv("K8S_EXPECTED_VERSION")
314314
if !clusterExists {
315-
scriptCmd := exec.Command("bash", "deploy/kind-emulator/setup.sh", "-g", fmt.Sprintf("%d", maxGPUs), "K8S_VERSION="+expectedVersion)
315+
scriptCmd := exec.Command("bash", "deploy/kind-emulator/setup.sh", "-g", fmt.Sprintf("%d", maxGPUs), "-t", gpuType, "K8S_VERSION="+expectedVersion)
316316
if _, err := Run(scriptCmd); err != nil {
317317
return "", fmt.Errorf("failed to create kind cluster: %v", err)
318318
}
@@ -1240,7 +1240,7 @@ func SetupTestEnvironment(image string, numNodes, gpusPerNode int, gpuTypes stri
12401240
gom.Expect(os.Setenv("CLUSTER_NAME", clusterName)).To(gom.Succeed())
12411241
setEnvIfNotSet("CLUSTER_NODES", fmt.Sprintf("%d", numNodes))
12421242
setEnvIfNotSet("CLUSTER_GPUS", fmt.Sprintf("%d", gpusPerNode))
1243-
setEnvIfNotSet("CLUSTER_GPU_TYPE", gpuTypes) // Use CLUSTER_GPU_TYPE to match Makefile
1243+
setEnvIfNotSet("CLUSTER_GPU_TYPE", gpuTypes) // Use CLUSTER_GPU_TYPE to match Makefile
12441244
gom.Expect(os.Setenv("WVA_IMAGE_PULL_POLICY", "IfNotPresent")).To(gom.Succeed()) // The image is built locally by the tests
12451245
gom.Expect(os.Setenv("CREATE_CLUSTER", "true")).To(gom.Succeed()) // Always create a new cluster for E2E tests
12461246

test/utils/resources/llmdsim.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ func CreateLlmdSimDeployment(namespace, deployName, modelName, appLabel, port st
5353
"--enable-kvcache",
5454
"--kv-cache-size=1024",
5555
"--block-size=16",
56+
"--tokenizers-cache-dir=/tmp",
5657
},
5758
Env: []corev1.EnvVar{
5859
{Name: "POD_NAME", ValueFrom: &corev1.EnvVarSource{
@@ -67,6 +68,12 @@ func CreateLlmdSimDeployment(namespace, deployName, modelName, appLabel, port st
6768
FieldPath: "metadata.namespace",
6869
},
6970
}},
71+
{Name: "POD_IP", ValueFrom: &corev1.EnvVarSource{
72+
FieldRef: &corev1.ObjectFieldSelector{
73+
APIVersion: "v1",
74+
FieldPath: "status.podIP",
75+
},
76+
}},
7077
},
7178
Ports: []corev1.ContainerPort{
7279
{ContainerPort: 8000, Name: "http", Protocol: corev1.ProtocolTCP},
@@ -105,6 +112,7 @@ func CreateLlmdSimDeploymentWithGPU(namespace, deployName, modelName, appLabel,
105112
"--enable-kvcache",
106113
"--kv-cache-size=1024",
107114
"--block-size=16",
115+
"--tokenizers-cache-dir=/tmp",
108116
},
109117
Env: []corev1.EnvVar{
110118
{Name: "POD_NAME", ValueFrom: &corev1.EnvVarSource{
@@ -119,6 +127,12 @@ func CreateLlmdSimDeploymentWithGPU(namespace, deployName, modelName, appLabel,
119127
FieldPath: "metadata.namespace",
120128
},
121129
}},
130+
{Name: "POD_IP", ValueFrom: &corev1.EnvVarSource{
131+
FieldRef: &corev1.ObjectFieldSelector{
132+
APIVersion: "v1",
133+
FieldPath: "status.podIP",
134+
},
135+
}},
122136
},
123137
Ports: []corev1.ContainerPort{
124138
{ContainerPort: 8000, Name: "http", Protocol: corev1.ProtocolTCP},
@@ -185,6 +199,19 @@ func CreateLlmdSimDeploymentWithGPUAndNodeSelector(
185199

186200
if len(nodeSelector) > 0 {
187201
deployment.Spec.Template.Spec.NodeSelector = nodeSelector
202+
// Add tolerations for control-plane nodes as H100s might be on control-plane in kind-emulator
203+
deployment.Spec.Template.Spec.Tolerations = []corev1.Toleration{
204+
{
205+
Key: "node-role.kubernetes.io/control-plane",
206+
Operator: corev1.TolerationOpExists,
207+
Effect: corev1.TaintEffectNoSchedule,
208+
},
209+
{
210+
Key: "node-role.kubernetes.io/master",
211+
Operator: corev1.TolerationOpExists,
212+
Effect: corev1.TaintEffectNoSchedule,
213+
},
214+
}
188215
}
189216

190217
return deployment

0 commit comments

Comments
 (0)