Skip to content

Commit 1b98069

Browse files
committed
Add mock mode, pull secret automation, and pod health improvements
Signed-off-by: Aneesh Puttur <aneeshputtur@gmail.com>
1 parent 16b3d99 commit 1b98069

6 files changed

Lines changed: 275 additions & 16 deletions

File tree

Makefile

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ MODEL_SOURCE ?= hf
2020
MODEL ?=
2121
NO_CLEANUP ?=
2222
DISCOVER ?=
23+
MOCK ?=
24+
PULL_SECRET ?=
2325

2426
# Build flags
2527
GO_TEST_FLAGS ?= -v
@@ -57,6 +59,12 @@ endif
5759
ifdef DISCOVER
5860
TEST_FLAGS += -mode=discover
5961
endif
62+
ifdef MOCK
63+
TEST_FLAGS += -mock=$(MOCK)
64+
endif
65+
ifdef PULL_SECRET
66+
TEST_FLAGS += -pull-secret=$(PULL_SECRET)
67+
endif
6068

6169
.PHONY: help
6270
help: ## Show this help
@@ -79,6 +87,10 @@ help: ## Show this help
7987
@printf ' \033[33m%-25s\033[0m %s\n' "" "make test TESTCASE=single-gpu NO_CLEANUP=1"
8088
@printf ' \033[36m%-25s\033[0m %s\n' "DISCOVER" "Validate existing deployment (skip deploy/cleanup)"
8189
@printf ' \033[33m%-25s\033[0m %s\n' "" "make test TESTCASE=single-gpu DISCOVER=true NAMESPACE=my-ns"
90+
@printf ' \033[36m%-25s\033[0m %s\n' "MOCK" "Mock vLLM image for testing without GPU"
91+
@printf ' \033[33m%-25s\033[0m %s\n' "" "make test TESTCASE=single-gpu MOCK=ghcr.io/aneeshkp/vllm-mock:latest"
92+
@printf ' \033[36m%-25s\033[0m %s\n' "PULL_SECRET" "Pull secret name to copy into namespace (default: auto-detect)"
93+
@printf ' \033[33m%-25s\033[0m %s\n' "" "make test TESTCASE=single-gpu PULL_SECRET=my-registry-secret"
8294
@printf ' \033[36m%-25s\033[0m %s\n' "STORAGE_CLASS" "StorageClass for PVCs (default: cluster default)"
8395
@printf ' \033[33m%-25s\033[0m %s\n' "" "make cache-model TESTCASE=single-gpu STORAGE_CLASS=azurefile-rwx"
8496
@printf ' \033[36m%-25s\033[0m %s\n' "STORAGE_SIZE" "Override PVC storage size (default: from test case config)"
@@ -289,6 +301,9 @@ testcases: ## List available test cases grouped by category
289301
@echo "Discover (validate existing deployment):"
290302
@echo " make test TESTCASE=single-gpu DISCOVER=true NAMESPACE=my-ns"
291303
@echo ""
304+
@echo "Mock mode (no GPU required):"
305+
@echo " make test TESTCASE=single-gpu MOCK=ghcr.io/aneeshkp/vllm-mock:latest"
306+
@echo ""
292307

293308
.PHONY: models
294309
models: ## List models and URIs for all test cases

README.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,32 @@ make test TESTCASE=single-gpu DISCOVER=true NAMESPACE=my-ns
144144
make test-profile-all DISCOVER=true NAMESPACE=my-ns
145145
```
146146

147+
## Mock Mode
148+
149+
Test the full framework lifecycle **without GPUs or real models**. The deployer replaces the vLLM container in each manifest with a lightweight mock image that serves the OpenAI-compatible API.
150+
151+
```bash
152+
# Run with mock image (no GPU required)
153+
make test TESTCASE=single-gpu MOCK=ghcr.io/aneeshkp/vllm-mock:latest
154+
155+
# Works with any test case
156+
make test TESTCASE=pd MOCK=ghcr.io/aneeshkp/vllm-mock:latest
157+
158+
# Combine with other flags
159+
make test-profile-all MOCK=ghcr.io/aneeshkp/vllm-mock:latest NO_CLEANUP=1
160+
```
161+
162+
What mock mode does:
163+
- Replaces the `main` container in `spec.template` and `spec.prefill.template` with the mock image
164+
- Removes GPU resource requests/limits (uses minimal CPU/memory instead)
165+
- Skips the metrics scraping phase (mock doesn't expose real vLLM metrics)
166+
- Scheduler containers are **not** replaced — they run as normal
167+
168+
This is useful for:
169+
- CI/CD pipelines on clusters without GPUs
170+
- Testing framework changes without waiting for model downloads
171+
- Validating manifest structure and KServe operator behavior
172+
147173
## Configuration
148174

149175
### Flags
@@ -154,6 +180,8 @@ make test-profile-all DISCOVER=true NAMESPACE=my-ns
154180
| `MODEL` | — | Override model (e.g., `Qwen/Qwen2.5-7B-Instruct`) |
155181
| `MODEL_SOURCE` | `hf` | `hf` (HuggingFace direct) or `pvc` (pre-cached) |
156182
| `MANIFEST_REF` | `main` | Manifest repo branch (e.g., `3.4-ea1`, `3.4-ea2`) |
183+
| `MOCK` | — | Mock vLLM image (e.g., `ghcr.io/aneeshkp/vllm-mock:latest`) |
184+
| `PULL_SECRET` | auto-detect | Pull secret name to copy into namespace (skipped on OCP) |
157185
| `NO_CLEANUP` | — | Set to `1` to keep resources after test |
158186
| `DISCOVER` | — | Set to `true` to validate existing deployment (skip deploy/cleanup) |
159187
| `STORAGE_CLASS` | cluster default | StorageClass for PVCs |

docs/adding-test-cases.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ spec:
3434
route: {}
3535
gateway: {}
3636
template:
37+
imagePullSecrets:
38+
- name: redhat-pull-secret
3739
containers:
3840
- name: main
3941
resources:
@@ -66,6 +68,7 @@ Customize based on what you're testing:
6668
| Data/Expert parallelism | Add `parallelism:` section |
6769
| RDMA/RoCE | Add network annotations and `rdma/roce_gdr` resources |
6870
| Multiple GPUs | Set `nvidia.com/gpu: "8"` in resources |
71+
| Pull secrets | Add `imagePullSecrets` under `template` (at deploy time each referenced secret is auto-copied from the first of `istio-system`, `kserve`, or `opendatahub` that has it; skipped on OCP) |
6972

7073
Push to the manifest repo, then run `make setup` to pull the new manifest.
7174

@@ -116,6 +119,7 @@ cleanup: true
116119
| `name` | Unique test case name (used with `TESTCASE=`) |
117120
| `model.name` | HuggingFace model ID — can be overridden with `MODEL=` |
118121
| `model.uri` | Base model URI (framework patches this based on `MODEL_SOURCE`) |
122+
| `model.category` | Groups test cases in `make testcases`: `single-node-gpu`, `cache-aware`, `multi-node-gpu`, `deepseek` |
119123
| `model.cache.storageSize` | PVC size — can be overridden with `STORAGE_SIZE=` |
120124
| `deployment.manifestPath` | Manifest filename (looked up in `deploy/manifests/`) |
121125
| `deployment.readyTimeout` | Max wait for `LLMInferenceService` to become READY |
@@ -154,7 +158,7 @@ validation:
154158
Add the test case name to a profile in `configs/profiles/`:
155159

156160
```yaml
157-
# configs/profiles/full.yaml
161+
# configs/profiles/all.yaml
158162
testCases:
159163
- single-gpu
160164
- my-new-test # add here
@@ -192,6 +196,9 @@ make test TESTCASE=my-new-test MODEL=Qwen/Qwen2.5-7B-Instruct
192196
# Pre-cache model, then run with PVC
193197
make cache-model TESTCASE=my-new-test
194198
make test TESTCASE=my-new-test MODEL_SOURCE=pvc
199+
200+
# Run with mock image (no GPU required)
201+
make test TESTCASE=my-new-test MOCK=ghcr.io/aneeshkp/vllm-mock:latest
195202
```
196203

197204
## File checklist

framework/deployer/deployer.go

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ type Deployer struct {
2929
Platform Platform
3030
Namespace string
3131
ModelSource string // "pvc", "hf", or "pvc-snapshot"
32+
MockImage string // if set, replace vLLM image with mock and remove GPU resources
33+
PullSecretName string // override pull secret name to copy (default: auto-detect from manifest)
3234
// LogFunc is called with progress messages. If nil, progress is silent.
3335
LogFunc func(format string, args ...interface{})
3436
}
@@ -76,6 +78,11 @@ func (d *Deployer) Deploy(ctx context.Context, tc *config.TestCase) *DeployResul
7678
}
7779
result.Logs = append(result.Logs, fmt.Sprintf("Namespace %s ready", ns))
7880

81+
// Copy image pull secrets referenced in the manifest from istio-system
82+
if err := d.ensurePullSecrets(ctx, tc.Deployment.ManifestPath, ns); err != nil {
83+
d.logProgress(" Warning: failed to copy pull secrets: %v", err)
84+
}
85+
7986
// Apply the manifest
8087
manifestPath := tc.Deployment.ManifestPath
8188
if manifestPath == "" {
@@ -436,6 +443,97 @@ func (d *Deployer) ensureNamespace(ctx context.Context, ns string) error {
436443
return err
437444
}
438445

446+
// ensurePullSecrets reads the manifest for imagePullSecrets references and copies
447+
// them from istio-system into the target namespace if they don't already exist.
448+
func (d *Deployer) ensurePullSecrets(ctx context.Context, manifestPath, ns string) error {
449+
// OCP clusters have pull secrets configured globally — no need to copy
450+
if d.Platform == PlatformOCP {
451+
return nil
452+
}
453+
454+
seen := map[string]bool{}
455+
456+
// If an explicit pull secret name is set, use that; otherwise auto-detect from manifest
457+
if d.PullSecretName != "" {
458+
seen[d.PullSecretName] = true
459+
} else {
460+
data, err := os.ReadFile(manifestPath)
461+
if err != nil {
462+
return fmt.Errorf("reading manifest: %w", err)
463+
}
464+
// Parse secret names from "- name: <secret>" lines under imagePullSecrets
465+
lines := strings.Split(string(data), "\n")
466+
inPullSecrets := false
467+
for _, line := range lines {
468+
trimmed := strings.TrimSpace(line)
469+
if trimmed == "imagePullSecrets:" {
470+
inPullSecrets = true
471+
continue
472+
}
473+
if inPullSecrets {
474+
if strings.HasPrefix(trimmed, "- name: ") {
475+
name := strings.TrimPrefix(trimmed, "- name: ")
476+
seen[name] = true
477+
continue
478+
}
479+
inPullSecrets = false
480+
}
481+
}
482+
}
483+
484+
sourceNamespaces := []string{"istio-system", "kserve", "opendatahub"}
485+
for secretName := range seen {
486+
// Skip if already exists in target namespace
487+
if _, err := d.Kubectl(ctx, "get", "secret", secretName, "-n", ns); err == nil {
488+
continue
489+
}
490+
491+
// Try to copy from known source namespaces
492+
copied := false
493+
for _, srcNS := range sourceNamespaces {
494+
if _, err := d.Kubectl(ctx, "get", "secret", secretName, "-n", srcNS); err != nil {
495+
continue
496+
}
497+
// Get the secret and re-apply in target namespace
498+
secretYAML, err := d.Kubectl(ctx, "get", "secret", secretName, "-n", srcNS, "-o", "yaml")
499+
if err != nil {
500+
continue
501+
}
502+
// Replace namespace and strip cluster-specific metadata
503+
secretYAML = strings.ReplaceAll(secretYAML, "namespace: "+srcNS, "namespace: "+ns)
504+
// Remove resourceVersion, uid, creationTimestamp so it can be created fresh
505+
var cleanLines []string
506+
for _, l := range strings.Split(secretYAML, "\n") {
507+
t := strings.TrimSpace(l)
508+
if strings.HasPrefix(t, "resourceVersion:") ||
509+
strings.HasPrefix(t, "uid:") ||
510+
strings.HasPrefix(t, "creationTimestamp:") {
511+
continue
512+
}
513+
cleanLines = append(cleanLines, l)
514+
}
515+
tmpFile, err := os.CreateTemp("", "pull-secret-*.yaml")
516+
if err != nil {
517+
return fmt.Errorf("creating temp file: %w", err)
518+
}
519+
_, _ = tmpFile.WriteString(strings.Join(cleanLines, "\n"))
520+
_ = tmpFile.Close()
521+
_, err = d.Kubectl(ctx, "apply", "-n", ns, "-f", tmpFile.Name())
522+
_ = os.Remove(tmpFile.Name())
523+
if err != nil {
524+
continue
525+
}
526+
d.logProgress(" Copied pull secret %s from %s to %s", secretName, srcNS, ns)
527+
copied = true
528+
break
529+
}
530+
if !copied {
531+
return fmt.Errorf("pull secret %q not found in any of %v", secretName, sourceNamespaces)
532+
}
533+
}
534+
return nil
535+
}
536+
439537
// Kubectl runs a kubectl command with the deployer's kubeconfig and platform settings.
440538
func (d *Deployer) Kubectl(ctx context.Context, args ...string) (string, error) {
441539
cmdArgs := make([]string, 0, len(args)+2)
@@ -490,6 +588,72 @@ func (d *Deployer) patchManifest(manifestPath string, tc *config.TestCase) (stri
490588
}
491589
}
492590

591+
// Mock mode: replace main vLLM container with mock image (no GPU, no model download)
592+
// Only patches under spec.template.containers, NOT spec.router.scheduler.template.containers
593+
if d.MockImage != "" {
594+
var newLines []string
595+
skip := false
596+
containerIndent := 0
597+
inSchedulerTemplate := false
598+
for _, line := range lines {
599+
trimmed := strings.TrimSpace(line)
600+
lineIndent := len(line) - len(strings.TrimLeft(line, " "))
601+
602+
// Track if we're inside the scheduler template section
603+
if trimmed == "scheduler:" || strings.HasPrefix(trimmed, "scheduler: ") {
604+
inSchedulerTemplate = true
605+
}
606+
// spec.template / spec.prefill.template are at lower indent than scheduler.template
607+
if trimmed == "template:" && !inSchedulerTemplate {
608+
// Already outside scheduler — keep as false
609+
} else if trimmed == "template:" && lineIndent <= 4 {
610+
inSchedulerTemplate = false
611+
}
612+
613+
// Replace all "- name: main" under spec.template and spec.prefill.template, not scheduler.template
614+
if trimmed == "- name: main" && !inSchedulerTemplate {
615+
containerIndent = lineIndent
616+
skip = true
617+
indent := strings.Repeat(" ", containerIndent)
618+
newLines = append(newLines,
619+
indent+"- name: main",
620+
indent+" image: "+d.MockImage,
621+
indent+" imagePullPolicy: Always",
622+
indent+" command: [\"python3\"]",
623+
indent+" args: [\"/app/server.py\"]",
624+
indent+" resources:",
625+
indent+" limits:",
626+
indent+" cpu: \"500m\"",
627+
indent+" memory: 128Mi",
628+
indent+" requests:",
629+
indent+" cpu: \"100m\"",
630+
indent+" memory: 64Mi",
631+
)
632+
patched = true
633+
continue
634+
}
635+
636+
if skip {
637+
if trimmed != "" && lineIndent <= containerIndent {
638+
skip = false
639+
} else {
640+
continue
641+
}
642+
}
643+
644+
newLines = append(newLines, line)
645+
}
646+
// Filter empty lines left behind by container block removal
647+
var filteredLines []string
648+
for _, line := range newLines {
649+
if line != "" {
650+
filteredLines = append(filteredLines, line)
651+
}
652+
}
653+
lines = filteredLines
654+
d.logProgress(" Mock mode: using image %s (no GPU)", d.MockImage)
655+
}
656+
493657
if !patched {
494658
return manifestPath, nil
495659
}

0 commit comments

Comments
 (0)