Add mock mode, pull secret automation, and pod health improvements

aneeshkp · aneeshkp · commit 1b98069775db · 2026-04-06T21:52:44.000-04:00
Signed-off-by: Aneesh Puttur &lt;aneeshputtur@gmail.com&gt;
diff --git a/Makefile b/Makefile
@@ -20,6 +20,8 @@ MODEL_SOURCE ?= hf
 MODEL ?=
 NO_CLEANUP ?=
 DISCOVER ?=
+MOCK ?=
+PULL_SECRET ?=
 
 # Build flags
 GO_TEST_FLAGS ?= -v
@@ -57,6 +59,12 @@ endif
 ifdef DISCOVER
   TEST_FLAGS += -mode=discover
 endif
+ifdef MOCK
+  TEST_FLAGS += -mock=$(MOCK)
+endif
+ifdef PULL_SECRET
+  TEST_FLAGS += -pull-secret=$(PULL_SECRET)
+endif
 
 .PHONY: help
 help: ## Show this help
@@ -79,6 +87,10 @@ help: ## Show this help
 	@printf '  \033[33m%-25s\033[0m %s\n' "" "make test TESTCASE=single-gpu NO_CLEANUP=1"
 	@printf '  \033[36m%-25s\033[0m %s\n' "DISCOVER" "Validate existing deployment (skip deploy/cleanup)"
 	@printf '  \033[33m%-25s\033[0m %s\n' "" "make test TESTCASE=single-gpu DISCOVER=true NAMESPACE=my-ns"
+	@printf '  \033[36m%-25s\033[0m %s\n' "MOCK" "Mock vLLM image for testing without GPU"
+	@printf '  \033[33m%-25s\033[0m %s\n' "" "make test TESTCASE=single-gpu MOCK=ghcr.io/aneeshkp/vllm-mock:latest"
+	@printf '  \033[36m%-25s\033[0m %s\n' "PULL_SECRET" "Pull secret name to copy into namespace (default: auto-detect)"
+	@printf '  \033[33m%-25s\033[0m %s\n' "" "make test TESTCASE=single-gpu PULL_SECRET=my-registry-secret"
 	@printf '  \033[36m%-25s\033[0m %s\n' "STORAGE_CLASS" "StorageClass for PVCs (default: cluster default)"
 	@printf '  \033[33m%-25s\033[0m %s\n' "" "make cache-model TESTCASE=single-gpu STORAGE_CLASS=azurefile-rwx"
 	@printf '  \033[36m%-25s\033[0m %s\n' "STORAGE_SIZE" "Override PVC storage size (default: from test case config)"
@@ -289,6 +301,9 @@ testcases: ## List available test cases grouped by category
 	@echo "Discover (validate existing deployment):"
 	@echo "  make test TESTCASE=single-gpu DISCOVER=true NAMESPACE=my-ns"
 	@echo ""
+	@echo "Mock mode (no GPU required):"
+	@echo "  make test TESTCASE=single-gpu MOCK=ghcr.io/aneeshkp/vllm-mock:latest"
+	@echo ""
 
 .PHONY: models
 models: ## List models and URIs for all test cases
diff --git a/README.md b/README.md
@@ -144,6 +144,32 @@ make test TESTCASE=single-gpu DISCOVER=true NAMESPACE=my-ns
 make test-profile-all DISCOVER=true NAMESPACE=my-ns
 ```
 
+## Mock Mode
+
+Test the full framework lifecycle **without GPUs or real models**. The deployer replaces the vLLM container in each manifest with a lightweight mock image that serves the OpenAI-compatible API.
+
+```bash
+# Run with mock image (no GPU required)
+make test TESTCASE=single-gpu MOCK=ghcr.io/aneeshkp/vllm-mock:latest
+
+# Works with any test case
+make test TESTCASE=pd MOCK=ghcr.io/aneeshkp/vllm-mock:latest
+
+# Combine with other flags
+make test-profile-all MOCK=ghcr.io/aneeshkp/vllm-mock:latest NO_CLEANUP=1
+```
+
+What mock mode does:
+- Replaces the `main` container in `spec.template` and `spec.prefill.template` with the mock image
+- Removes GPU resource requests/limits (uses minimal CPU/memory instead)
+- Skips the metrics scraping phase (mock doesn't expose real vLLM metrics)
+- Scheduler containers are **not** replaced — they run as normal
+
+This is useful for:
+- CI/CD pipelines on clusters without GPUs
+- Testing framework changes without waiting for model downloads
+- Validating manifest structure and KServe operator behavior
+
 ## Configuration
 
 ### Flags
@@ -154,6 +180,8 @@ make test-profile-all DISCOVER=true NAMESPACE=my-ns
 | `MODEL` | — | Override model (e.g., `Qwen/Qwen2.5-7B-Instruct`) |
 | `MODEL_SOURCE` | `hf` | `hf` (HuggingFace direct) or `pvc` (pre-cached) |
 | `MANIFEST_REF` | `main` | Manifest repo branch (e.g., `3.4-ea1`, `3.4-ea2`) |
+| `MOCK` | — | Mock vLLM image (e.g., `ghcr.io/aneeshkp/vllm-mock:latest`) |
+| `PULL_SECRET` | auto-detect | Pull secret name to copy into namespace (skipped on OCP) |
 | `NO_CLEANUP` | — | Set to `1` to keep resources after test |
 | `DISCOVER` | — | Set to `true` to validate existing deployment (skip deploy/cleanup) |
 | `STORAGE_CLASS` | cluster default | StorageClass for PVCs |
diff --git a/docs/adding-test-cases.md b/docs/adding-test-cases.md
@@ -34,6 +34,8 @@ spec:
     route: {}
     gateway: {}
   template:
+    imagePullSecrets:
+    - name: redhat-pull-secret
     containers:
       - name: main
         resources:
@@ -66,6 +68,7 @@ Customize based on what you're testing:
 | Data/Expert parallelism | Add `parallelism:` section |
 | RDMA/RoCE | Add network annotations and `rdma/roce_gdr` resources |
 | Multiple GPUs | Set `nvidia.com/gpu: "8"` in resources |
+| Pull secrets | Add `imagePullSecrets` under `template` (on non-OCP clusters, secrets missing from the namespace are auto-copied from `istio-system`, `kserve`, or `opendatahub` at deploy time) |
 
 Push to the manifest repo, then run `make setup` to pull the new manifest.
 
@@ -116,6 +119,7 @@ cleanup: true
 | `name` | Unique test case name (used with `TESTCASE=`) |
 | `model.name` | HuggingFace model ID — can be overridden with `MODEL=` |
 | `model.uri` | Base model URI (framework patches this based on `MODEL_SOURCE`) |
+| `model.category` | Groups test cases in `make testcases`: `single-node-gpu`, `cache-aware`, `multi-node-gpu`, `deepseek` |
 | `model.cache.storageSize` | PVC size — can be overridden with `STORAGE_SIZE=` |
 | `deployment.manifestPath` | Manifest filename (looked up in `deploy/manifests/`) |
 | `deployment.readyTimeout` | Max wait for `LLMInferenceService` to become READY |
@@ -154,7 +158,7 @@ validation:
 Add the test case name to a profile in `configs/profiles/`:
 
 ```yaml
-# configs/profiles/full.yaml
+# configs/profiles/all.yaml
 testCases:
   - single-gpu
   - my-new-test          # add here
@@ -192,6 +196,9 @@ make test TESTCASE=my-new-test MODEL=Qwen/Qwen2.5-7B-Instruct
 # Pre-cache model, then run with PVC
 make cache-model TESTCASE=my-new-test
 make test TESTCASE=my-new-test MODEL_SOURCE=pvc
+
+# Run with mock image (no GPU required)
+make test TESTCASE=my-new-test MOCK=ghcr.io/aneeshkp/vllm-mock:latest
 ```
 
 ## File checklist
diff --git a/framework/deployer/deployer.go b/framework/deployer/deployer.go
@@ -29,6 +29,8 @@ type Deployer struct {
 	Platform    Platform
 	Namespace   string
 	ModelSource string // "pvc", "hf", or "pvc-snapshot"
+	MockImage      string // if set, replace vLLM image with mock and remove GPU resources
+	PullSecretName string // override pull secret name to copy (default: auto-detect from manifest)
 	// LogFunc is called with progress messages. If nil, progress is silent.
 	LogFunc func(format string, args ...interface{})
 }
@@ -76,6 +78,11 @@ func (d *Deployer) Deploy(ctx context.Context, tc *config.TestCase) *DeployResul
 	}
 	result.Logs = append(result.Logs, fmt.Sprintf("Namespace %s ready", ns))
 
+	// Best-effort: make sure the pull secrets (PullSecretName, or those referenced in the manifest) exist in the namespace; a failure only logs a warning
+	if err := d.ensurePullSecrets(ctx, tc.Deployment.ManifestPath, ns); err != nil {
+		d.logProgress("  Warning: failed to copy pull secrets: %v", err)
+	}
+
 	// Apply the manifest
 	manifestPath := tc.Deployment.ManifestPath
 	if manifestPath == "" {
@@ -436,6 +443,97 @@ func (d *Deployer) ensureNamespace(ctx context.Context, ns string) error {
 	return err
 }
 
+// ensurePullSecrets makes sure PullSecretName (if set) or else every imagePullSecrets name listed in the manifest exists in ns,
+// copying each missing secret from the first of istio-system, kserve, opendatahub where get and apply succeed; it errors if none works.
+func (d *Deployer) ensurePullSecrets(ctx context.Context, manifestPath, ns string) error {
+	// OCP clusters have pull secrets configured globally — no need to copy
+	if d.Platform == PlatformOCP {
+		return nil
+	}
+
+	seen := map[string]bool{} // set of secret names to ensure in ns
+
+	// If an explicit pull secret name is set, use that; otherwise auto-detect from manifest
+	if d.PullSecretName != "" {
+		seen[d.PullSecretName] = true // the manifest is not read in this case
+	} else {
+		data, err := os.ReadFile(manifestPath)
+		if err != nil {
+			return fmt.Errorf("reading manifest: %w", err)
+		}
+		// Line-based scan (not a YAML parser): collect "- name: <secret>" lines that directly follow an "imagePullSecrets:" line
+		lines := strings.Split(string(data), "\n")
+		inPullSecrets := false
+		for _, line := range lines {
+			trimmed := strings.TrimSpace(line)
+			if trimmed == "imagePullSecrets:" {
+				inPullSecrets = true
+				continue
+			}
+			if inPullSecrets {
+				if strings.HasPrefix(trimmed, "- name: ") {
+					name := strings.TrimPrefix(trimmed, "- name: ")
+					seen[name] = true
+					continue
+				}
+				inPullSecrets = false // the first line that is not a "- name: " entry ends the list
+			}
+		}
+	}
+
+	sourceNamespaces := []string{"istio-system", "kserve", "opendatahub"} // tried in this order
+	for secretName := range seen {
+		// Skip if already exists in target namespace
+		if _, err := d.Kubectl(ctx, "get", "secret", secretName, "-n", ns); err == nil {
+			continue
+		}
+
+		// Copy from the first source namespace where both the get and the apply succeed
+		copied := false
+		for _, srcNS := range sourceNamespaces {
+			if _, err := d.Kubectl(ctx, "get", "secret", secretName, "-n", srcNS); err != nil {
+				continue // lookup failed here; try the next source namespace
+			}
+			// Get the secret and re-apply in target namespace
+			secretYAML, err := d.Kubectl(ctx, "get", "secret", secretName, "-n", srcNS, "-o", "yaml")
+			if err != nil {
+				continue
+			}
+			// Retarget the namespace. NOTE(review): plain text replacement — rewrites every occurrence of "namespace: <srcNS>" in the YAML
+			secretYAML = strings.ReplaceAll(secretYAML, "namespace: "+srcNS, "namespace: "+ns)
+			// Drop resourceVersion, uid, creationTimestamp lines (matched by prefix at any nesting depth) so it can be created fresh
+			var cleanLines []string
+			for _, l := range strings.Split(secretYAML, "\n") {
+				t := strings.TrimSpace(l)
+				if strings.HasPrefix(t, "resourceVersion:") ||
+					strings.HasPrefix(t, "uid:") ||
+					strings.HasPrefix(t, "creationTimestamp:") {
+					continue
+				}
+				cleanLines = append(cleanLines, l)
+			}
+			tmpFile, err := os.CreateTemp("", "pull-secret-*.yaml")
+			if err != nil {
+				return fmt.Errorf("creating temp file: %w", err)
+			}
+			_, _ = tmpFile.WriteString(strings.Join(cleanLines, "\n")) // NOTE(review): write error is discarded
+			_ = tmpFile.Close()
+			_, err = d.Kubectl(ctx, "apply", "-n", ns, "-f", tmpFile.Name())
+			_ = os.Remove(tmpFile.Name()) // remove the temp copy whether or not the apply succeeded
+			if err != nil {
+				continue // apply failed; try the next source namespace
+			}
+			d.logProgress("  Copied pull secret %s from %s to %s", secretName, srcNS, ns)
+			copied = true
+			break
+		}
+		if !copied {
+			return fmt.Errorf("pull secret %q not found in any of %v", secretName, sourceNamespaces)
+		}
+	}
+	return nil
+}
+
 // Kubectl runs a kubectl command with the deployer's kubeconfig and platform settings.
 func (d *Deployer) Kubectl(ctx context.Context, args ...string) (string, error) {
 	cmdArgs := make([]string, 0, len(args)+2)
@@ -490,6 +588,72 @@ func (d *Deployer) patchManifest(manifestPath string, tc *config.TestCase) (stri
 		}
 	}
 
+	// Mock mode: replace main vLLM container with mock image (no GPU, no model download)
+	// Only patches "- name: main" containers outside the scheduler section (spec.template and spec.prefill.template), NOT spec.router.scheduler.template.containers
+	if d.MockImage != "" {
+		var newLines []string
+		skip := false
+		containerIndent := 0
+		inSchedulerTemplate := false
+		for _, line := range lines {
+			trimmed := strings.TrimSpace(line)
+			lineIndent := len(line) - len(strings.TrimLeft(line, " "))
+
+			// Track if we're inside the scheduler template section
+			if trimmed == "scheduler:" || strings.HasPrefix(trimmed, "scheduler: ") {
+				inSchedulerTemplate = true
+			}
+			// spec.template / spec.prefill.template are at lower indent than scheduler.template
+			if trimmed == "template:" && !inSchedulerTemplate {
+				// Already outside scheduler — keep as false
+			} else if trimmed == "template:" && lineIndent <= 4 {
+				inSchedulerTemplate = false
+			}
+
+			// Replace all "- name: main" under spec.template and spec.prefill.template, not scheduler.template
+			if trimmed == "- name: main" && !inSchedulerTemplate {
+				containerIndent = lineIndent
+				skip = true
+				indent := strings.Repeat(" ", containerIndent)
+				newLines = append(newLines,
+					indent+"- name: main",
+					indent+"  image: "+d.MockImage,
+					indent+"  imagePullPolicy: Always",
+					indent+"  command: [\"python3\"]",
+					indent+"  args: [\"/app/server.py\"]",
+					indent+"  resources:",
+					indent+"    limits:",
+					indent+"      cpu: \"500m\"",
+					indent+"      memory: 128Mi",
+					indent+"    requests:",
+					indent+"      cpu: \"100m\"",
+					indent+"      memory: 64Mi",
+				)
+				patched = true
+				continue
+			}
+
+			if skip {
+				if trimmed != "" && lineIndent <= containerIndent {
+					skip = false
+				} else {
+					continue
+				}
+			}
+
+			newLines = append(newLines, line)
+		}
+		// Filter empty lines left behind by container block removal
+		var filteredLines []string
+		for _, line := range newLines {
+			if line != "" {
+				filteredLines = append(filteredLines, line)
+			}
+		}
+		lines = filteredLines
+		d.logProgress("  Mock mode: using image %s (no GPU)", d.MockImage)
+	}
+
 	if !patched {
 		return manifestPath, nil
 	}
diff --git a/tests/conformance_test.go b/tests/conformance_test.go
diff --git a/tests/suite_test.go b/tests/suite_test.go