@@ -234,13 +234,17 @@ var _ = Describe("Scale-Up Latency Benchmark", Label("benchmark"), Ordered, func
234234 GinkgoWriter .Printf ("Running spike phase for %v\n " , spikeDuration )
235235
236236 spikeStart := time .Now ()
237- targetURL := fmt .Sprintf ("http://%s.%s.svc.cluster.local:8000/v1/completions" , serviceName , benchCfg .LLMDNamespace )
237+
238+ // Route load through the Gateway → EPP → model server (full llm-d stack)
239+ gwHost := fmt .Sprintf ("%s.%s.svc.cluster.local" , benchCfg .GatewayServiceName , benchCfg .LLMDNamespace )
240+ targetURL := fmt .Sprintf ("http://%s:%d/v1/completions" , gwHost , benchCfg .GatewayServicePort )
241+ GinkgoWriter .Printf ("Load target URL (via Gateway): %s\n " , targetURL )
238242
239243 By ("Cleaning up any existing load jobs" )
240244 fixtures .DeleteParallelLoadJobs (ctx , k8sClient , jobBaseName , benchCfg .LLMDNamespace , benchLoadWorkers )
241245 time .Sleep (2 * time .Second )
242246
243- By ("Waiting for service endpoints to exist" )
247+ By ("Waiting for model service endpoints to exist" )
244248 Eventually (func (g Gomega ) {
245249 endpoints , err := k8sClient .CoreV1 ().Endpoints (benchCfg .LLMDNamespace ).Get (ctx , serviceName , metav1.GetOptions {})
246250 g .Expect (err ).NotTo (HaveOccurred ())
@@ -253,26 +257,27 @@ var _ = Describe("Scale-Up Latency Benchmark", Label("benchmark"), Ordered, func
253257 g .Expect (readyCount ).To (BeNumerically (">" , 0 ))
254258 }, 5 * time .Minute , 10 * time .Second ).Should (Succeed ())
255259
256- By ("Running in-cluster connectivity probe to diagnose load path " )
260+ By ("Running in-cluster connectivity probe via Gateway " )
257261 probePodName := "bench-connectivity-probe"
258262 probeScript := fmt .Sprintf (`#!/bin/sh
259- echo "=== DNS Resolution ==="
260- nslookup %s.%s.svc.cluster.local 2>&1 || echo "nslookup failed (tool may not exist)"
263+ echo "=== Gateway DNS Resolution ==="
264+ nslookup %s 2>&1 || echo "nslookup failed (tool may not exist)"
261265echo ""
262- echo "=== Service ClusterIP ==="
263- getent hosts %s.%s.svc.cluster.local 2>&1 || echo "getent failed"
266+ echo "=== Gateway Service ClusterIP ==="
267+ getent hosts %s 2>&1 || echo "getent failed"
264268echo ""
265- echo "=== Curl verbose GET ==="
266- curl -v --max-time 10 "%s" 2>&1
269+ echo "=== Direct model service check ==="
270+ HTTP_CODE=$(curl -s -o /dev/null -w "%%{http_code}" --max-time 10 "http://%s.%s.svc.cluster.local:8000/v1/completions" 2>/dev/null)
271+ echo "Direct model service HTTP status: $HTTP_CODE"
267272echo ""
268- echo "=== Curl verbose POST ==="
269- curl -v --max-time 10 -X POST "%s" -H "Content-Type: application/json" -d '{"model":"test ","prompt":"hello","max_tokens":1}' 2>&1
273+ echo "=== Gateway POST (full stack path) ==="
274+ curl -v --max-time 15 -X POST "%s" -H "Content-Type: application/json" -d '{"model":"%s ","prompt":"hello","max_tokens":1}' 2>&1
270275echo ""
271- echo "=== HTTP status code only ==="
272- HTTP_CODE=$(curl -s -o /dev/null -w "%%{http_code}" "%s" 2>/dev/null)
273- echo "HTTP status code: $HTTP_CODE"
274- echo "Grep test: $(echo $HTTP_CODE | grep -E '^(200|404)' && echo PASS || echo FAIL)"
275- ` , serviceName , benchCfg . LLMDNamespace , serviceName , benchCfg .LLMDNamespace , targetURL , targetURL , targetURL )
276+ echo "=== Gateway HTTP status code ==="
277+ HTTP_CODE=$(curl -s -o /dev/null -w "%%{http_code}" --max-time 15 -X POST "%s" -H "Content-Type: application/json" -d '{"model":"%s","prompt":"test","max_tokens":1}' 2>/dev/null)
278+ echo "Gateway HTTP status code: $HTTP_CODE"
279+ echo "Grep test: $(echo $HTTP_CODE | grep -E '^(200|404|405 )' && echo PASS || echo FAIL)"
280+ ` , gwHost , gwHost , serviceName , benchCfg .LLMDNamespace , targetURL , benchCfg . ModelID , targetURL , benchCfg . ModelID )
276281
277282 probePod := & corev1.Pod {
278283 ObjectMeta : metav1.ObjectMeta {
0 commit comments