Skip to content

Commit 65299a3

Browse files
committed
Conformance: Adds Data Parallelism Test
Signed-off-by: Daneyon Hansen <[email protected]>
1 parent 4e01160 commit 65299a3

File tree

6 files changed

+697
-77
lines changed

6 files changed

+697
-77
lines changed

conformance/resources/base.yaml

Lines changed: 211 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ spec:
200200
terminationGracePeriodSeconds: 130
201201
containers:
202202
- name: epp
203-
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v1.0.0
203+
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v20251105-cbb8928
204204
imagePullPolicy: Always
205205
args:
206206
- --pool-name
@@ -298,7 +298,7 @@ spec:
298298
terminationGracePeriodSeconds: 130
299299
containers:
300300
- name: epp
301-
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v1.0.0
301+
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v20251105-cbb8928
302302
imagePullPolicy: Always
303303
args:
304304
- --pool-name
@@ -340,6 +340,215 @@ spec:
340340
configMap:
341341
name: plugins-config
342342
---
343+
# --- Data Parallelism (DP) backend Deployment: 3 pods, each running three echo
# containers (HTTP on ports 3000, 3002 and 3004) to simulate ranks ---
344+
apiVersion: apps/v1
345+
kind: Deployment
346+
metadata:
347+
name: dp-inference-model-server-deployment
348+
namespace: inference-conformance-app-backend
349+
labels:
350+
app: dp-inference-model-server
351+
spec:
352+
replicas: 3
353+
selector:
354+
matchLabels:
355+
app: dp-inference-model-server
356+
template:
357+
metadata:
358+
labels:
359+
# Pod label; the dp-inference-pool InferencePool in this file selects on the same value.
app: dp-inference-model-server
360+
spec:
361+
containers:
362+
# First echo container: HTTP on 3000, H2C on 3001.
- name: echoserver-3000
363+
image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd
364+
ports:
365+
- containerPort: 3000
366+
# Readiness is checked with an HTTP GET on "/" against this container's HTTP port.
readinessProbe:
367+
httpGet:
368+
path: /
369+
port: 3000
370+
initialDelaySeconds: 3
371+
periodSeconds: 5
372+
failureThreshold: 2
373+
env:
374+
- name: HTTP_PORT # Default port for HTTP echo server
375+
value: "3000"
376+
- name: H2C_PORT # Default port for H2C echo server
377+
value: "3001"
378+
# NOTE(review): presumably makes the echo server report the port that served the
# request in a response header — confirm against the echo-basic image.
- name: INCLUDE_HTTP_PORT_HEADER
379+
value: "true"
380+
# POD_NAME, NAMESPACE and POD_IP are populated from the pod's own fields via fieldRef.
- name: POD_NAME
381+
valueFrom:
382+
fieldRef:
383+
fieldPath: metadata.name
384+
- name: NAMESPACE
385+
valueFrom:
386+
fieldRef:
387+
fieldPath: metadata.namespace
388+
- name: POD_IP
389+
valueFrom:
390+
fieldRef:
391+
fieldPath: status.podIP
392+
# Second echo container: same image and settings, HTTP on 3002, H2C on 3003.
- name: echoserver-3002
393+
image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd
394+
ports:
395+
- containerPort: 3002
396+
readinessProbe:
397+
httpGet:
398+
path: /
399+
port: 3002
400+
initialDelaySeconds: 3
401+
periodSeconds: 5
402+
failureThreshold: 2
403+
env:
404+
- name: HTTP_PORT
405+
value: "3002"
406+
- name: H2C_PORT
407+
value: "3003"
408+
- name: INCLUDE_HTTP_PORT_HEADER
409+
value: "true"
410+
- name: POD_NAME
411+
valueFrom:
412+
fieldRef:
413+
fieldPath: metadata.name
414+
- name: NAMESPACE
415+
valueFrom:
416+
fieldRef:
417+
fieldPath: metadata.namespace
418+
- name: POD_IP
419+
valueFrom:
420+
fieldRef:
421+
fieldPath: status.podIP
422+
# Third echo container: same image and settings, HTTP on 3004, H2C on 3005.
- name: echoserver-3004
423+
image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd
424+
ports:
425+
- containerPort: 3004
426+
readinessProbe:
427+
httpGet:
428+
path: /
429+
port: 3004
430+
initialDelaySeconds: 3
431+
periodSeconds: 5
432+
failureThreshold: 2
433+
env:
434+
- name: HTTP_PORT
435+
value: "3004"
436+
- name: H2C_PORT
437+
value: "3005"
438+
- name: INCLUDE_HTTP_PORT_HEADER
439+
value: "true"
440+
- name: POD_NAME
441+
valueFrom:
442+
fieldRef:
443+
fieldPath: metadata.name
444+
- name: NAMESPACE
445+
valueFrom:
446+
fieldRef:
447+
fieldPath: metadata.namespace
448+
- name: POD_IP
449+
valueFrom:
450+
fieldRef:
451+
fieldPath: status.podIP
452+
---
453+
# --- Data Parallelism (DP) InferencePool Definition ---
# Groups the DP backend pods and exposes three target ports per pod.
454+
apiVersion: inference.networking.k8s.io/v1
455+
kind: InferencePool
456+
metadata:
457+
name: dp-inference-pool
458+
namespace: inference-conformance-app-backend
459+
spec:
460+
selector:
461+
matchLabels:
462+
# Matches the pod label set by dp-inference-model-server-deployment.
app: dp-inference-model-server
463+
# One entry per echo container HTTP port in the DP backend pods.
targetPorts:
464+
- number: 3000
465+
- number: 3002
466+
- number: 3004
467+
# Refers to the dp-endpoint-picker-svc Service defined in this file, on its port 9002.
endpointPickerRef:
468+
name: dp-endpoint-picker-svc
469+
port:
470+
number: 9002
471+
---
472+
# --- Data Parallelism (DP) Conformance EPP service Definition ---
# ClusterIP Service in front of the DP endpoint picker (EPP) pods.
473+
apiVersion: v1
474+
kind: Service
475+
metadata:
476+
name: dp-endpoint-picker-svc
477+
namespace: inference-conformance-app-backend
478+
spec:
479+
selector:
480+
# Matches the pod label set by the dp-app-endpoint-picker Deployment in this file.
app: dp-app-backend-epp
481+
ports:
482+
- protocol: TCP
483+
# 9002 is the port named by dp-inference-pool's endpointPickerRef and the
# value passed to the EPP container's --grpc-port argument.
port: 9002
484+
targetPort: 9002
485+
appProtocol: http2
486+
type: ClusterIP
487+
---
488+
# --- Data Parallelism (DP) Conformance EPP Deployment ---
# Runs a single endpoint picker (EPP) replica configured for dp-inference-pool.
489+
apiVersion: apps/v1
490+
kind: Deployment
491+
metadata:
492+
name: dp-app-endpoint-picker
493+
namespace: inference-conformance-app-backend
494+
labels:
495+
app: dp-app-backend-epp
496+
spec:
497+
replicas: 1
498+
selector:
499+
matchLabels:
500+
app: dp-app-backend-epp
501+
template:
502+
metadata:
503+
labels:
504+
# Pod label; the dp-endpoint-picker-svc Service in this file selects on the same value.
app: dp-app-backend-epp
505+
spec:
506+
# Conservatively, this timeout should mirror the longest grace period of the pods within the pool
507+
terminationGracePeriodSeconds: 130
508+
containers:
509+
- name: epp
510+
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v20251105-cbb8928
511+
imagePullPolicy: Always
512+
# Flags and their values are passed as alternating list items.
args:
513+
# Name and namespace of the InferencePool this EPP instance is pointed at.
- --pool-name
514+
- "dp-inference-pool"
515+
- --pool-namespace
516+
- "inference-conformance-app-backend"
517+
- --v
518+
- "4"
519+
- --zap-encoder
520+
- "json"
521+
# Same port as the dp-endpoint-picker-svc targetPort.
- --grpc-port
522+
- "9002"
523+
# Same port as the gRPC liveness and readiness probes below.
- --grpc-health-port
524+
- "9003"
525+
# Path lies under the /config volumeMount below.
# NOTE(review): assumes the plugins-config ConfigMap has a conformance-plugins.yaml key — confirm.
- "--config-file"
526+
- "/config/conformance-plugins.yaml"
527+
ports:
528+
- containerPort: 9002
529+
- containerPort: 9003
530+
- name: metrics
531+
containerPort: 9090
532+
# Liveness and readiness use identical gRPC health checks on port 9003.
livenessProbe:
533+
grpc:
534+
port: 9003
535+
service: inference-extension
536+
initialDelaySeconds: 5
537+
periodSeconds: 10
538+
readinessProbe:
539+
grpc:
540+
port: 9003
541+
service: inference-extension
542+
initialDelaySeconds: 5
543+
periodSeconds: 10
544+
volumeMounts:
545+
- name: plugins-config-volume
546+
mountPath: "/config"
547+
volumes:
548+
# Backed by the plugins-config ConfigMap.
- name: plugins-config-volume
549+
configMap:
550+
name: plugins-config
551+
---
343552
apiVersion: v1
344553
kind: ConfigMap
345554
metadata:

0 commit comments

Comments
 (0)