---
# Nightly - optimized baseline E2E (GKE)
name: Nightly - optimized baseline E2E (GKE)
# Nightly regression test for the optimized-baseline guide on GKE.
# Deploys the guide and validates with e2e-validate.sh.

on:
  schedule:
    - cron: '0 10 * * *'  # 10:00 UTC daily (staggered from OCP nightlies)
  workflow_dispatch:
    inputs:
      skip_cleanup:
        description: 'Skip cleanup after tests (for debugging)'
        required: false
        default: 'false'

# Read-only token: the reusable workflow receives cloud credentials via secrets.
permissions:
  contents: read

# Only one nightly run at a time; a newer run supersedes an in-flight one.
concurrency:
  group: nightly-e2e-optimized-baseline-gke
  cancel-in-progress: true

jobs:
  nightly:
    # Guard so forks do not schedule the nightly against shared GKE capacity.
    if: github.repository == 'llm-d/llm-d'
    uses: llm-d/llm-d-infra/.github/workflows/reusable-nightly-e2e-gke.yaml@main
    with:
      guide_name: optimized-baseline
      namespace: llm-d-nightly-inference-gke
      gateway_host: 'optimized-baseline-epp'
      # Custom deploy: apply the GKE vLLM patch, then install the scheduler
      # chart with the base values overlaid by the guide-specific values.
      # ${NAMESPACE} is expanded by the shell inside the reusable workflow.
      custom_deploy_script: |
        kubectl apply -k guides/optimized-baseline/modelserver/gpu/gke-patch/vllm -n ${NAMESPACE}
        helm install optimized-baseline \
          oci://registry.k8s.io/gateway-api-inference-extension/charts/standalone \
          -f guides/recipes/scheduler/base.values.yaml \
          -f guides/optimized-baseline/scheduler/optimized-baseline.values.yaml \
          -n ${NAMESPACE} --version v1.4.0
      gke_cluster_name: llm-d-e2e-us-east5
      gke_cluster_zone: us-east5
      required_gpus: 2
      recommended_gpus: 4
      accelerator_type: H100
      pod_wait_timeout: '30m'
      pod_readiness_delay: 180
      image_override: 'ghcr.io/llm-d/llm-d-cuda-dev:latest'
      allow_gpu_preemption: true
      # Test the ref that triggered this run (default branch on schedule).
      llm_d_ref: ${{ github.ref }}
      # Dispatch-only input arrives as a string; compare to 'true' to get a bool.
      skip_cleanup: ${{ github.event.inputs.skip_cleanup == 'true' }}
    secrets: inherit