# Nightly - optimized baseline E2E (GKE) #5
# NOTE: This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: Nightly - optimized baseline E2E (GKE)

# Nightly regression test for the optimized-baseline guide on GKE.
# Deploys the guide and validates with e2e-validate.sh.

on:
  schedule:
    - cron: '0 10 * * *'  # 10:00 UTC daily (staggered from OCP nightlies)
  workflow_dispatch:
    inputs:
      skip_cleanup:
        description: 'Skip cleanup after tests (for debugging)'
        required: false
        # Declared as a boolean so the manual-run form shows a checkbox.
        # github.event.inputs still delivers the value as the string
        # 'true' / 'false', which is what the job expression below compares.
        type: boolean
        default: false

# The workflow itself only needs to read repository contents.
permissions:
  contents: read

# All runs share one concurrency group, so a newly started run cancels a run
# that is still in progress.
# NOTE(review): a cancelled run may be interrupted before the reusable
# workflow's cleanup executes - confirm that is acceptable for the cluster.
concurrency:
  group: nightly-e2e-optimized-baseline-gke
  cancel-in-progress: true

jobs:
  nightly:
    # Run only in the llm-d/llm-d repository (skipped everywhere else).
    if: github.repository == 'llm-d/llm-d'
    # NOTE(review): '@main' tracks a moving branch while 'secrets: inherit'
    # hands it every secret - consider pinning to a tag or commit SHA.
    uses: llm-d/llm-d-infra/.github/workflows/reusable-nightly-e2e-gke.yaml@main
    with:
      guide_name: optimized-baseline
      namespace: llm-d-nightly-inference-gke
      gateway_host: 'optimized-baseline-epp'
      # Deployment commands handed to the reusable workflow: apply the GKE
      # vLLM model-server kustomization, then install the standalone
      # inference-extension chart with the base and guide-specific values.
      custom_deploy_script: |
        kubectl apply -k guides/optimized-baseline/modelserver/gpu/gke-patch/vllm -n ${NAMESPACE}
        helm install optimized-baseline \
          oci://registry.k8s.io/gateway-api-inference-extension/charts/standalone \
          -f guides/recipes/scheduler/base.values.yaml \
          -f guides/optimized-baseline/scheduler/optimized-baseline.values.yaml \
          -n ${NAMESPACE} --version v1.4.0
      gke_cluster_name: llm-d-e2e-us-east5
      gke_cluster_zone: us-east5
      required_gpus: 2
      recommended_gpus: 4
      accelerator_type: H100
      pod_wait_timeout: '30m'
      pod_readiness_delay: 180
      image_override: 'ghcr.io/llm-d/llm-d-cuda-dev:latest'
      allow_gpu_preemption: true
      # Test the same ref that triggered this workflow run.
      llm_d_ref: ${{ github.ref }}
      # True only for a manual dispatch with skip_cleanup ticked; scheduled
      # runs carry no inputs, so the comparison evaluates to false.
      skip_cleanup: ${{ github.event.inputs.skip_cleanup == 'true' }}
    secrets: inherit