|
1 | | -name: Nightly - OpenShift E2E Tests |
| 1 | +name: Nightly - CKS E2E Tests |
2 | 2 |
|
3 | | -# Nightly regression test for WVA on OpenShift. |
4 | | -# Calls the reusable workflow from llm-d/llm-d-infra to deploy the |
5 | | -# workload-autoscaling guide stack and run the e2e test suite. |
| 3 | +# Nightly regression test for WVA on CoreWeave Kubernetes (CKS). |
| 4 | +# Calls the reusable CKS helmfile workflow from llm-d/llm-d-infra to deploy |
| 5 | +# the workload-autoscaling guide stack and run the e2e test suite on waldorf. |
6 | 6 |
|
7 | 7 | on: |
8 | 8 | schedule: |
9 | | - - cron: '0 0 * * *' # Midnight UTC daily |
| 9 | + - cron: '30 6 * * *' # 06:30 UTC daily (staggered from IS CKS at 06:00) |
10 | 10 | workflow_dispatch: |
11 | 11 | inputs: |
12 | 12 | model_id: |
13 | 13 | description: 'Model ID' |
14 | 14 | required: false |
15 | 15 | default: 'unsloth/Meta-Llama-3.1-8B' |
16 | 16 | accelerator_type: |
17 | | - description: 'Accelerator type (H100, A100, L40S)' |
| 17 | + description: 'Accelerator type (H100, H200, A100)' |
18 | 18 | required: false |
19 | | - default: 'A100' |
| 19 | + default: 'H100' |
20 | 20 | image_tag: |
21 | 21 | description: 'WVA image tag — "latest" auto-resolves to newest release' |
22 | 22 | required: false |
@@ -46,20 +46,22 @@ permissions: |
46 | 46 | contents: read |
47 | 47 |
|
48 | 48 | concurrency: |
49 | | - group: nightly-e2e-openshift |
| 49 | + group: nightly-e2e-cks-wva |
50 | 50 | cancel-in-progress: true |
51 | 51 |
|
52 | 52 | jobs: |
53 | 53 | nightly: |
54 | | - uses: llm-d/llm-d-infra/.github/workflows/reusable-nightly-e2e-openshift.yaml@main |
| 54 | + uses: llm-d/llm-d-infra/.github/workflows/reusable-nightly-e2e-cks-helmfile.yaml@main |
55 | 55 | with: |
56 | 56 | guide_name: workload-autoscaling |
57 | | - namespace_suffix: nightly-wva |
| 57 | + namespace: llm-d-nightly-wva-cks |
| 58 | + helmfile_env: istio |
| 59 | + gateway_type: istio |
58 | 60 | caller_repo: ${{ github.repository }} |
59 | 61 | caller_ref: ${{ github.ref_name }} |
60 | 62 | deploy_wva: true |
61 | 63 | model_id: ${{ github.event.inputs.model_id || 'unsloth/Meta-Llama-3.1-8B' }} |
62 | | - accelerator_type: ${{ github.event.inputs.accelerator_type || 'A100' }} |
| 64 | + accelerator_type: ${{ github.event.inputs.accelerator_type || 'H100' }} |
63 | 65 | wva_image_tag: ${{ github.event.inputs.image_tag || 'latest' }} |
64 | 66 | request_rate: ${{ github.event.inputs.request_rate || '20' }} |
65 | 67 | num_prompts: ${{ github.event.inputs.num_prompts || '3000' }} |
|
68 | 70 | skip_cleanup: ${{ github.event.inputs.skip_cleanup == 'true' }} |
69 | 71 | required_gpus: 2 |
70 | 72 | recommended_gpus: 4 |
| 73 | + allow_gpu_preemption: true |
| 74 | + pod_wait_timeout: '30m' |
| 75 | + pod_readiness_delay: 180 |
| 76 | + image_override: 'ghcr.io/llm-d/llm-d-cuda-dev:latest' |
71 | 77 |     test_target: test-e2e-openshift  # NOTE(review): still the "openshift" target on a CKS run — confirm the reusable CKS workflow / Makefile intentionally reuses this target name
72 | 78 | secrets: inherit |
0 commit comments