Skip to content

chore(deps): bump vllm 0.18→0.20 + torch 2.10→2.11 stack #206

chore(deps): bump vllm 0.18→0.20 + torch 2.10→2.11 stack

chore(deps): bump vllm 0.18→0.20 + torch 2.10→2.11 stack #206

Workflow file for this run

# Copyright (c) 2024-2026, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ---------------------------------------------------------------------------
# GPU tests run on NVIDIA on-prem self-hosted runners and use the copy-pr-bot
# pattern: PRs are tested via push events to pull-request/* branches rather
# than pull_request events.
# See: https://docs.gha-runners.nvidia.com/platform/apps/copy-pr-bot/
# ---------------------------------------------------------------------------
name: GPU Tests

# Triggers:
#   * schedule          - nightly run.
#   * workflow_dispatch - manual run; the `suite` input selects which GPU
#                         suite(s) execute (`all`, `smoke`, or `e2e`).
# The push trigger for copy-pr-bot branches is currently commented out.
on:
  schedule:
    # Nightly at 02:00 UTC.
    - cron: '0 2 * * *'
  # disabled for now to avoid running on PRs
  # push:
  #   branches:
  #     - "pull-request/[0-9]+"
  workflow_dispatch:
    inputs:
      suite:
        description: "GPU test suite to run"
        required: true
        default: all
        type: choice
        options:
          - all
          - smoke
          - e2e

# Every `run:` step uses bash with command tracing (-x), exit-on-error (-e),
# error-on-unset-variable (-u), and pipeline failure propagation (pipefail).
defaults:
  run:
    shell: bash -x -e -u -o pipefail {0}

# One active run per workflow + ref; a newer run cancels the in-progress one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # Runs the repository-local detect-changes action and exposes its `src` and
  # `test` outputs to downstream jobs. Skipped for manual (workflow_dispatch)
  # runs, where the suite is chosen explicitly via the `suite` input.
  changes:
    name: Detect changes
    if: github.event_name != 'workflow_dispatch'
    runs-on: linux-amd64-cpu4
    permissions:
      contents: read
    outputs:
      src: ${{ steps.changes.outputs.src }}
      test: ${{ steps.changes.outputs.test }}
    steps:
      - uses: actions/checkout@v6
      - name: Detect changes
        id: changes
        uses: ./.github/actions/detect-changes

  gpu-smoke-test:
    name: GPU Smoke Tests
    needs: changes
    # `changes` is intentionally skipped on workflow_dispatch. `always()` lets
    # manual runs bypass that skipped dependency and run the selected GPU suite.
    #
    # Runs when:
    #   * manual dispatch with suite `all` or `smoke`, or
    #   * non-manual event and change detection reported src/test changes.
    if: >-
      ${{
        always() &&
        (
          github.event_name == 'workflow_dispatch' ||
          needs.changes.outputs.src == 'true' ||
          needs.changes.outputs.test == 'true'
        ) &&
        (
          github.event_name != 'workflow_dispatch' ||
          inputs.suite == 'all' ||
          inputs.suite == 'smoke'
        )
      }}
    timeout-minutes: 30
    runs-on: linux-amd64-gpu-a100-latest-1
    steps:
      - name: checkout
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
      - name: Setup GPU test environment
        uses: ./.github/actions/setup-gpu-test-env
      - name: Run GPU smoke tests
        timeout-minutes: 20
        run: make test-smoke-gpu

  gpu-e2e-test:
    name: GPU E2E Tests
    needs: changes
    # `changes` is intentionally skipped on workflow_dispatch. `always()` lets
    # manual runs bypass that skipped dependency and run the selected GPU suite.
    #
    # Runs when:
    #   * manual dispatch with suite `all` or `e2e`, or
    #   * non-manual event and change detection reported src/test changes.
    if: >-
      ${{
        always() &&
        (
          github.event_name == 'workflow_dispatch' ||
          needs.changes.outputs.src == 'true' ||
          needs.changes.outputs.test == 'true'
        ) &&
        (
          github.event_name != 'workflow_dispatch' ||
          inputs.suite == 'all' ||
          inputs.suite == 'e2e'
        )
      }}
    timeout-minutes: 60
    runs-on: linux-amd64-gpu-a100-latest-1
    steps:
      - name: checkout
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
      # Environment setup goes through the same composite action as the smoke
      # job so both GPU jobs are provisioned identically.
      - name: Setup GPU test environment
        uses: ./.github/actions/setup-gpu-test-env
      - name: Run GPU E2E tests
        timeout-minutes: 45
        run: make test-e2e

  # ---------------------------------------------------------------------------
  # Single required status check for branch protection.
  # Smoke tests are required; E2E failures produce a warning but don't block.
  # ---------------------------------------------------------------------------
  gpu-ci-status:
    name: GPU CI Status
    if: always() && !cancelled()
    needs: [changes, gpu-smoke-test, gpu-e2e-test]
    runs-on: linux-amd64-cpu4
    steps:
      - name: Check job results
        run: |
          echo "changes: ${{ needs.changes.result }}"
          echo "gpu-smoke-test: ${{ needs.gpu-smoke-test.result }}"
          echo "gpu-e2e-test: ${{ needs.gpu-e2e-test.result }}"
          if [[ "${{ needs.changes.result }}" == "failure" ]]; then
            echo "::error::Change detection failed"
            exit 1
          fi
          if [[ "${{ needs.gpu-smoke-test.result }}" == "failure" ]]; then
            echo "::error::GPU smoke tests failed (required)"
            exit 1
          fi
          if [[ "${{ needs.gpu-e2e-test.result }}" == "failure" ]]; then
            echo "::warning::GPU E2E tests failed (informational, does not block merge)"
          fi
          echo "All required GPU jobs passed (or were skipped)."