cuda.core.system: affinity, clock, fan, temperature and thermals (#1492) #1991
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | |
| # | |
| # SPDX-License-Identifier: Apache-2.0 | |
# Note: The test job refers to this workflow by name, so keep any rename in
# sync there. The name is also used to reference a run in the backport branch
# in order to fetch old bindings.
name: 'CI'

# One concurrency group per workflow/ref/event; a newer run in the same group
# cancels the run that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
  cancel-in-progress: true

# Triggered by pushes to "main" and to "pull-request/<number>" branches.
on:
  push:
    branches:
      - 'pull-request/[0-9]+'
      - 'main'
| jobs: | |
# Reads the CUDA toolkit versions from ci/versions.yml and exposes them as job
# outputs (CUDA_BUILD_VER, CUDA_PREV_BUILD_VER) for the jobs that list this job
# under `needs:`.
ci-vars:
  runs-on: ubuntu-latest
  outputs:
    CUDA_BUILD_VER: ${{ steps.get-vars.outputs.cuda_build_ver }}
    CUDA_PREV_BUILD_VER: ${{ steps.get-vars.outputs.cuda_prev_build_ver }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
      with:
        fetch-depth: 0
    - name: Get CUDA build versions
      id: get-vars
      run: |
        set -euo pipefail

        # Write "<output_name>=<value>" to $GITHUB_OUTPUT, where <value> is
        # read from ci/versions.yml at the given yq path.
        emit_version() {
          local output_name="$1"
          local yq_path="$2"
          local value
          # Assigned separately from `local` so a yq failure is not masked.
          value="$(yq "${yq_path}" ci/versions.yml)"
          # yq prints the literal string "null" (with exit status 0) when the
          # key is missing; fail here instead of handing "null" to the
          # downstream jobs as a CUDA version.
          if [[ -z "${value}" || "${value}" == "null" ]]; then
            echo "::error::${yq_path} is missing or empty in ci/versions.yml"
            exit 1
          fi
          echo "${output_name}=${value}" >> "$GITHUB_OUTPUT"
        }

        emit_version cuda_build_ver '.cuda.build.version'
        emit_version cuda_prev_build_ver '.cuda.prev_build.version'
# Decides from the pull-request title whether builds/tests should be skipped.
# Outputs:
#   skip     - "true" when the PR title contains "[no-ci]"
#   doc-only - "true" when the PR title contains "[doc-only]"
# Both are "false" for refs that are not "pull-request/<number>" branches.
should-skip:
  runs-on: ubuntu-latest
  outputs:
    skip: ${{ steps.get-should-skip.outputs.skip }}
    doc-only: ${{ steps.get-should-skip.outputs.doc_only }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
    - name: Compute whether to skip builds and tests
      id: get-should-skip
      env:
        GH_TOKEN: ${{ github.token }}
        # Passed through the environment rather than spliced into the script
        # text, so the ref name is only ever treated as data by the shell.
        REF_NAME: ${{ github.ref_name }}
      run: |
        set -euxo pipefail
        skip=false
        doc_only=false
        # The PR number is the numeric suffix of a "pull-request/<number>" ref.
        if [[ "${REF_NAME}" =~ ^pull-request/([0-9]+)$ ]]; then
          pr_number="${BASH_REMATCH[1]}"
          pr_title="$(gh pr view "${pr_number}" --json title --jq '.title')"
          # Literal substring matches; no pipeline involved, so `pipefail`
          # cannot turn an early-exiting reader into a spurious "false".
          if [[ "${pr_title}" == *'[no-ci]'* ]]; then
            skip=true
          fi
          if [[ "${pr_title}" == *'[doc-only]'* ]]; then
            doc_only=true
          fi
        fi
        echo "skip=${skip}" >> "$GITHUB_OUTPUT"
        echo "doc_only=${doc_only}" >> "$GITHUB_OUTPUT"
# NOTE: There is deliberately one build job per platform instead of a single
# shared matrix. Each test job can then depend on just its own platform's
# build, which lets faster platforms run through build & test without waiting
# for slower ones. Keep these job definitions textually identical except for:
#   - host-platform value
#   - if: condition (build-linux-64 omits doc-only check since it's needed for docs)
build-linux-64:
  name: Build ${{ matrix.host-platform }}, CUDA ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
  if: github.repository_owner == 'nvidia' && !fromJSON(needs.should-skip.outputs.skip)
  needs: [ci-vars, should-skip]
  strategy:
    fail-fast: false
    matrix:
      host-platform: [linux-64]
  uses: ./.github/workflows/build-wheel.yml
  secrets: inherit
  with:
    host-platform: ${{ matrix.host-platform }}
    cuda-version: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
    prev-cuda-version: ${{ needs.ci-vars.outputs.CUDA_PREV_BUILD_VER }}

# See build-linux-64 for why build jobs are split by platform.
build-linux-aarch64:
  name: Build ${{ matrix.host-platform }}, CUDA ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
  if: github.repository_owner == 'nvidia' && !fromJSON(needs.should-skip.outputs.skip) && !fromJSON(needs.should-skip.outputs.doc-only)
  needs: [ci-vars, should-skip]
  strategy:
    fail-fast: false
    matrix:
      host-platform: [linux-aarch64]
  uses: ./.github/workflows/build-wheel.yml
  secrets: inherit
  with:
    host-platform: ${{ matrix.host-platform }}
    cuda-version: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
    prev-cuda-version: ${{ needs.ci-vars.outputs.CUDA_PREV_BUILD_VER }}

# See build-linux-64 for why build jobs are split by platform.
build-windows:
  name: Build ${{ matrix.host-platform }}, CUDA ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
  if: github.repository_owner == 'nvidia' && !fromJSON(needs.should-skip.outputs.skip) && !fromJSON(needs.should-skip.outputs.doc-only)
  needs: [ci-vars, should-skip]
  strategy:
    fail-fast: false
    matrix:
      host-platform: [win-64]
  uses: ./.github/workflows/build-wheel.yml
  secrets: inherit
  with:
    host-platform: ${{ matrix.host-platform }}
    cuda-version: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
    prev-cuda-version: ${{ needs.ci-vars.outputs.CUDA_PREV_BUILD_VER }}
# NOTE: As with the build jobs (see build-linux-64), there is one test job per
# platform. Keep these job definitions textually identical except for:
#   - host-platform value
#   - build job under needs:
#   - uses: (test-wheel-linux.yml vs test-wheel-windows.yml)
test-linux-64:
  name: Test ${{ matrix.host-platform }}
  if: github.repository_owner == 'nvidia' && !fromJSON(needs.should-skip.outputs.doc-only)
  needs:
    - ci-vars
    - should-skip
    - build-linux-64
  permissions:
    contents: read  # This is required for actions/checkout
  strategy:
    fail-fast: false
    matrix:
      host-platform: [linux-64]
  uses: ./.github/workflows/test-wheel-linux.yml
  secrets: inherit
  with:
    build-type: pull-request
    host-platform: ${{ matrix.host-platform }}
    build-ctk-ver: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}

# See test-linux-64 for why test jobs are split by platform.
test-linux-aarch64:
  name: Test ${{ matrix.host-platform }}
  if: github.repository_owner == 'nvidia' && !fromJSON(needs.should-skip.outputs.doc-only)
  needs:
    - ci-vars
    - should-skip
    - build-linux-aarch64
  permissions:
    contents: read  # This is required for actions/checkout
  strategy:
    fail-fast: false
    matrix:
      host-platform: [linux-aarch64]
  uses: ./.github/workflows/test-wheel-linux.yml
  secrets: inherit
  with:
    build-type: pull-request
    host-platform: ${{ matrix.host-platform }}
    build-ctk-ver: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}

# See test-linux-64 for why test jobs are split by platform.
test-windows:
  name: Test ${{ matrix.host-platform }}
  if: github.repository_owner == 'nvidia' && !fromJSON(needs.should-skip.outputs.doc-only)
  needs:
    - ci-vars
    - should-skip
    - build-windows
  permissions:
    contents: read  # This is required for actions/checkout
  strategy:
    fail-fast: false
    matrix:
      host-platform: [win-64]
  uses: ./.github/workflows/test-wheel-windows.yml
  secrets: inherit
  with:
    build-type: pull-request
    host-platform: ${{ matrix.host-platform }}
    build-ctk-ver: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
# Builds the documentation through the reusable build-docs.yml workflow once
# the linux-64 build job has completed.
doc:
  name: Docs
  if: github.repository_owner == 'nvidia'
  needs: [ci-vars, build-linux-64]
  # GITHUB_TOKEN permissions that allow deployment to GitHub Pages.
  permissions:
    id-token: write
    contents: write
    pull-requests: write
  uses: ./.github/workflows/build-docs.yml
  secrets: inherit
  with:
    build-ctk-ver: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
# Final gate job: always runs, and turns the results of the jobs it depends on
# into a single pass/fail status.
checks:
  name: Check job status
  if: always()
  runs-on: ubuntu-latest
  needs:
    - should-skip
    - test-linux-64
    - test-linux-aarch64
    - test-windows
    - doc
  steps:
    - name: Exit
      env:
        # Results and outputs are handed over through the environment instead
        # of being spliced into the script text.
        DOC_ONLY: ${{ needs.should-skip.outputs.doc-only }}
        SHOULD_SKIP_RESULT: ${{ needs.should-skip.result }}
        DOC_RESULT: ${{ needs.doc.result }}
        TEST_LINUX_64_RESULT: ${{ needs.test-linux-64.result }}
        TEST_LINUX_AARCH64_RESULT: ${{ needs.test-linux-aarch64.result }}
        TEST_WINDOWS_RESULT: ${{ needs.test-windows.result }}
      run: |
        # If any dependency failed or was cancelled, that's a failure.
        #
        # see https://docs.github.com/en/actions/reference/workflows-and-actions/expressions#always
        # and https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/troubleshooting-required-status-checks#handling-skipped-but-required-checks
        # for why this cannot be encoded in the job-level `if:` field.
        #
        # TL;DR: skipped-as-success and required status checks interact
        # badly. If this job does NOT run under `always()`, the status check
        # UI blocks merging. If it DOES run under `always()`, it also runs
        # when a dependency failed or was cancelled (e.g. a critically
        # failing job that timed out), so it must inspect the results itself;
        # otherwise it would report success (originally this was just
        # 'exit 0').
        #
        # A 'skipped' result is not treated as a failure here.
        #
        # Note: When [doc-only] is in the PR title, test jobs are
        # intentionally skipped and their results are not inspected.
        failed_or_cancelled() {
          [[ "$1" == "failure" || "$1" == "cancelled" ]]
        }

        if failed_or_cancelled "${SHOULD_SKIP_RESULT}"; then
          echo "should-skip result: ${SHOULD_SKIP_RESULT}"
          exit 1
        fi
        if failed_or_cancelled "${DOC_RESULT}"; then
          echo "doc result: ${DOC_RESULT}"
          exit 1
        fi
        if [[ "${DOC_ONLY}" != "true" ]]; then
          for result in \
            "${TEST_LINUX_64_RESULT}" \
            "${TEST_LINUX_AARCH64_RESULT}" \
            "${TEST_WINDOWS_RESULT}"; do
            if failed_or_cancelled "${result}"; then
              echo "test job result: ${result}"
              exit 1
            fi
          done
        fi
        exit 0