Skip to content

Commit 3b80acd

Browse files
committed
Merge remote-tracking branch 'upstream/main' into cuda.core.system-affinity
2 parents 3a621e5 + 0b1b3fd commit 3b80acd

File tree

14 files changed

+598
-56
lines changed

14 files changed

+598
-56
lines changed

.github/workflows/build-docs.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,12 @@ jobs:
4747
run:
4848
shell: bash -el {0}
4949
steps:
50+
- name: validate build-ctk
51+
run: |
52+
if [[ ! "${{ inputs.build-ctk-ver }}" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
53+
echo "error: 'build-ctk-ver' ${{ inputs.build-ctk-ver }} version does not match MAJOR.MINOR.MICRO" >&2
54+
exit 1
55+
fi
5056
- name: Checkout ${{ github.event.repository.name }}
5157
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
5258
with:

.github/workflows/ci.yml

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,12 @@ jobs:
6262
echo "skip=${skip}" >> "$GITHUB_OUTPUT"
6363
echo "doc_only=${doc_only}" >> "$GITHUB_OUTPUT"
6464
65-
# WARNING: make sure all of the build jobs are in sync
65+
# NOTE: Build jobs are intentionally split by platform rather than using a single
66+
# matrix. This allows each test job to depend only on its corresponding build,
67+
# so faster platforms can proceed through build & test without waiting for slower
68+
# ones. Keep these job definitions textually identical except for:
69+
# - host-platform value
70+
# - if: condition (build-linux-64 omits doc-only check since it's needed for docs)
6671
build-linux-64:
6772
needs:
6873
- ci-vars
@@ -81,7 +86,7 @@ jobs:
8186
cuda-version: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
8287
prev-cuda-version: ${{ needs.ci-vars.outputs.CUDA_PREV_BUILD_VER }}
8388

84-
# WARNING: make sure all of the build jobs are in sync
89+
# See build-linux-64 for why build jobs are split by platform.
8590
build-linux-aarch64:
8691
needs:
8792
- ci-vars
@@ -100,7 +105,7 @@ jobs:
100105
cuda-version: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
101106
prev-cuda-version: ${{ needs.ci-vars.outputs.CUDA_PREV_BUILD_VER }}
102107

103-
# WARNING: make sure all of the build jobs are in sync
108+
# See build-linux-64 for why build jobs are split by platform.
104109
build-windows:
105110
needs:
106111
- ci-vars
@@ -119,7 +124,11 @@ jobs:
119124
cuda-version: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
120125
prev-cuda-version: ${{ needs.ci-vars.outputs.CUDA_PREV_BUILD_VER }}
121126

122-
# WARNING: make sure both Linux test jobs are in sync
127+
# NOTE: Test jobs are split by platform for the same reason as build jobs (see
128+
# build-linux-64). Keep these job definitions textually identical except for:
129+
# - host-platform value
130+
# - build job under needs:
131+
# - uses: (test-wheel-linux.yml vs test-wheel-windows.yml)
123132
test-linux-64:
124133
strategy:
125134
fail-fast: false
@@ -141,21 +150,20 @@ jobs:
141150
host-platform: ${{ matrix.host-platform }}
142151
build-ctk-ver: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
143152

144-
# WARNING: make sure both Linux test jobs are in sync
153+
# See test-linux-64 for why test jobs are split by platform.
145154
test-linux-aarch64:
146155
strategy:
147156
fail-fast: false
148157
matrix:
149158
host-platform:
150159
- linux-aarch64
151160
name: Test ${{ matrix.host-platform }}
152-
# Note: No doc-only check needed here - if build-linux-aarch64 is skipped,
153-
# this job is automatically skipped due to the dependency.
154-
if: ${{ github.repository_owner == 'nvidia' }}
161+
if: ${{ github.repository_owner == 'nvidia' && !fromJSON(needs.should-skip.outputs.doc-only) }}
155162
permissions:
156163
contents: read # This is required for actions/checkout
157164
needs:
158165
- ci-vars
166+
- should-skip
159167
- build-linux-aarch64
160168
secrets: inherit
161169
uses: ./.github/workflows/test-wheel-linux.yml
@@ -164,20 +172,20 @@ jobs:
164172
host-platform: ${{ matrix.host-platform }}
165173
build-ctk-ver: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
166174

175+
# See test-linux-64 for why test jobs are split by platform.
167176
test-windows:
168177
strategy:
169178
fail-fast: false
170179
matrix:
171180
host-platform:
172181
- win-64
173182
name: Test ${{ matrix.host-platform }}
174-
# Note: No doc-only check needed here - if build-windows is skipped,
175-
# this job is automatically skipped due to the dependency.
176-
if: ${{ github.repository_owner == 'nvidia' }}
183+
if: ${{ github.repository_owner == 'nvidia' && !fromJSON(needs.should-skip.outputs.doc-only) }}
177184
permissions:
178185
contents: read # This is required for actions/checkout
179186
needs:
180187
- ci-vars
188+
- should-skip
181189
- build-windows
182190
secrets: inherit
183191
uses: ./.github/workflows/test-wheel-windows.yml

.github/workflows/release.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ jobs:
135135
needs:
136136
- check-tag
137137
- determine-run-id
138+
- doc
138139
secrets: inherit
139140
uses: ./.github/workflows/release-upload.yml
140141
with:
@@ -148,6 +149,7 @@ jobs:
148149
needs:
149150
- check-tag
150151
- determine-run-id
152+
- doc
151153
environment:
152154
name: ${{ inputs.wheel-dst }}
153155
url: https://${{ (inputs.wheel-dst == 'testpypi' && 'test.') || '' }}pypi.org/p/${{ inputs.component }}/

cuda_bindings/cuda/bindings/_nvml.pyx

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1198,16 +1198,16 @@ class PowerMizerMode(_IntEnum):
11981198

11991199

12001200
class DeviceArch(_IntEnum):
1201-
DEVICE_ARCH_KEPLER = 2
1202-
DEVICE_ARCH_MAXWELL = 3
1203-
DEVICE_ARCH_PASCAL = 4
1204-
DEVICE_ARCH_VOLTA = 5
1205-
DEVICE_ARCH_TURING = 6
1206-
DEVICE_ARCH_AMPERE = 7
1207-
DEVICE_ARCH_ADA = 8
1208-
DEVICE_ARCH_HOPPER = 9
1209-
DEVICE_ARCH_BLACKWELL = 10
1210-
DEVICE_ARCH_UNKNOWN = 0xFFFFFFFF
1201+
KEPLER = 2
1202+
MAXWELL = 3
1203+
PASCAL = 4
1204+
VOLTA = 5
1205+
TURING = 6
1206+
AMPERE = 7
1207+
ADA = 8
1208+
HOPPER = 9
1209+
BLACKWELL = 10
1210+
UNKNOWN = 0xFFFFFFFF
12111211

12121212

12131213
class BusType(_IntEnum):

cuda_core/cuda/core/_event.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ cdef class Event:
172172
raise RuntimeError(explanation)
173173

174174
def __hash__(self) -> int:
175-
return hash((type(self), as_intptr(self._h_context), as_intptr(self._h_event)))
175+
return hash((type(self), as_intptr(self._h_event)))
176176

177177
def __eq__(self, other) -> bool:
178178
# Note: using isinstance because `Event` can be subclassed.

cuda_core/cuda/core/_stream.pyx

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -217,22 +217,12 @@ cdef class Stream:
217217
return (0, as_intptr(self._h_stream))
218218

219219
def __hash__(self) -> int:
220-
# Ensure context is initialized for hash consistency
221-
Stream_ensure_ctx(self)
222-
return hash((as_intptr(self._h_context), as_intptr(self._h_stream)))
220+
return hash(as_intptr(self._h_stream))
223221

224222
def __eq__(self, other) -> bool:
225223
if not isinstance(other, Stream):
226224
return NotImplemented
227-
cdef Stream _other = <Stream>other
228-
# Fast path: compare handles first
229-
if as_intptr(self._h_stream) != as_intptr(_other._h_stream):
230-
return False
231-
# Ensure contexts are initialized for both streams
232-
Stream_ensure_ctx(self)
233-
Stream_ensure_ctx(_other)
234-
# Compare contexts as well
235-
return as_intptr(self._h_context) == as_intptr(_other._h_context)
225+
return as_intptr(self._h_stream) == as_intptr((<Stream>other)._h_stream)
236226

237227
@property
238228
def handle(self) -> cuda.bindings.driver.CUstream:

cuda_core/cuda/core/system/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,12 @@
2525
if CUDA_BINDINGS_NVML_IS_COMPATIBLE:
2626
from ._device import *
2727
from ._device import __all__ as _device_all
28+
from ._system_events import *
29+
from ._system_events import __all__ as _system_events_all
2830
from .exceptions import *
2931
from .exceptions import __all__ as _exceptions_all
3032

3133
__all__.append("get_nvml_version")
3234
__all__.extend(_device_all)
35+
__all__.extend(_system_events_all)
3336
__all__.extend(_exceptions_all)

0 commit comments

Comments
 (0)