Add CUDA 13.3 support #582
Conversation
There was a problem hiding this comment.
Code Review
This pull request adds support for CUDA 13.3 (cuda-13030) across various bindings and crates, introduces download retry logic, and adds resume support to avoid redundant downloads in the bindings generator. The review comments correctly identify critical compilation errors in src/cufile/sys/mod.rs and src/nvrtc/sys/mod.rs where cudaError_enum and nvrtcResult are redefined for CUDA 13.3 without feature-gating the older definitions, which will cause name collisions.
Important
The consumer version of Gemini Code Assist on GitHub is being sunset. Starting June 18, 2026, new organization installations will be blocked, and all code review activity will officially cease on July 17, 2026.
For more details on the timeline and next steps, please review the Help Documentation.
| #[cfg(any(feature = "cuda-13030"))] | ||
| #[repr(u32)] | ||
| #[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] | ||
| pub enum cudaError_enum { | ||
| CUDA_SUCCESS = 0, | ||
| CUDA_ERROR_INVALID_VALUE = 1, | ||
| CUDA_ERROR_OUT_OF_MEMORY = 2, | ||
| CUDA_ERROR_NOT_INITIALIZED = 3, | ||
| CUDA_ERROR_DEINITIALIZED = 4, | ||
| CUDA_ERROR_PROFILER_DISABLED = 5, | ||
| CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6, | ||
| CUDA_ERROR_PROFILER_ALREADY_STARTED = 7, | ||
| CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8, | ||
| CUDA_ERROR_STUB_LIBRARY = 34, | ||
| CUDA_ERROR_CALL_REQUIRES_NEWER_DRIVER = 36, | ||
| CUDA_ERROR_DEVICE_UNAVAILABLE = 46, | ||
| CUDA_ERROR_NO_DEVICE = 100, | ||
| CUDA_ERROR_INVALID_DEVICE = 101, | ||
| CUDA_ERROR_DEVICE_NOT_LICENSED = 102, | ||
| CUDA_ERROR_INVALID_IMAGE = 200, | ||
| CUDA_ERROR_INVALID_CONTEXT = 201, | ||
| CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202, | ||
| CUDA_ERROR_MAP_FAILED = 205, | ||
| CUDA_ERROR_UNMAP_FAILED = 206, | ||
| CUDA_ERROR_ARRAY_IS_MAPPED = 207, | ||
| CUDA_ERROR_ALREADY_MAPPED = 208, | ||
| CUDA_ERROR_NO_BINARY_FOR_GPU = 209, | ||
| CUDA_ERROR_ALREADY_ACQUIRED = 210, | ||
| CUDA_ERROR_NOT_MAPPED = 211, | ||
| CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212, | ||
| CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213, | ||
| CUDA_ERROR_ECC_UNCORRECTABLE = 214, | ||
| CUDA_ERROR_UNSUPPORTED_LIMIT = 215, | ||
| CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216, | ||
| CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217, | ||
| CUDA_ERROR_INVALID_PTX = 218, | ||
| CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219, | ||
| CUDA_ERROR_NVLINK_UNCORRECTABLE = 220, | ||
| CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221, | ||
| CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222, | ||
| CUDA_ERROR_JIT_COMPILATION_DISABLED = 223, | ||
| CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY = 224, | ||
| CUDA_ERROR_UNSUPPORTED_DEVSIDE_SYNC = 225, | ||
| CUDA_ERROR_CONTAINED = 226, | ||
| CUDA_ERROR_INVALID_SOURCE = 300, | ||
| CUDA_ERROR_FILE_NOT_FOUND = 301, | ||
| CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, | ||
| CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303, | ||
| CUDA_ERROR_OPERATING_SYSTEM = 304, | ||
| CUDA_ERROR_INVALID_HANDLE = 400, | ||
| CUDA_ERROR_ILLEGAL_STATE = 401, | ||
| CUDA_ERROR_LOSSY_QUERY = 402, | ||
| CUDA_ERROR_NOT_FOUND = 500, | ||
| CUDA_ERROR_NOT_READY = 600, | ||
| CUDA_ERROR_ILLEGAL_ADDRESS = 700, | ||
| CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701, | ||
| CUDA_ERROR_LAUNCH_TIMEOUT = 702, | ||
| CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703, | ||
| CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704, | ||
| CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705, | ||
| CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708, | ||
| CUDA_ERROR_CONTEXT_IS_DESTROYED = 709, | ||
| CUDA_ERROR_ASSERT = 710, | ||
| CUDA_ERROR_TOO_MANY_PEERS = 711, | ||
| CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712, | ||
| CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713, | ||
| CUDA_ERROR_HARDWARE_STACK_ERROR = 714, | ||
| CUDA_ERROR_ILLEGAL_INSTRUCTION = 715, | ||
| CUDA_ERROR_MISALIGNED_ADDRESS = 716, | ||
| CUDA_ERROR_INVALID_ADDRESS_SPACE = 717, | ||
| CUDA_ERROR_INVALID_PC = 718, | ||
| CUDA_ERROR_LAUNCH_FAILED = 719, | ||
| CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720, | ||
| CUDA_ERROR_TENSOR_MEMORY_LEAK = 721, | ||
| CUDA_ERROR_NOT_PERMITTED = 800, | ||
| CUDA_ERROR_NOT_SUPPORTED = 801, | ||
| CUDA_ERROR_SYSTEM_NOT_READY = 802, | ||
| CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803, | ||
| CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804, | ||
| CUDA_ERROR_MPS_CONNECTION_FAILED = 805, | ||
| CUDA_ERROR_MPS_RPC_FAILURE = 806, | ||
| CUDA_ERROR_MPS_SERVER_NOT_READY = 807, | ||
| CUDA_ERROR_MPS_MAX_CLIENTS_REACHED = 808, | ||
| CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED = 809, | ||
| CUDA_ERROR_MPS_CLIENT_TERMINATED = 810, | ||
| CUDA_ERROR_CDP_NOT_SUPPORTED = 811, | ||
| CUDA_ERROR_CDP_VERSION_MISMATCH = 812, | ||
| CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900, | ||
| CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = 901, | ||
| CUDA_ERROR_STREAM_CAPTURE_MERGE = 902, | ||
| CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = 903, | ||
| CUDA_ERROR_STREAM_CAPTURE_UNJOINED = 904, | ||
| CUDA_ERROR_STREAM_CAPTURE_ISOLATION = 905, | ||
| CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = 906, | ||
| CUDA_ERROR_CAPTURED_EVENT = 907, | ||
| CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908, | ||
| CUDA_ERROR_TIMEOUT = 909, | ||
| CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE = 910, | ||
| CUDA_ERROR_EXTERNAL_DEVICE = 911, | ||
| CUDA_ERROR_INVALID_CLUSTER_SIZE = 912, | ||
| CUDA_ERROR_FUNCTION_NOT_LOADED = 913, | ||
| CUDA_ERROR_INVALID_RESOURCE_TYPE = 914, | ||
| CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION = 915, | ||
| CUDA_ERROR_KEY_ROTATION = 916, | ||
| CUDA_ERROR_STREAM_DETACHED = 917, | ||
| CUDA_ERROR_GRAPH_RECAPTURE_FAILURE = 918, | ||
| CUDA_ERROR_UNKNOWN = 999, | ||
| } |
There was a problem hiding this comment.
The cudaError_enum is redefined here for CUDA 13.3, but the original definition on lines 1419-1528 is not feature-gated. This will cause a compilation error due to a name collision when the cuda-13030 feature is enabled.
To fix this, you should merge the two definitions. A common approach is to have a single enum definition and use #[cfg] attributes on the variants that are version-specific. For example, the CUDA_ERROR_GRAPH_RECAPTURE_FAILURE variant is new in 13.3.
Alternatively, you could wrap the older definition in #[cfg(not(any(feature = "cuda-13030")))] to avoid the redefinition.
There was a problem hiding this comment.
Not a collision — the definition above is gated on cfg(any(cuda-13010, cuda-13020)) and this one on cfg(any(cuda-13030)), and the cuda version features are mutually exclusive (you enable exactly one). Builds fine with -F cuda-13030, and the CI matrix checks every version feature.
The 13.3 copy also can't just be folded into the previous gate: it adds CUDA_ERROR_GRAPH_RECAPTURE_FAILURE = 918, so the enum body differs. The bindings generator merges identical definitions into a single any(...) gate and only emits a separate copy when the contents change between versions — same pattern as the other gated copies of this enum already in the file (one per distinct variant set).
| #[cfg(any(feature = "cuda-13030"))] | ||
| #[repr(u32)] | ||
| #[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] | ||
| pub enum nvrtcResult { | ||
| NVRTC_SUCCESS = 0, | ||
| NVRTC_ERROR_OUT_OF_MEMORY = 1, | ||
| NVRTC_ERROR_PROGRAM_CREATION_FAILURE = 2, | ||
| NVRTC_ERROR_INVALID_INPUT = 3, | ||
| NVRTC_ERROR_INVALID_PROGRAM = 4, | ||
| NVRTC_ERROR_INVALID_OPTION = 5, | ||
| NVRTC_ERROR_COMPILATION = 6, | ||
| NVRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7, | ||
| NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8, | ||
| NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9, | ||
| NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10, | ||
| NVRTC_ERROR_INTERNAL_ERROR = 11, | ||
| NVRTC_ERROR_TIME_FILE_WRITE_FAILED = 12, | ||
| NVRTC_ERROR_NO_PCH_CREATE_ATTEMPTED = 13, | ||
| NVRTC_ERROR_PCH_CREATE_HEAP_EXHAUSTED = 14, | ||
| NVRTC_ERROR_PCH_CREATE = 15, | ||
| NVRTC_ERROR_CANCELLED = 16, | ||
| NVRTC_ERROR_TIME_TRACE_FILE_WRITE_FAILED = 17, | ||
| NVRTC_ERROR_BUSY = 18, | ||
| } |
There was a problem hiding this comment.
The nvrtcResult enum is redefined here for CUDA 13.3, but the original definition on lines 76-94 is not feature-gated. This will cause a compilation error due to a name collision when the cuda-13030 feature is enabled.
To fix this, you should merge the two definitions. The new NVRTC_ERROR_BUSY variant should be added to the existing enum, guarded by the appropriate #[cfg] attribute.
Alternatively, you could wrap the older definition in #[cfg(not(any(feature = "cuda-13030")))] to avoid the redefinition.
There was a problem hiding this comment.
Same as the cufile comment — the existing definition is behind cfg(any(cuda-13000, cuda-13010, cuda-13020)), the new one behind cfg(any(cuda-13030)), and only one version feature is ever enabled, so there is no redefinition. Compiles clean with -F cuda-13030.
It also can't share the existing gate: 13.3 adds NVRTC_ERROR_BUSY = 18, so the enum body differs from the 13.0–13.2 one. The generator only splits an item into a new gated copy when its contents change between versions; identical definitions get merged into one any(...) gate.
|
Is there any progress on getting this merged? |
The maintainers are not responding. |
|
Hey, sorry — I'll be reviewing this over the next few days. For future reference, it would be easier to split the bindings generator fixes into a separate PR. |
|
@corrm can you revert the changes that are in bindings generator to only the minimum necessary to generate the 13.3 bindings? and then split the other changes into a separate PR |
NVIDIA renamed the cuda_cccl redist module to cccl in the 13.3 manifest. Match on (major, minor) so 13.3+ uses the new name, and add 13.3.0 to the generated version list.
|
@chelsea0x3b done. I reverted the generator changes in this PR down to the minimum needed for 13.3 — just matching the redist module names on `(major, minor)`. The rest of the generator work (resume support, skipping unneeded downloads, download retries, descriptive manifest errors) is now its own PR: #588, based on |
Closes #581
NVIDIA renamed the `cuda_cccl` redist module to `cccl` in the 13.3 manifest, which is what made the bindings generator panic. With that handled, the rest is the usual version bump: `cuda-13030` support.

The generator change in this PR is now the minimum needed for 13.3: match the redist module names on `(major, minor)` so 13.3+ picks up the renamed `cccl` module, plus adding 13.3.0 to the generated version list. The broader generator resilience work (resume support, skip-unneeded downloads, download retries, descriptive manifest errors) has been split out into #588 as requested.

Tested locally against CUDA 13.3 (V13.3.33) on an RTX 4090:
- `cargo build -F cuda-13030` and `-F cuda-version-from-build-system` both build
- `test_par_launch`, which fails the same way with `cuda-13020` on a cold GPU (timing-sensitive assert), so unrelated to this change
- `cargo check` still passes for cuda-13020, cuda-12090 and cuda-11040; clippy and fmt clean