
Commit 7cddcb4

cuda.core.system: Better checks for when we expect APIs to be unsupported

1 parent ce333b6

File tree: 8 files changed, +331 -227 lines changed

cuda_bindings/tests/nvml/conftest.py
Lines changed: 37 additions & 0 deletions

@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

 from collections import namedtuple
+from contextlib import contextmanager

 import pytest
 from cuda.bindings import _nvml as nvml
@@ -128,3 +129,39 @@ def pci_info(ngpus, handles):
     pci_info = [nvml.device_get_pci_info_v3(handles[i]) for i in range(ngpus)]
     assert len(pci_info) == ngpus
     return pci_info
+
+
+@contextmanager
+def unsupported_before(device: int, expected_device_arch: nvml.DeviceArch | str | None):
+    device_arch = nvml.device_get_architecture(device)
+
+    if isinstance(expected_device_arch, nvml.DeviceArch):
+        expected_device_arch_int = int(expected_device_arch)
+    elif expected_device_arch == "FERMI":
+        expected_device_arch_int = 1
+    else:
+        expected_device_arch_int = 0
+
+    if expected_device_arch is None or expected_device_arch == "HAS_INFOROM" or device_arch == nvml.DeviceArch.UNKNOWN:
+        # In this case, we don't /know/ if it will fail, but we are ok if it
+        # does or does not.
+
+        # TODO: There are APIs that are documented as supported only if the
+        # device has an InfoROM, but I couldn't find a way to detect that. For
+        # now, they are just handled as "possibly failing".
+
+        try:
+            yield
+        except nvml.NotSupportedError:
+            pytest.skip(
+                f"Unsupported call for device architecture {nvml.DeviceArch(device_arch).name} "
+                f"on device '{nvml.device_get_name(device)}'"
+            )
+    elif int(device_arch) < expected_device_arch_int:
+        # In this case, we /know/ it will fail, and we want to assert that it does.
+        with pytest.raises(nvml.NotSupportedError):
+            yield
+        pytest.skip(f"Unsupported before {expected_device_arch.name}, got {nvml.device_get_name(device)}")
+    else:
+        # In this case, we /know/ it should work, and if it fails, the test should fail.
+        yield
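
For orientation, here is a minimal sketch of how a test might call the new context manager. The test name and the choice of device_get_power_usage as the wrapped call are illustrative assumptions; the all_devices fixture, nvml.DeviceArch, and the NVML bindings are the ones already used in these tests.

# Illustrative sketch only; the test name and wrapped API call are assumptions.
from cuda.bindings import _nvml as nvml

from .conftest import unsupported_before


def test_example_usage(all_devices):
    for device in all_devices:
        # With an expected minimum architecture (assumed VOLTA here):
        #  - on an older device, NotSupportedError is required and the test is skipped;
        #  - on a device of unknown architecture, NotSupportedError causes a skip;
        #  - otherwise NotSupportedError propagates and fails the test.
        with unsupported_before(device, nvml.DeviceArch.VOLTA):
            power_mwatts = nvml.device_get_power_usage(device)
        assert power_mwatts >= 0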

cuda_bindings/tests/nvml/test_compute_mode.py
Lines changed: 3 additions & 8 deletions

@@ -7,6 +7,8 @@
 import pytest
 from cuda.bindings import _nvml as nvml

+from .conftest import unsupported_before
+
 COMPUTE_MODES = [
     nvml.ComputeMode.COMPUTEMODE_DEFAULT,
     nvml.ComputeMode.COMPUTEMODE_PROHIBITED,
@@ -16,18 +18,11 @@

 @pytest.mark.skipif(sys.platform == "win32", reason="Test not supported on Windows")
 def test_compute_mode_supported_nonroot(all_devices):
-    skip_reasons = set()
     for device in all_devices:
-        try:
+        with unsupported_before(device, None):
             original_compute_mode = nvml.device_get_compute_mode(device)
-        except nvml.NotSupportedError:
-            skip_reasons.add(f"nvmlDeviceGetComputeMode not supported for device {device}")
-            continue

         for cm in COMPUTE_MODES:
             with pytest.raises(nvml.NoPermissionError):
                 nvml.device_set_compute_mode(device, cm)
             assert original_compute_mode == nvml.device_get_compute_mode(device), "Compute mode shouldn't have changed"
-
-    if skip_reasons:
-        pytest.skip(" ; ".join(skip_reasons))

cuda_bindings/tests/nvml/test_gpu.py
Lines changed: 5 additions & 13 deletions

@@ -5,6 +5,7 @@
 from cuda.bindings import _nvml as nvml

 from . import util
+from .conftest import unsupported_before


 def test_gpu_get_module_id(nvml_init):
@@ -23,23 +24,14 @@ def test_gpu_get_module_id(nvml_init):


 def test_gpu_get_platform_info(all_devices):
-    skip_reasons = set()
     for device in all_devices:
         if util.is_vgpu(device):
-            skip_reasons.add(f"Not supported on vGPU device {device}")
-            continue
+            pytest.skip(f"Not supported on vGPU device {device}")

-        # TODO
-        # if device.feature_dict.board.chip < board_class.Architecture.Blackwell:
-        #     test_utils.skip_test("Not supported on chip before Blackwell")
+        # Documentation says Blackwell or newer only, but this does seem to pass
+        # on some newer GPUs.

-        try:
+        with unsupported_before(device, None):
             platform_info = nvml.device_get_platform_info(device)
-        except nvml.NotSupportedError:
-            skip_reasons.add(f"Not supported returned, linkely NVLink is disable for {device}")
-            continue

         assert isinstance(platform_info, nvml.PlatformInfo_v2)
-
-    if skip_reasons:
-        pytest.skip(" ; ".join(skip_reasons))

cuda_bindings/tests/nvml/test_pynvml.py
Lines changed: 19 additions & 14 deletions

@@ -10,6 +10,7 @@
 from cuda.bindings import _nvml as nvml

 from . import util
+from .conftest import unsupported_before

 XFAIL_LEGACY_NVLINK_MSG = "Legacy NVLink test expected to fail."

@@ -66,7 +67,8 @@ def test_device_get_handle_by_pci_bus_id(ngpus, pci_info):
 def test_device_get_memory_affinity(handles, scope):
     size = 1024
     for handle in handles:
-        node_set = nvml.device_get_memory_affinity(handle, size, scope)
+        with unsupported_before(handle, nvml.DeviceArch.KEPLER):
+            node_set = nvml.device_get_memory_affinity(handle, size, scope)
         assert node_set is not None
         assert len(node_set) == size

@@ -76,7 +78,8 @@ def test_device_get_memory_affinity(handles, scope):
 def test_device_get_cpu_affinity_within_scope(handles, scope):
     size = 1024
     for handle in handles:
-        cpu_set = nvml.device_get_cpu_affinity_within_scope(handle, size, scope)
+        with unsupported_before(handle, nvml.DeviceArch.KEPLER):
+            cpu_set = nvml.device_get_cpu_affinity_within_scope(handle, size, scope)
         assert cpu_set is not None
         assert len(cpu_set) == size

@@ -136,22 +139,22 @@ def test_device_get_p2p_status(handles, index):

 def test_device_get_power_usage(ngpus, handles):
     for i in range(ngpus):
-        try:
+        # Note: documentation says this is supported on Fermi or newer,
+        # but in practice it fails on some later architectures.
+        with unsupported_before(handles[i], None):
             power_mwatts = nvml.device_get_power_usage(handles[i])
-        except nvml.NotSupportedError:
-            pytest.skip("device_get_power_usage not supported")
         assert power_mwatts >= 0.0


 def test_device_get_total_energy_consumption(ngpus, handles):
     for i in range(ngpus):
-        try:
+        with unsupported_before(handles[i], nvml.DeviceArch.VOLTA):
             energy_mjoules1 = nvml.device_get_total_energy_consumption(handles[i])
-        except nvml.NotSupportedError:
-            pytest.skip("device_get_total_energy_consumption not supported")
+
         for j in range(10):  # idle for 150 ms
             time.sleep(0.015)  # and check for increase every 15 ms
-            energy_mjoules2 = nvml.device_get_total_energy_consumption(handles[i])
+            with unsupported_before(handles[i], nvml.DeviceArch.VOLTA):
+                energy_mjoules2 = nvml.device_get_total_energy_consumption(handles[i])
             assert energy_mjoules2 >= energy_mjoules1
             if energy_mjoules2 > energy_mjoules1:
                 break
@@ -182,7 +185,8 @@ def test_device_get_memory_info(ngpus, handles):

 def test_device_get_utilization_rates(ngpus, handles):
     for i in range(ngpus):
-        urate = nvml.device_get_utilization_rates(handles[i])
+        with unsupported_before(handles[i], "FERMI"):
+            urate = nvml.device_get_utilization_rates(handles[i])
         assert urate.gpu >= 0
         assert urate.memory >= 0

@@ -239,7 +243,8 @@ def test_device_get_utilization_rates(ngpus, handles):

 def test_device_get_pcie_throughput(ngpus, handles):
     for i in range(ngpus):
-        tx_bytes_tp = nvml.device_get_pcie_throughput(handles[i], nvml.PcieUtilCounter.PCIE_UTIL_TX_BYTES)
+        with unsupported_before(handles[i], nvml.DeviceArch.MAXWELL):
+            tx_bytes_tp = nvml.device_get_pcie_throughput(handles[i], nvml.PcieUtilCounter.PCIE_UTIL_TX_BYTES)
         assert tx_bytes_tp >= 0
         rx_bytes_tp = nvml.device_get_pcie_throughput(handles[i], nvml.PcieUtilCounter.PCIE_UTIL_RX_BYTES)
         assert rx_bytes_tp >= 0
@@ -271,10 +276,10 @@ def test_device_get_pcie_throughput(ngpus, handles):
 def test_device_get_nvlink_capability(ngpus, handles, cap_type):
     for i in range(ngpus):
         for j in range(nvml.NVLINK_MAX_LINKS):
-            try:
+            # By the documentation, this should be supported on PASCAL or newer,
+            # but this also seems to fail on newer.
+            with unsupported_before(handles[i], None):
                 cap = nvml.device_get_nvlink_capability(handles[i], j, cap_type)
-            except nvml.NotSupportedError:
-                pytest.skip("NVLink capability not supported")
             assert cap >= 0

