|
| 1 | +""" |
| 2 | +Test that CUDA tools (nvcc, nsys, ncu) work correctly. |
| 3 | +
|
| 4 | +Compiles a minimal CUDA C++ program using Thrust and verifies that both |
| 5 | +Nsight Systems and Nsight Compute can successfully profile it. |
| 6 | +""" |
| 7 | + |
| 8 | +import pytest |
| 9 | +import subprocess |
| 10 | +from pathlib import Path |
| 11 | + |
# Minimal CUDA C++ source that the cuda_binary fixture writes to disk and
# compiles with nvcc. The program fills a 256-element Thrust device vector
# with thrust::sequence, sums it with thrust::reduce, and compares the result
# against n * (n - 1) / 2. It prints "PASS" on success; on a mismatch it
# writes a diagnostic to stderr and returns EXIT_FAILURE.
# Kept as a raw string so the C++ text is passed through to nvcc verbatim.
CUDA_PROGRAM = r"""
#include <thrust/device_vector.h>
#include <thrust/sequence.h>
#include <thrust/reduce.h>
#include <iostream>
#include <cstdlib>

int main() {
    constexpr int n = 256;

    thrust::device_vector<float> d(n);
    thrust::sequence(d.begin(), d.end());

    float sum = thrust::reduce(d.begin(), d.end());
    float expected = n * (n - 1) / 2.0f;

    if (sum != expected) {
        std::cerr << "Mismatch: got " << sum << ", expected " << expected << std::endl;
        return EXIT_FAILURE;
    }

    std::cout << "PASS" << std::endl;
}
"""
| 36 | + |
| 37 | + |
@pytest.fixture(scope="module")
def cuda_binary(tmp_path_factory):
    """Build the embedded CUDA program with nvcc, once per test module.

    The source held in ``CUDA_PROGRAM`` is written into a fresh temporary
    directory and compiled there. The path of the resulting executable is
    returned so tests can run and profile it.
    """
    work_dir = tmp_path_factory.mktemp("nsight_test")
    executable = work_dir / "test_program"
    source_file = work_dir / "test_program.cu"
    source_file.write_text(CUDA_PROGRAM)

    nvcc_cmd = ["nvcc", "-o", str(executable), str(source_file)]
    proc = subprocess.run(nvcc_cmd, capture_output=True, text=True, timeout=120)

    # Include the full compiler output so a failed build is diagnosable.
    assert proc.returncode == 0, (
        "nvcc compilation failed:\n"
        f"stdout: {proc.stdout}\n"
        f"stderr: {proc.stderr}"
    )
    # A zero exit code alone is not trusted; the output file must exist too.
    assert executable.exists(), "Binary was not created"
    return executable
| 56 | + |
| 57 | + |
def test_cuda_binary_runs(cuda_binary):
    """Run the compiled CUDA executable directly and expect it to print PASS."""
    proc = subprocess.run(
        [str(cuda_binary)],
        timeout=30,
        text=True,
        capture_output=True,
    )

    # The program must exit cleanly before its output is inspected.
    assert proc.returncode == 0, (
        "CUDA binary failed:\n"
        f"stdout: {proc.stdout}\n"
        f"stderr: {proc.stderr}"
    )
    assert "PASS" in proc.stdout
| 67 | + |
| 68 | + |
def test_nsys_profile(cuda_binary, tmp_path):
    """Profile the CUDA executable with nsys and check that a report is written."""
    report_file = tmp_path / "test_report.nsys-rep"
    nsys_cmd = [
        "nsys",
        "profile",
        "--force-overwrite=true",
        "--output",
        str(report_file),
        str(cuda_binary),
    ]

    proc = subprocess.run(nsys_cmd, capture_output=True, text=True, timeout=120)

    assert proc.returncode == 0, (
        "nsys profile failed:\n"
        f"stdout: {proc.stdout}\n"
        f"stderr: {proc.stderr}"
    )
    # Beyond the exit status, confirm the report landed at the requested path.
    assert report_file.exists(), "nsys report file was not created"
| 83 | + |
| 84 | + |
def test_ncu_profile(cuda_binary):
    """Profile the CUDA executable with ncu and expect a zero exit status."""
    ncu_cmd = [
        "ncu",
        "--target-processes=all",
        "--set=basic",
        str(cuda_binary),
    ]

    proc = subprocess.run(ncu_cmd, capture_output=True, text=True, timeout=120)

    # Only the exit status is checked; the captured output is for diagnostics.
    assert proc.returncode == 0, (
        "ncu profile failed:\n"
        f"stdout: {proc.stdout}\n"
        f"stderr: {proc.stderr}"
    )
0 commit comments