From ad1f394789306a73fb2c758cdb365c7da80972a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Mon, 10 Mar 2025 09:48:42 +0100 Subject: [PATCH 1/4] Bump versions of dependencies --- Project.toml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Project.toml b/Project.toml index 9ad3196..f6f2e32 100644 --- a/Project.toml +++ b/Project.toml @@ -28,16 +28,16 @@ CUDAExt = "CUDA" CairoMakieExt = "CairoMakie" [compat] -CUDA = "3.8.4, 3.12, 4.4" -CairoMakie = "0.7, 0.10.7, 0.11, 0.12" +CUDA = "3.8.4, 3.12, 4.4, 5" +CairoMakie = "0.7, 0.10.7, 0.11, 0.12, 0.13" CpuId = "0.3" DocStringExtensions = "0.9" Glob = "1.3" -HDF5 = "0.16" +HDF5 = "0.16, 0.17" NVTX = "0.3" Reexport = "1.2" -TestItemRunner = "0.2" -ThreadPinning = "0.3, 0.4, 0.5, 0.6, 0.7" +TestItemRunner = "0.2, 1" +ThreadPinning = "0.3, 0.4, 0.5, 0.6, 0.7, 1" UnicodePlots = "2.8, 3" julia = "1.9" From 6f48a8bb87af26a80a0d71abacd06eb4b73e7962 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Mon, 10 Mar 2025 16:01:00 +0100 Subject: [PATCH 2/4] Add test/Project.toml --- Project.toml | 11 +---------- test/Project.toml | 9 +++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) create mode 100644 test/Project.toml diff --git a/Project.toml b/Project.toml index f6f2e32..518a858 100644 --- a/Project.toml +++ b/Project.toml @@ -38,15 +38,6 @@ NVTX = "0.3" Reexport = "1.2" TestItemRunner = "0.2, 1" ThreadPinning = "0.3, 0.4, 0.5, 0.6, 0.7, 1" +ThreadPools = "2.1.1" UnicodePlots = "2.8, 3" julia = "1.9" - -[extras] -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" -CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" -InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a" - -[targets] -test = ["Test", "InteractiveUtils", "CairoMakie", "CUDA", "TestItemRunner"] diff --git a/test/Project.toml b/test/Project.toml new file mode 100644 index 0000000..ec0864e 
--- /dev/null +++ b/test/Project.toml @@ -0,0 +1,9 @@ +[deps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" +Coverage = "a2441757-f6aa-5fb2-8edb-039e3f45d037" +GPUInspector = "608d808f-ff79-47b0-a25d-21f2c9b42853" +InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a" From b8169e3dba4dcf86ade81f7685354684124b526b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Mon, 10 Mar 2025 16:01:13 +0100 Subject: [PATCH 3/4] Add ThreadPools --- Project.toml | 1 + ext/CUDAExt/CUDAExt.jl | 1 + ext/CUDAExt/implementations/monitoring.jl | 2 +- ext/CUDAExt/implementations/stresstest.jl | 2 +- src/GPUInspector.jl | 1 + src/stresstest_cpu.jl | 2 +- 6 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index 518a858..c664543 100644 --- a/Project.toml +++ b/Project.toml @@ -17,6 +17,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" ThreadPinning = "811555cd-349b-4f26-b7bc-1f208b848042" +ThreadPools = "b189fb0b-2eb5-4ed4-bc0c-d34c51242431" UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228" [weakdeps] diff --git a/ext/CUDAExt/CUDAExt.jl b/ext/CUDAExt/CUDAExt.jl index 9d2a770..fd7fc08 100644 --- a/ext/CUDAExt/CUDAExt.jl +++ b/ext/CUDAExt/CUDAExt.jl @@ -12,6 +12,7 @@ using LinearAlgebra # pkgs using UnicodePlots using NVTX +import ThreadPools # for usage in CUDAExt using GPUInspector: diff --git a/ext/CUDAExt/implementations/monitoring.jl b/ext/CUDAExt/implementations/monitoring.jl index 483da9b..1417c7e 100644 --- a/ext/CUDAExt/implementations/monitoring.jl +++ b/ext/CUDAExt/implementations/monitoring.jl @@ -42,7 +42,7 @@ function monitoring_start( # spawn monitoring _monitoring!(true) - t = @tspawnat thread 
_monitor(func_symbol_vec; freq, devices=devs) + t = ThreadPools.@tspawnat thread _monitor(func_symbol_vec; freq, devices=devs) _set_monitoring_task(t) return nothing end diff --git a/ext/CUDAExt/implementations/stresstest.jl b/ext/CUDAExt/implementations/stresstest.jl index 358d9ca..e607f00 100644 --- a/ext/CUDAExt/implementations/stresstest.jl +++ b/ext/CUDAExt/implementations/stresstest.jl @@ -105,7 +105,7 @@ function _run_stresstests( throw(ArgumentError("length(threads) != length(tests)")) end @sync for (i, test) in enumerate(tests) - @tspawnat threads[i] test(; verbose=verbose) + ThreadPools.@tspawnat threads[i] test(; verbose=verbose) end else for test in tests diff --git a/src/GPUInspector.jl b/src/GPUInspector.jl index 9dbe9e9..63472bd 100644 --- a/src/GPUInspector.jl +++ b/src/GPUInspector.jl @@ -16,6 +16,7 @@ using UnicodePlots using CpuId: cachesize using HDF5: h5open using Glob: glob +import ThreadPools include("backends.jl") include("UnitPrefixedBytes.jl") diff --git a/src/stresstest_cpu.jl b/src/stresstest_cpu.jl index a4f5193..6e2be04 100644 --- a/src/stresstest_cpu.jl +++ b/src/stresstest_cpu.jl @@ -67,7 +67,7 @@ function _run_stresstests_cpu( if verbose @info("Julia thread $(threads[i]) runs test on CPU core $(core).") end - @tspawnat threads[i] begin + ThreadPools.@tspawnat threads[i] begin core_before = getcpuid() pinthread(core) _stresstest_cpu_kernel(; verbose, kwargs...) 
From bd09ababa12c6269d6cb3c6f41dedc3553458dc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Mon, 10 Mar 2025 16:07:46 +0100 Subject: [PATCH 4/4] Rename NVML unsafe_* calls to unchecked_* and restrict CUDA compat to 5 --- Project.toml | 2 +- ext/CUDAExt/cuda_wrappers.jl | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index c664543..f17e52f 100644 --- a/Project.toml +++ b/Project.toml @@ -29,7 +29,7 @@ CUDAExt = "CUDA" CairoMakieExt = "CairoMakie" [compat] -CUDA = "3.8.4, 3.12, 4.4, 5" +CUDA = "5" CairoMakie = "0.7, 0.10.7, 0.11, 0.12, 0.13" CpuId = "0.3" DocStringExtensions = "0.9" diff --git a/ext/CUDAExt/cuda_wrappers.jl b/ext/CUDAExt/cuda_wrappers.jl index a5ebad9..7de602b 100644 --- a/ext/CUDAExt/cuda_wrappers.jl +++ b/ext/CUDAExt/cuda_wrappers.jl @@ -1,6 +1,6 @@ function supports_get_temperature(nvml_device::NVML.Device) temp = Ref{UInt32}() - nvml_return = NVML.unsafe_nvmlDeviceGetTemperature( + nvml_return = NVML.unchecked_nvmlDeviceGetTemperature( nvml_device, CUDA.NVML.NVML_TEMPERATURE_GPU, temp ) return nvml_return == NVML.NVML_SUCCESS @@ -30,7 +30,7 @@ get_temperatures(devices=CUDA.devices()) = [get_temperature(dev) for dev in devi function supports_get_power_usage(nvml_device::NVML.Device) power = Ref{UInt32}() - nvml_return = NVML.unsafe_nvmlDeviceGetPowerUsage(nvml_device, power) + nvml_return = NVML.unchecked_nvmlDeviceGetPowerUsage(nvml_device, power) return nvml_return == NVML.NVML_SUCCESS end function supports_get_power_usage(dev::CuDevice) @@ -56,7 +56,7 @@ get_power_usages(devices=CUDA.devices()) = [get_power_usage(dev) for dev in devi function supports_get_gpu_utilization(nvml_device::NVML.Device) util = Ref{NVML.nvmlUtilization_t}() - nvml_return = NVML.unsafe_nvmlDeviceGetUtilizationRates(nvml_device, util) + nvml_return = NVML.unchecked_nvmlDeviceGetUtilizationRates(nvml_device, util) return nvml_return == NVML.NVML_SUCCESS end function supports_get_gpu_utilization(dev::CuDevice)