diff --git a/include/alpaka/dev/DevCpu.hpp b/include/alpaka/dev/DevCpu.hpp index 1042e005b001..f0c5312c0365 100644 --- a/include/alpaka/dev/DevCpu.hpp +++ b/include/alpaka/dev/DevCpu.hpp @@ -6,7 +6,7 @@ #pragma once #include "alpaka/dev/Traits.hpp" -#include "alpaka/dev/common/QueueRegistry.hpp" +#include "alpaka/dev/common/DevGenericImpl.hpp" #include "alpaka/dev/cpu/SysInfo.hpp" #include "alpaka/mem/buf/Traits.hpp" #include "alpaka/platform/Traits.hpp" @@ -46,7 +46,7 @@ namespace alpaka namespace cpu::detail { //! The CPU device implementation. - using DevCpuImpl = alpaka::detail::QueueRegistry; + using DevCpuImpl = alpaka::detail::DevGenericImpl; } // namespace cpu::detail //! The CPU device handle. @@ -89,19 +89,32 @@ namespace alpaka return 0; } + static void setDeviceProperties(alpaka::DevCpu const&, alpaka::DeviceProperties& devProperties) + { + devProperties.name = cpu::detail::getCpuName(); + devProperties.totalGlobalMem = cpu::detail::getTotalGlobalMemSizeBytes(); + } + + friend struct trait::GetName; + friend struct trait::GetMemBytes; + friend struct trait::GetFreeMemBytes; + friend struct trait::GetWarpSizes; + friend struct trait::GetPreferredWarpSize; + private: std::shared_ptr m_spDevCpuImpl; }; namespace trait { + //! The CPU device name get trait specialization. template<> struct GetName { - ALPAKA_FN_HOST static auto getName(DevCpu const& /* dev */) -> std::string + ALPAKA_FN_HOST static auto getName(DevCpu const& dev) -> std::string { - return cpu::detail::getCpuName(); + return dev.m_spDevCpuImpl->deviceProperties(dev)->name; } }; @@ -109,9 +122,9 @@ namespace alpaka template<> struct GetMemBytes { - ALPAKA_FN_HOST static auto getMemBytes(DevCpu const& /* dev */) -> std::size_t + ALPAKA_FN_HOST static auto getMemBytes(DevCpu const& dev) -> std::size_t { - return cpu::detail::getTotalGlobalMemSizeBytes(); + return dev.m_spDevCpuImpl->deviceProperties(dev)->totalGlobalMem; } }; diff --git a/include/alpaka/dev/DevGenericSycl.hpp b/include/alpaka/dev/DevGenericSycl.hpp index da54448cd304..edfb23b0601c 100644 --- a/include/alpaka/dev/DevGenericSycl.hpp +++ b/include/alpaka/dev/DevGenericSycl.hpp @@ -9,6 +9,7 @@ #include "alpaka/core/Common.hpp" #include "alpaka/core/Sycl.hpp" #include "alpaka/dev/Traits.hpp" +#include "alpaka/dev/common/DeviceProperties.hpp" #include "alpaka/mem/buf/Traits.hpp" #include "alpaka/platform/Traits.hpp" #include "alpaka/queue/Properties.hpp" @@ -112,11 +113,41 @@ namespace alpaka return m_context; } + auto deviceProperties() -> std::optional& + { + std::call_once( + m_onceFlag, + [&]() + { + m_deviceProperties = std::make_optional(); + auto const& device = this->get_device(); + m_deviceProperties->name = device.template get_info(); + m_deviceProperties->totalGlobalMem + = device.template get_info(); + + std::vector warp_sizes + = device.template get_info(); + // The CPU runtime supports a sub-group size of 64, but the SYCL implementation currently + // does not + auto find64 = std::find(warp_sizes.begin(), warp_sizes.end(), 64); + if(find64 != warp_sizes.end()) + warp_sizes.erase(find64); + // Sort the warp sizes in decreasing order + std::sort(warp_sizes.begin(), warp_sizes.end(), std::greater<>{}); + m_deviceProperties->warpSizes = std::move(warp_sizes); + m_deviceProperties->preferredWarpSize = m_deviceProperties->warpSizes.front(); + }); + + return m_deviceProperties; + } + private: sycl::device m_device; sycl::context m_context; std::vector> m_queues; + std::optional m_deviceProperties; std::shared_mutex mutable m_mutex; + std::once_flag m_onceFlag; }; } // namespace detail @@ -154,14 +185,14 @@ namespace alpaka namespace trait { + //! The SYCL device name get trait specialization. template struct GetName> { static auto getName(DevGenericSycl const& dev) -> std::string { - auto const device = dev.getNativeHandle().first; - return device.template get_info(); + return dev.m_impl->deviceProperties()->name; } }; @@ -171,8 +202,7 @@ namespace alpaka { static auto getMemBytes(DevGenericSycl const& dev) -> std::size_t { - auto const device = dev.getNativeHandle().first; - return device.template get_info(); + return dev.m_impl->deviceProperties()->totalGlobalMem; } }; @@ -195,15 +225,7 @@ namespace alpaka { static auto getWarpSizes(DevGenericSycl const& dev) -> std::vector { - auto const device = dev.getNativeHandle().first; - std::vector warp_sizes = device.template get_info(); - // The CPU runtime supports a sub-group size of 64, but the SYCL implementation currently does not - auto find64 = std::find(warp_sizes.begin(), warp_sizes.end(), 64); - if(find64 != warp_sizes.end()) - warp_sizes.erase(find64); - // Sort the warp sizes in decreasing order - std::sort(warp_sizes.begin(), warp_sizes.end(), std::greater<>{}); - return warp_sizes; + return dev.m_impl->deviceProperties()->warpSizes; } }; @@ -213,7 +235,7 @@ namespace alpaka { static auto getPreferredWarpSize(DevGenericSycl const& dev) -> std::size_t { - return GetWarpSizes>::getWarpSizes(dev).front(); + return dev.m_impl->deviceProperties()->preferredWarpSize; } }; diff --git a/include/alpaka/dev/DevUniformCudaHipRt.hpp b/include/alpaka/dev/DevUniformCudaHipRt.hpp index 188a81665f20..abaf55f30ab0 100644 --- a/include/alpaka/dev/DevUniformCudaHipRt.hpp +++ b/include/alpaka/dev/DevUniformCudaHipRt.hpp @@ -10,7 +10,8 @@ #include "alpaka/core/Hip.hpp" #include "alpaka/core/Interface.hpp" #include "alpaka/dev/Traits.hpp" -#include "alpaka/dev/common/QueueRegistry.hpp" +#include "alpaka/dev/common/DevGenericImpl.hpp" +#include "alpaka/dev/common/DeviceProperties.hpp" #include "alpaka/mem/buf/Traits.hpp" #include "alpaka/platform/Traits.hpp" #include "alpaka/queue/Properties.hpp" @@ -20,6 +21,7 @@ #include "alpaka/wait/Traits.hpp" #include +#include #include #include @@ -27,6 +29,10 @@ namespace alpaka { + + template + class DevUniformCudaHipRt; + namespace trait { template @@ -62,7 +68,7 @@ namespace alpaka using IDeviceQueue = uniform_cuda_hip::detail::QueueUniformCudaHipRtImpl; protected: - DevUniformCudaHipRt() : m_QueueRegistry{std::make_shared>()} + DevUniformCudaHipRt() : m_DevGenericImpl{std::make_shared>()} { } @@ -84,42 +90,68 @@ namespace alpaka [[nodiscard]] ALPAKA_FN_HOST auto getAllQueues() const -> std::vector> { - return m_QueueRegistry->getAllExistingQueues(); + return m_DevGenericImpl->getAllExistingQueues(); } //! Registers the given queue on this device. //! NOTE: Every queue has to be registered for correct functionality of device wait operations! ALPAKA_FN_HOST auto registerQueue(std::shared_ptr spQueue) const -> void { - m_QueueRegistry->registerQueue(spQueue); + m_DevGenericImpl->registerQueue(spQueue); + } + + static void setDeviceProperties( + DevUniformCudaHipRt const& device, + alpaka::DeviceProperties& devProperties) + { + // There is cuda/hip-DeviceGetAttribute as faster alternative to + // cuda/hip-GetDeviceProperties to get a single device property but it has no option to get + // the name + auto devHandle = device.getNativeHandle(); + typename TApi::DeviceProp_t devProp; + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::getDeviceProperties(&devProp, devHandle)); + devProperties.name = std::string(devProp.name); + + std::size_t freeInternal(0u); + std::size_t totalInternal(0u); + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::memGetInfo(&freeInternal, &totalInternal)); + devProperties.totalGlobalMem = totalInternal; + + int warpSize = 0; + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + TApi::deviceGetAttribute(&warpSize, TApi::deviceAttributeWarpSize, devHandle)); + devProperties.warpSizes = std::vector{static_cast(warpSize)}; + devProperties.preferredWarpSize = static_cast(warpSize); } + friend struct trait::GetName>; + friend struct trait::GetMemBytes>; + friend struct trait::GetFreeMemBytes>; + friend struct trait::GetWarpSizes>; + friend struct trait::GetPreferredWarpSize>; + private: DevUniformCudaHipRt(int iDevice) : m_iDevice(iDevice) - , m_QueueRegistry(std::make_shared>()) + , m_DevGenericImpl(std::make_shared>()) { } int m_iDevice; - std::shared_ptr> m_QueueRegistry; + std::shared_ptr> m_DevGenericImpl; }; namespace trait { + //! The CUDA/HIP RT device name get trait specialization. template struct GetName> { ALPAKA_FN_HOST static auto getName(DevUniformCudaHipRt const& dev) -> std::string { - // There is cuda/hip-DeviceGetAttribute as faster alternative to cuda/hip-GetDeviceProperties to get a - // single device property but it has no option to get the name - typename TApi::DeviceProp_t devProp; - ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::getDeviceProperties(&devProp, dev.getNativeHandle())); - - return std::string(devProp.name); + return dev.m_DevGenericImpl->deviceProperties(dev)->name; } }; @@ -129,15 +161,7 @@ namespace alpaka { ALPAKA_FN_HOST static auto getMemBytes(DevUniformCudaHipRt const& dev) -> std::size_t { - // Set the current device to wait for. - ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle())); - - std::size_t freeInternal(0u); - std::size_t totalInternal(0u); - - ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::memGetInfo(&freeInternal, &totalInternal)); - - return totalInternal; + return dev.m_DevGenericImpl->deviceProperties(dev)->totalGlobalMem; } }; @@ -147,12 +171,9 @@ namespace alpaka { ALPAKA_FN_HOST static auto getFreeMemBytes(DevUniformCudaHipRt const& dev) -> std::size_t { - // Set the current device to wait for. ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle())); - std::size_t freeInternal(0u); std::size_t totalInternal(0u); - ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::memGetInfo(&freeInternal, &totalInternal)); return freeInternal; @@ -165,7 +186,7 @@ namespace alpaka { ALPAKA_FN_HOST static auto getWarpSizes(DevUniformCudaHipRt const& dev) -> std::vector { - return {GetPreferredWarpSize>::getPreferredWarpSize(dev)}; + return dev.m_DevGenericImpl->deviceProperties(dev)->warpSizes; } }; @@ -175,11 +196,7 @@ namespace alpaka { ALPAKA_FN_HOST static auto getPreferredWarpSize(DevUniformCudaHipRt const& dev) -> std::size_t { - int warpSize = 0; - - ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( - TApi::deviceGetAttribute(&warpSize, TApi::deviceAttributeWarpSize, dev.getNativeHandle())); - return static_cast(warpSize); + return dev.m_DevGenericImpl->deviceProperties(dev)->preferredWarpSize; } }; diff --git a/include/alpaka/dev/common/QueueRegistry.hpp b/include/alpaka/dev/common/DevGenericImpl.hpp similarity index 71% rename from include/alpaka/dev/common/QueueRegistry.hpp rename to include/alpaka/dev/common/DevGenericImpl.hpp index 62055fc45439..9fcadd051f03 100644 --- a/include/alpaka/dev/common/QueueRegistry.hpp +++ b/include/alpaka/dev/common/DevGenericImpl.hpp @@ -5,19 +5,22 @@ #pragma once #include "alpaka/core/Common.hpp" +#include "alpaka/dev/common/DeviceProperties.hpp" #include #include #include #include +#include namespace alpaka::detail { + //! The CPU/GPU device queue registry implementation. //! //! @tparam TQueue queue implementation template - struct QueueRegistry + struct DevGenericImpl { ALPAKA_FN_HOST auto getAllExistingQueues() const -> std::vector> { @@ -52,8 +55,24 @@ namespace alpaka::detail m_queues.push_back(spQueue); } + template + auto deviceProperties(TDev const& device) -> std::optional& + { + std::call_once( + m_onceFlag, + [&]() noexcept + { + m_deviceProperties = std::make_optional(); + TDev::setDeviceProperties(device, *m_deviceProperties); + }); + + return m_deviceProperties; + } + private: std::mutex mutable m_Mutex; + std::once_flag m_onceFlag; + std::optional m_deviceProperties; std::deque> mutable m_queues; }; } // namespace alpaka::detail diff --git a/include/alpaka/dev/common/DeviceProperties.hpp b/include/alpaka/dev/common/DeviceProperties.hpp new file mode 100644 index 000000000000..f2c00636cf05 --- /dev/null +++ b/include/alpaka/dev/common/DeviceProperties.hpp @@ -0,0 +1,19 @@ + +#pragma once + +#include "alpaka/dev/Traits.hpp" + +#include +#include + +namespace alpaka +{ + + struct DeviceProperties + { + std::string name; + std::size_t totalGlobalMem; + std::vector warpSizes; + std::size_t preferredWarpSize; + }; +} // namespace alpaka