@@ -122,19 +122,20 @@ namespace alpaka
122122 {
123123 ALPAKA_FN_HOST static auto getName (DevUniformCudaHipRt<TApi> const & dev) -> std::string
124124 {
125+ auto & name = dev.m_QueueRegistry ->deviceProperties ().name ;
125126 {
126127 std::lock_guard<std::mutex> lock (dev.m_QueueRegistry ->mutex ());
127- if (!dev. m_QueueRegistry -> deviceProperties (). name .has_value ())
128+ if (!name.has_value ())
128129 {
129130 // There is cuda/hip-DeviceGetAttribute as faster alternative to cuda/hip-GetDeviceProperties
130131 // to get a single device property but it has no option to get the name
131132 typename TApi::DeviceProp_t devProp;
132133 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK (TApi::getDeviceProperties (&devProp, dev.getNativeHandle ()));
133- dev. m_QueueRegistry -> deviceProperties (). name = std::string (devProp.name );
134+ name = std::string (devProp.name );
134135 }
135136 }
136137
137- return dev. m_QueueRegistry -> deviceProperties (). name .value ();
138+ return name.value ();
138139 }
139140 };
140141
@@ -144,9 +145,10 @@ namespace alpaka
144145 {
145146 ALPAKA_FN_HOST static auto getMemBytes (DevUniformCudaHipRt<TApi> const & dev) -> std::size_t
146147 {
148+ auto & totalGlobalMem = dev.m_QueueRegistry ->deviceProperties ().totalGlobalMem ;
147149 {
148150 std::lock_guard<std::mutex> lock (dev.m_QueueRegistry ->mutex ());
149- if (!dev. m_QueueRegistry -> deviceProperties (). totalGlobalMem .has_value ())
151+ if (!totalGlobalMem.has_value ())
150152 {
151153 // Set the current device to wait for.
152154 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK (TApi::setDevice (dev.getNativeHandle ()));
@@ -156,11 +158,11 @@ namespace alpaka
156158
157159 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK (TApi::memGetInfo (&freeInternal, &totalInternal));
158160
159- dev. m_QueueRegistry -> deviceProperties (). totalGlobalMem = totalInternal;
161+ totalGlobalMem = totalInternal;
160162 }
161163 }
162164
163- return dev. m_QueueRegistry -> deviceProperties (). totalGlobalMem .value ();
165+ return totalGlobalMem.value ();
164166 }
165167 };
166168
@@ -170,10 +172,11 @@ namespace alpaka
170172 {
171173 ALPAKA_FN_HOST static auto getFreeMemBytes (DevUniformCudaHipRt<TApi> const & dev) -> std::size_t
172174 {
175+ auto & totalGlobalMem = dev.m_QueueRegistry ->deviceProperties ().totalGlobalMem ;
173176 std::size_t freeInternal (0u );
174177 {
175178 std::lock_guard<std::mutex> lock (dev.m_QueueRegistry ->mutex ());
176- if (!dev. m_QueueRegistry -> deviceProperties (). totalGlobalMem .has_value ())
179+ if (!totalGlobalMem.has_value ())
177180 {
178181 // Set the current device to wait for.
179182 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK (TApi::setDevice (dev.getNativeHandle ()));
@@ -182,7 +185,7 @@ namespace alpaka
182185
183186 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK (TApi::memGetInfo (&freeInternal, &totalInternal));
184187
185- dev. m_QueueRegistry -> deviceProperties (). totalGlobalMem = totalInternal;
188+ totalGlobalMem = totalInternal;
186189 }
187190 }
188191
@@ -196,15 +199,28 @@ namespace alpaka
196199 {
197200 ALPAKA_FN_HOST static auto getWarpSizes (DevUniformCudaHipRt<TApi> const & dev) -> std::vector<std::size_t>
198201 {
202+ auto & warpSizes = dev.m_QueueRegistry ->deviceProperties ().warpSizes ;
199203 {
200204 std::lock_guard<std::mutex> lock (dev.m_QueueRegistry ->mutex ());
201- if (!dev. m_QueueRegistry -> deviceProperties (). warpSizes .has_value ())
205+ if (!warpSizes.has_value ())
202206 {
203- dev.m_QueueRegistry ->deviceProperties ().warpSizes = std::vector<std::size_t >{
204- GetPreferredWarpSize<DevUniformCudaHipRt<TApi>>::getPreferredWarpSize (dev)};
207+ if (dev.m_QueueRegistry ->deviceProperties ().preferredWarpSize .has_value ())
208+ {
209+ warpSizes = std::vector<std::size_t >{
210+ dev.m_QueueRegistry ->deviceProperties ().preferredWarpSize .value ()};
211+ }
212+ else
213+ {
214+ int warpSize = 0 ;
215+ ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK (TApi::deviceGetAttribute (
216+ &warpSize,
217+ TApi::deviceAttributeWarpSize,
218+ dev.getNativeHandle ()));
219+ warpSizes = std::vector<std::size_t >{static_cast <std::size_t >(warpSize)};
220+ }
205221 }
206222 }
207- return dev. m_QueueRegistry -> deviceProperties (). warpSizes .value ();
223+ return warpSizes.value ();
208224 }
209225 };
210226
@@ -214,19 +230,20 @@ namespace alpaka
214230 {
215231 ALPAKA_FN_HOST static auto getPreferredWarpSize (DevUniformCudaHipRt<TApi> const & dev) -> std::size_t
216232 {
233+ auto & preferredWarpSize = dev.m_QueueRegistry ->deviceProperties ().preferredWarpSize ;
217234 {
218235 std::lock_guard<std::mutex> lock (dev.m_QueueRegistry ->mutex ());
219- if (!dev. m_QueueRegistry -> deviceProperties (). preferredWarpSize .has_value ())
236+ if (!preferredWarpSize.has_value ())
220237 {
221238 int warpSize = 0 ;
222239
223240 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK (
224241 TApi::deviceGetAttribute (&warpSize, TApi::deviceAttributeWarpSize, dev.getNativeHandle ()));
225- dev. m_QueueRegistry -> deviceProperties (). preferredWarpSize = static_cast <std::size_t >(warpSize);
242+ preferredWarpSize = static_cast <std::size_t >(warpSize);
226243 }
227244 }
228245
229- return dev. m_QueueRegistry -> deviceProperties (). preferredWarpSize .value ();
246+ return preferredWarpSize.value ();
230247 }
231248 };
232249
0 commit comments