Skip to content

Commit 309f1bd

Browse files
committed
Fix deadlock condition in cuda/hip and make sintax lighter
1 parent e49e604 commit 309f1bd

File tree

3 files changed

+55
-34
lines changed

3 files changed

+55
-34
lines changed

include/alpaka/dev/DevCpu.hpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -107,14 +107,15 @@ namespace alpaka
107107
{
108108
ALPAKA_FN_HOST static auto getName(DevCpu const& dev) -> std::string
109109
{
110+
auto& name = dev.m_spDevCpuImpl->deviceProperties().name;
110111
{
111112
std::lock_guard<std::mutex> lock(dev.m_spDevCpuImpl->mutex());
112-
if(!dev.m_spDevCpuImpl->deviceProperties().name.has_value())
113+
if(!name.has_value())
113114
{
114-
dev.m_spDevCpuImpl->deviceProperties().name = cpu::detail::getCpuName();
115+
name = cpu::detail::getCpuName();
115116
}
116117
}
117-
return dev.m_spDevCpuImpl->deviceProperties().name.value();
118+
return name.value();
118119
}
119120
};
120121

@@ -124,15 +125,15 @@ namespace alpaka
124125
{
125126
ALPAKA_FN_HOST static auto getMemBytes(DevCpu const& dev) -> std::size_t
126127
{
128+
auto& totalGlobalMem = dev.m_spDevCpuImpl->deviceProperties().totalGlobalMem;
127129
{
128130
std::lock_guard<std::mutex> lock(dev.m_spDevCpuImpl->mutex());
129-
if(!dev.m_spDevCpuImpl->deviceProperties().totalGlobalMem.has_value())
131+
if(!totalGlobalMem.has_value())
130132
{
131-
dev.m_spDevCpuImpl->deviceProperties().totalGlobalMem
132-
= cpu::detail::getTotalGlobalMemSizeBytes();
133+
totalGlobalMem = cpu::detail::getTotalGlobalMemSizeBytes();
133134
}
134135
}
135-
return dev.m_spDevCpuImpl->deviceProperties().totalGlobalMem.value();
136+
return totalGlobalMem.value();
136137
}
137138
};
138139

include/alpaka/dev/DevGenericSycl.hpp

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -172,15 +172,16 @@ namespace alpaka
172172
{
173173
static auto getName(DevGenericSycl<TTag> const& dev) -> std::string
174174
{
175+
auto& name = dev.m_impl->deviceProperties().name;
175176
{
176177
std::lock_guard<std::shared_mutex> lock(dev.m_impl->mutex());
177-
if(!dev.m_impl->deviceProperties().name.has_value())
178+
if(!name.has_value())
178179
{
179180
auto const device = dev.getNativeHandle().first;
180-
dev.m_impl->deviceProperties().name = device.template get_info<sycl::info::device::name>();
181+
name = device.template get_info<sycl::info::device::name>();
181182
}
182183
}
183-
return dev.m_impl->deviceProperties().name.value();
184+
return name.value();
184185
}
185186
};
186187

@@ -190,16 +191,16 @@ namespace alpaka
190191
{
191192
static auto getMemBytes(DevGenericSycl<TTag> const& dev) -> std::size_t
192193
{
194+
auto& totalGlobalMem = dev.m_impl->deviceProperties().totalGlobalMem;
193195
{
194196
std::lock_guard<std::shared_mutex> lock(dev.m_impl->mutex());
195-
if(!dev.m_impl->deviceProperties().totalGlobalMem.has_value())
197+
if(!totalGlobalMem.has_value())
196198
{
197199
auto const device = dev.getNativeHandle().first;
198-
dev.m_impl->deviceProperties().totalGlobalMem
199-
= device.template get_info<sycl::info::device::global_mem_size>();
200+
totalGlobalMem = device.template get_info<sycl::info::device::global_mem_size>();
200201
}
201202
}
202-
return dev.m_impl->deviceProperties().totalGlobalMem.value();
203+
return totalGlobalMem.value();
203204
}
204205
};
205206

@@ -222,9 +223,10 @@ namespace alpaka
222223
{
223224
static auto getWarpSizes(DevGenericSycl<TTag> const& dev) -> std::vector<std::size_t>
224225
{
226+
auto& warpSizes = dev.m_impl->deviceProperties().warpSizes
225227
{
226228
std::lock_guard<std::shared_mutex> lock(dev.m_impl->mutex());
227-
if(!dev.m_impl->deviceProperties().warpSizes.has_value())
229+
if(!warpSizes.has_value())
228230
{
229231
auto const device = dev.getNativeHandle().first;
230232
std::vector<std::size_t> warp_sizes
@@ -236,10 +238,10 @@ namespace alpaka
236238
warp_sizes.erase(find64);
237239
// Sort the warp sizes in decreasing order
238240
std::sort(warp_sizes.begin(), warp_sizes.end(), std::greater<>{});
239-
dev.m_impl->deviceProperties().warpSizes = std::move(warp_sizes);
241+
warpSizes = std::move(warp_sizes);
240242
}
241243
}
242-
return dev.m_impl->deviceProperties().warpSizes.value();
244+
return warpSizes.value();
243245
}
244246
};
245247

@@ -249,11 +251,12 @@ namespace alpaka
249251
{
250252
static auto getPreferredWarpSize(DevGenericSycl<TTag> const& dev) -> std::size_t
251253
{
254+
auto& warpSizes = dev.m_impl->deviceProperties().warpSizes;
252255
{
253256
std::lock_guard<std::shared_mutex> lock(dev.m_impl->mutex());
254-
if(dev.m_impl->deviceProperties().warpSizes.has_value())
257+
if(!warpSizes.has_value())
255258
{
256-
return dev.m_impl->deviceProperties().warpSizes.value().front();
259+
return warpSizes.value().front();
257260
}
258261
}
259262
return GetWarpSizes<DevGenericSycl<TTag>>::getWarpSizes(dev).front();

include/alpaka/dev/DevUniformCudaHipRt.hpp

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -122,19 +122,20 @@ namespace alpaka
122122
{
123123
ALPAKA_FN_HOST static auto getName(DevUniformCudaHipRt<TApi> const& dev) -> std::string
124124
{
125+
auto& name = dev.m_QueueRegistry->deviceProperties().name;
125126
{
126127
std::lock_guard<std::mutex> lock(dev.m_QueueRegistry->mutex());
127-
if(!dev.m_QueueRegistry->deviceProperties().name.has_value())
128+
if(!name.has_value())
128129
{
129130
// There is cuda/hip-DeviceGetAttribute as faster alternative to cuda/hip-GetDeviceProperties
130131
// to get a single device property but it has no option to get the name
131132
typename TApi::DeviceProp_t devProp;
132133
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::getDeviceProperties(&devProp, dev.getNativeHandle()));
133-
dev.m_QueueRegistry->deviceProperties().name = std::string(devProp.name);
134+
name = std::string(devProp.name);
134135
}
135136
}
136137

137-
return dev.m_QueueRegistry->deviceProperties().name.value();
138+
return name.value();
138139
}
139140
};
140141

@@ -144,9 +145,10 @@ namespace alpaka
144145
{
145146
ALPAKA_FN_HOST static auto getMemBytes(DevUniformCudaHipRt<TApi> const& dev) -> std::size_t
146147
{
148+
auto& totalGlobalMem = dev.m_QueueRegistry->deviceProperties().totalGlobalMem;
147149
{
148150
std::lock_guard<std::mutex> lock(dev.m_QueueRegistry->mutex());
149-
if(!dev.m_QueueRegistry->deviceProperties().totalGlobalMem.has_value())
151+
if(!totalGlobalMem.has_value())
150152
{
151153
// Set the current device to wait for.
152154
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));
@@ -156,11 +158,11 @@ namespace alpaka
156158

157159
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::memGetInfo(&freeInternal, &totalInternal));
158160

159-
dev.m_QueueRegistry->deviceProperties().totalGlobalMem = totalInternal;
161+
totalGlobalMem = totalInternal;
160162
}
161163
}
162164

163-
return dev.m_QueueRegistry->deviceProperties().totalGlobalMem.value();
165+
return totalGlobalMem.value();
164166
}
165167
};
166168

@@ -170,10 +172,11 @@ namespace alpaka
170172
{
171173
ALPAKA_FN_HOST static auto getFreeMemBytes(DevUniformCudaHipRt<TApi> const& dev) -> std::size_t
172174
{
175+
auto& freeInternal = dev.m_QueueRegistry->deviceProperties().freeInternal;
173176
std::size_t freeInternal(0u);
174177
{
175178
std::lock_guard<std::mutex> lock(dev.m_QueueRegistry->mutex());
176-
if(!dev.m_QueueRegistry->deviceProperties().totalGlobalMem.has_value())
179+
if(!totalGlobalMem.has_value())
177180
{
178181
// Set the current device to wait for.
179182
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));
@@ -182,7 +185,7 @@ namespace alpaka
182185

183186
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::memGetInfo(&freeInternal, &totalInternal));
184187

185-
dev.m_QueueRegistry->deviceProperties().totalGlobalMem = totalInternal;
188+
totalGlobalMem = totalInternal;
186189
}
187190
}
188191

@@ -196,15 +199,28 @@ namespace alpaka
196199
{
197200
ALPAKA_FN_HOST static auto getWarpSizes(DevUniformCudaHipRt<TApi> const& dev) -> std::vector<std::size_t>
198201
{
202+
auto& warpSizes = dev.m_QueueRegistry->deviceProperties().warpSizes;
199203
{
200204
std::lock_guard<std::mutex> lock(dev.m_QueueRegistry->mutex());
201-
if(!dev.m_QueueRegistry->deviceProperties().warpSizes.has_value())
205+
if(!warpSizes.has_value())
202206
{
203-
dev.m_QueueRegistry->deviceProperties().warpSizes = std::vector<std::size_t>{
204-
GetPreferredWarpSize<DevUniformCudaHipRt<TApi>>::getPreferredWarpSize(dev)};
207+
if(dev.m_QueueRegistry->deviceProperties().preferredWarpSize.has_value())
208+
{
209+
warpSizes = std::vector<std::size_t>{
210+
dev.m_QueueRegistry->deviceProperties().preferredWarpSize.value()};
211+
}
212+
else
213+
{
214+
int warpSize = 0;
215+
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::deviceGetAttribute(
216+
&warpSize,
217+
TApi::deviceAttributeWarpSize,
218+
dev.getNativeHandle()));
219+
warpSizes = std::vector<std::size_t>{warpSize};
220+
}
205221
}
206222
}
207-
return dev.m_QueueRegistry->deviceProperties().warpSizes.value();
223+
return warpSizes.value();
208224
}
209225
};
210226

@@ -214,19 +230,20 @@ namespace alpaka
214230
{
215231
ALPAKA_FN_HOST static auto getPreferredWarpSize(DevUniformCudaHipRt<TApi> const& dev) -> std::size_t
216232
{
233+
auto& preferredWarpSize = dev.m_QueueRegistry->deviceProperties().preferredWarpSize;
217234
{
218235
std::lock_guard<std::mutex> lock(dev.m_QueueRegistry->mutex());
219-
if(!dev.m_QueueRegistry->deviceProperties().preferredWarpSize.has_value())
236+
if(!preferredWarpSize.has_value())
220237
{
221238
int warpSize = 0;
222239

223240
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
224241
TApi::deviceGetAttribute(&warpSize, TApi::deviceAttributeWarpSize, dev.getNativeHandle()));
225-
dev.m_QueueRegistry->deviceProperties().preferredWarpSize = static_cast<std::size_t>(warpSize);
242+
preferredWarpSize = static_cast<std::size_t>(warpSize);
226243
}
227244
}
228245

229-
return dev.m_QueueRegistry->deviceProperties().preferredWarpSize.value();
246+
return preferredWarpSize.value();
230247
}
231248
};
232249

0 commit comments

Comments
 (0)