@@ -79,7 +79,7 @@ void launch (int nblocks, std::size_t shared_mem_bytes, gpuStream_t stream,
7979 h.parallel_for (sycl::nd_range<1 >(sycl::range<1 >(nthreads_total),
8080 sycl::range<1 >(MT)),
8181 [=] (sycl::nd_item<1 > item)
82- [[sycl::reqd_work_group_size (1 , 1 , MT)]]
82+ [[sycl::reqd_work_group_size (MT)]]
8383 [[sycl::reqd_sub_group_size (Gpu::Device::warp_size)]]
8484 {
8585 f (Gpu::Handler{&item,shared_data.get_multi_ptr <sycl::access::decorated::yes>().get ()});
@@ -100,7 +100,7 @@ void launch (int nblocks, gpuStream_t stream, L const& f) noexcept
100100 h.parallel_for (sycl::nd_range<1 >(sycl::range<1 >(nthreads_total),
101101 sycl::range<1 >(MT)),
102102 [=] (sycl::nd_item<1 > item)
103- [[sycl::reqd_work_group_size (1 , 1 , MT)]]
103+ [[sycl::reqd_work_group_size (MT)]]
104104 [[sycl::reqd_sub_group_size (Gpu::Device::warp_size)]]
105105 {
106106 f (item);
@@ -124,7 +124,7 @@ void launch (T const& n, L const& f) noexcept
124124 h.parallel_for (sycl::nd_range<1 >(sycl::range<1 >(nthreads_total),
125125 sycl::range<1 >(nthreads_per_block)),
126126 [=] (sycl::nd_item<1 > item)
127- [[sycl::reqd_work_group_size (1 , 1 , MT)]]
127+ [[sycl::reqd_work_group_size (MT)]]
128128 [[sycl::reqd_sub_group_size (Gpu::Device::warp_size)]]
129129 {
130130 for (auto const i : Gpu::Range (n,item.get_global_id (0 ),item.get_global_range (0 ))) {
@@ -203,7 +203,7 @@ void ParallelFor (Gpu::KernelInfo const& info, T n, L const& f) noexcept
203203 h.parallel_for (sycl::nd_range<1 >(sycl::range<1 >(nthreads_total),
204204 sycl::range<1 >(nthreads_per_block)),
205205 [=] (sycl::nd_item<1 > item)
206- [[sycl::reqd_work_group_size (1 , 1 , MT)]]
206+ [[sycl::reqd_work_group_size (MT)]]
207207 [[sycl::reqd_sub_group_size (Gpu::Device::warp_size)]]
208208 {
209209 for (std::size_t i = item.get_global_id (0 ), stride = item.get_global_range (0 );
@@ -220,7 +220,7 @@ void ParallelFor (Gpu::KernelInfo const& info, T n, L const& f) noexcept
220220 h.parallel_for (sycl::nd_range<1 >(sycl::range<1 >(nthreads_total),
221221 sycl::range<1 >(nthreads_per_block)),
222222 [=] (sycl::nd_item<1 > item)
223- [[sycl::reqd_work_group_size (1 , 1 , MT)]]
223+ [[sycl::reqd_work_group_size (MT)]]
224224 [[sycl::reqd_sub_group_size (Gpu::Device::warp_size)]]
225225 {
226226 for (std::size_t i = item.get_global_id (0 ), stride = item.get_global_range (0 );
@@ -252,7 +252,7 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, L const& f) noexc
252252 h.parallel_for (sycl::nd_range<1 >(sycl::range<1 >(nthreads_total),
253253 sycl::range<1 >(nthreads_per_block)),
254254 [=] (sycl::nd_item<1 > item)
255- [[sycl::reqd_work_group_size (1 , 1 , MT)]]
255+ [[sycl::reqd_work_group_size (MT)]]
256256 [[sycl::reqd_sub_group_size (Gpu::Device::warp_size)]]
257257 {
258258 for (std::uint64_t icell = item.get_global_id (0 ), stride = item.get_global_range (0 );
@@ -270,7 +270,7 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, L const& f) noexc
270270 h.parallel_for (sycl::nd_range<1 >(sycl::range<1 >(nthreads_total),
271271 sycl::range<1 >(nthreads_per_block)),
272272 [=] (sycl::nd_item<1 > item)
273- [[sycl::reqd_work_group_size (1 , 1 , MT)]]
273+ [[sycl::reqd_work_group_size (MT)]]
274274 [[sycl::reqd_sub_group_size (Gpu::Device::warp_size)]]
275275 {
276276 for (std::uint64_t icell = item.get_global_id (0 ), stride = item.get_global_range (0 );
@@ -303,7 +303,7 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L const&
303303 h.parallel_for (sycl::nd_range<1 >(sycl::range<1 >(nthreads_total),
304304 sycl::range<1 >(nthreads_per_block)),
305305 [=] (sycl::nd_item<1 > item)
306- [[sycl::reqd_work_group_size (1 , 1 , MT)]]
306+ [[sycl::reqd_work_group_size (MT)]]
307307 [[sycl::reqd_sub_group_size (Gpu::Device::warp_size)]]
308308 {
309309 for (std::uint64_t icell = item.get_global_id (0 ), stride = item.get_global_range (0 );
@@ -322,7 +322,7 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L const&
322322 h.parallel_for (sycl::nd_range<1 >(sycl::range<1 >(nthreads_total),
323323 sycl::range<1 >(nthreads_per_block)),
324324 [=] (sycl::nd_item<1 > item)
325- [[sycl::reqd_work_group_size (1 , 1 , MT)]]
325+ [[sycl::reqd_work_group_size (MT)]]
326326 [[sycl::reqd_sub_group_size (Gpu::Device::warp_size)]]
327327 {
328328 for (std::uint64_t icell = item.get_global_id (0 ), stride = item.get_global_range (0 );
@@ -353,7 +353,7 @@ void ParallelForRNG (T n, L const& f) noexcept
353353 h.parallel_for (sycl::nd_range<1 >(sycl::range<1 >(nthreads_total),
354354 sycl::range<1 >(nthreads_per_block)),
355355 [=] (sycl::nd_item<1 > item)
356- [[sycl::reqd_work_group_size (1 , 1 , AMREX_GPU_MAX_THREADS)]]
356+ [[sycl::reqd_work_group_size (AMREX_GPU_MAX_THREADS)]]
357357 [[sycl::reqd_sub_group_size (Gpu::Device::warp_size)]]
358358 {
359359 auto const tid = item.get_global_id (0 );
@@ -387,7 +387,7 @@ void ParallelForRNG (Box const& box, L const& f) noexcept
387387 h.parallel_for (sycl::nd_range<1 >(sycl::range<1 >(nthreads_total),
388388 sycl::range<1 >(nthreads_per_block)),
389389 [=] (sycl::nd_item<1 > item)
390- [[sycl::reqd_work_group_size (1 , 1 , AMREX_GPU_MAX_THREADS)]]
390+ [[sycl::reqd_work_group_size (AMREX_GPU_MAX_THREADS)]]
391391 [[sycl::reqd_sub_group_size (Gpu::Device::warp_size)]]
392392 {
393393 auto const tid = item.get_global_id (0 );
@@ -423,7 +423,7 @@ void ParallelForRNG (Box const& box, T ncomp, L const& f) noexcept
423423 h.parallel_for (sycl::nd_range<1 >(sycl::range<1 >(nthreads_total),
424424 sycl::range<1 >(nthreads_per_block)),
425425 [=] (sycl::nd_item<1 > item)
426- [[sycl::reqd_work_group_size (1 , 1 , AMREX_GPU_MAX_THREADS)]]
426+ [[sycl::reqd_work_group_size (AMREX_GPU_MAX_THREADS)]]
427427 [[sycl::reqd_sub_group_size (Gpu::Device::warp_size)]]
428428 {
429429 auto const tid = item.get_global_id (0 );
@@ -460,7 +460,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, Box const& box1, Box const& b
460460 h.parallel_for (sycl::nd_range<1 >(sycl::range<1 >(nthreads_total),
461461 sycl::range<1 >(nthreads_per_block)),
462462 [=] (sycl::nd_item<1 > item)
463- [[sycl::reqd_work_group_size (1 , 1 , MT)]]
463+ [[sycl::reqd_work_group_size (MT)]]
464464 [[sycl::reqd_sub_group_size (Gpu::Device::warp_size)]]
465465 {
466466 auto const ncells = std::max (indexer1.numPts (), indexer2.numPts ());
@@ -500,7 +500,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/,
500500 h.parallel_for (sycl::nd_range<1 >(sycl::range<1 >(nthreads_total),
501501 sycl::range<1 >(nthreads_per_block)),
502502 [=] (sycl::nd_item<1 > item)
503- [[sycl::reqd_work_group_size (1 , 1 , MT)]]
503+ [[sycl::reqd_work_group_size (MT)]]
504504 [[sycl::reqd_sub_group_size (Gpu::Device::warp_size)]]
505505 {
506506 auto const ncells = std::max ({indexer1.numPts (), indexer2.numPts (), indexer3.numPts ()});
@@ -545,7 +545,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/,
545545 h.parallel_for (sycl::nd_range<1 >(sycl::range<1 >(nthreads_total),
546546 sycl::range<1 >(nthreads_per_block)),
547547 [=] (sycl::nd_item<1 > item)
548- [[sycl::reqd_work_group_size (1 , 1 , MT)]]
548+ [[sycl::reqd_work_group_size (MT)]]
549549 [[sycl::reqd_sub_group_size (Gpu::Device::warp_size)]]
550550 {
551551 auto const ncells = std::max (indexer1.numPts (), indexer2.numPts ());
@@ -593,7 +593,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/,
593593 h.parallel_for (sycl::nd_range<1 >(sycl::range<1 >(nthreads_total),
594594 sycl::range<1 >(nthreads_per_block)),
595595 [=] (sycl::nd_item<1 > item)
596- [[sycl::reqd_work_group_size (1 , 1 , MT)]]
596+ [[sycl::reqd_work_group_size (MT)]]
597597 [[sycl::reqd_sub_group_size (Gpu::Device::warp_size)]]
598598 {
599599 auto const ncells = std::max ({indexer1.numPts (), indexer2.numPts (), indexer3.numPts ()});
0 commit comments