Skip to content

Commit aebbe43

Browse files
committed
Merge branch 'development' of https://github.com/AMReX-Codes/amrex into bicgstab-print-development
2 parents 6b9afc9 + a9da2a5 commit aebbe43

28 files changed

+733
-639
lines changed

.github/workflows/intel.yml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -226,10 +226,9 @@ jobs:
226226
-DCMAKE_VERBOSE_MAKEFILE=ON \
227227
-DAMReX_EB=ON \
228228
-DAMReX_ENABLE_TESTS=ON \
229-
-DAMReX_FORTRAN=ON \
229+
-DAMReX_FORTRAN=OFF \
230230
-DCMAKE_C_COMPILER=$(which icc) \
231231
-DCMAKE_CXX_COMPILER=$(which icpc) \
232-
-DCMAKE_Fortran_COMPILER=$(which ifort) \
233232
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
234233
cmake --build build --parallel 2
235234
cmake --build build --target install

Docs/sphinx_documentation/source/GPU.rst

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -489,11 +489,10 @@ GPU support.
489489
When AMReX is compiled with ``USE_OMP_OFFLOAD=TRUE``,
490490
``AMREX_USE_OMP_OFFLOAD`` is defined.
491491

492-
In addition to AMReX's preprocessor macros, CUDA provides the
493-
``__CUDA_ARCH__`` macro which is only defined when in device code.
494-
HIP and Sycl provide similar macros.
495-
``AMREX_DEVICE_COMPILE`` should be used when a ``__host__ __device__``
496-
function requires separate code for the CPU and GPU implementations.
492+
The macros ``AMREX_IF_ON_DEVICE((code_for_device))`` and
493+
``AMREX_IF_ON_HOST((code_for_host))`` should be used when a
494+
``__host__ __device__`` function requires separate code for the
495+
CPU and GPU implementations.
497496

498497
.. ===================================================================
499498

Src/Base/AMReX.H

Lines changed: 27 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -113,16 +113,15 @@ namespace amrex
113113

114114
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
115115
void Error (const char* msg = nullptr) {
116-
#if AMREX_DEVICE_COMPILE
117116
#if defined(NDEBUG)
118-
amrex::ignore_unused(msg);
117+
AMREX_IF_ON_DEVICE((amrex::ignore_unused(msg);))
119118
#else
120-
if (msg) { AMREX_DEVICE_PRINTF("Error %s\n", msg); }
121-
AMREX_DEVICE_ASSERT(0);
122-
#endif
123-
#else
124-
Error_host("Error", msg);
119+
AMREX_IF_ON_DEVICE((
120+
if (msg) { AMREX_DEVICE_PRINTF("Error %s\n", msg); }
121+
AMREX_DEVICE_ASSERT(0);
122+
))
125123
#endif
124+
AMREX_IF_ON_HOST((Error_host("Error", msg);))
126125
}
127126

128127
//! Print out warning message to cerr.
@@ -132,32 +131,28 @@ namespace amrex
132131

133132
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
134133
void Warning (const char * msg) {
135-
#if AMREX_DEVICE_COMPILE
136134
#if defined(NDEBUG)
137-
amrex::ignore_unused(msg);
138-
#else
139-
if (msg) { AMREX_DEVICE_PRINTF("Warning %s\n", msg); }
140-
#endif
135+
AMREX_IF_ON_DEVICE((amrex::ignore_unused(msg);))
141136
#else
142-
Warning_host(msg);
137+
AMREX_IF_ON_DEVICE((if (msg) { AMREX_DEVICE_PRINTF("Warning %s\n", msg); }))
143138
#endif
139+
AMREX_IF_ON_HOST((Warning_host(msg);))
144140
}
145141

146142
//! Print out message to cerr and exit via abort().
147143
void Abort (const std::string& msg);
148144

149145
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
150146
void Abort (const char * msg = nullptr) {
151-
#if AMREX_DEVICE_COMPILE
152147
#if defined(NDEBUG)
153-
amrex::ignore_unused(msg);
148+
AMREX_IF_ON_DEVICE((amrex::ignore_unused(msg);))
154149
#else
155-
if (msg) { AMREX_DEVICE_PRINTF("Abort %s\n", msg); }
156-
AMREX_DEVICE_ASSERT(0);
157-
#endif
158-
#else
159-
Error_host("Abort", msg);
150+
AMREX_IF_ON_DEVICE((
151+
if (msg) { AMREX_DEVICE_PRINTF("Abort %s\n", msg); }
152+
AMREX_DEVICE_ASSERT(0);
153+
))
160154
#endif
155+
AMREX_IF_ON_HOST((Error_host("Abort", msg);))
161156
}
162157

163158
/**
@@ -170,22 +165,21 @@ namespace amrex
170165

171166
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
172167
void Assert (const char* EX, const char* file, int line, const char* msg = nullptr) {
173-
#if AMREX_DEVICE_COMPILE
174168
#if defined(NDEBUG)
175-
amrex::ignore_unused(EX,file,line,msg);
176-
#else
177-
if (msg) {
178-
AMREX_DEVICE_PRINTF("Assertion `%s' failed, file \"%s\", line %d, Msg: %s",
179-
EX, file, line, msg);
180-
} else {
181-
AMREX_DEVICE_PRINTF("Assertion `%s' failed, file \"%s\", line %d",
182-
EX, file, line);
183-
}
184-
AMREX_DEVICE_ASSERT(0);
185-
#endif
169+
AMREX_IF_ON_DEVICE((amrex::ignore_unused(EX,file,line,msg);))
186170
#else
187-
Assert_host(EX,file,line,msg);
171+
AMREX_IF_ON_DEVICE((
172+
if (msg) {
173+
AMREX_DEVICE_PRINTF("Assertion `%s' failed, file \"%s\", line %d, Msg: %s",
174+
EX, file, line, msg);
175+
} else {
176+
AMREX_DEVICE_PRINTF("Assertion `%s' failed, file \"%s\", line %d",
177+
EX, file, line);
178+
}
179+
AMREX_DEVICE_ASSERT(0);
180+
))
188181
#endif
182+
AMREX_IF_ON_HOST((Assert_host(EX,file,line,msg);))
189183
}
190184

191185
/**

Src/Base/AMReX_Algorithm.H

Lines changed: 98 additions & 80 deletions
Original file line number | Diff line number | Diff line change
@@ -161,51 +161,52 @@ namespace amrex
161161
AMREX_GPU_HOST_DEVICE
162162
ItType upper_bound (ItType first, ItType last, const ValType& val)
163163
{
164-
#if AMREX_DEVICE_COMPILE
165-
std::ptrdiff_t count = last-first;
166-
while(count>0){
167-
auto it = first;
168-
const auto step = count/2;
169-
it += step;
170-
if (!(val < *it)){
171-
first = ++it;
172-
count -= step + 1;
164+
AMREX_IF_ON_DEVICE((
165+
std::ptrdiff_t count = last-first;
166+
while(count>0){
167+
auto it = first;
168+
const auto step = count/2;
169+
it += step;
170+
if (!(val < *it)){
171+
first = ++it;
172+
count -= step + 1;
173+
}
174+
else{
175+
count = step;
176+
}
173177
}
174-
else{
175-
count = step;
176-
}
177-
}
178-
179-
return first;
180-
#else
181-
return std::upper_bound(first, last, val);
182-
#endif
178+
return first;
179+
))
180+
AMREX_IF_ON_HOST((
181+
return std::upper_bound(first, last, val);
182+
))
183183
}
184184

185185
template<typename ItType, typename ValType>
186186
AMREX_GPU_HOST_DEVICE
187187
ItType lower_bound (ItType first, ItType last, const ValType& val)
188188
{
189-
#ifdef AMREX_DEVICE_COMPILE
190-
std::ptrdiff_t count = last-first;
191-
while(count>0)
192-
{
193-
auto it = first;
194-
const auto step = count/2;
195-
it += step;
196-
if (*it < val){
197-
first = ++it;
198-
count -= step + 1;
199-
}
200-
else{
201-
count = step;
189+
AMREX_IF_ON_DEVICE((
190+
std::ptrdiff_t count = last-first;
191+
while(count>0)
192+
{
193+
auto it = first;
194+
const auto step = count/2;
195+
it += step;
196+
if (*it < val){
197+
first = ++it;
198+
count -= step + 1;
199+
}
200+
else{
201+
count = step;
202+
}
202203
}
203-
}
204204

205-
return first;
206-
#else
207-
return std::lower_bound(first, last, val);
208-
#endif
205+
return first;
206+
))
207+
AMREX_IF_ON_HOST((
208+
return std::lower_bound(first, last, val);
209+
))
209210
}
210211

211212
namespace detail {
@@ -239,83 +240,100 @@ int builtin_clz_wrapper (clzll_tag, T x) noexcept
239240
return static_cast<int>(__builtin_clzll(x) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT));
240241
}
241242

242-
#ifdef AMREX_USE_CUDA
243-
244-
// likewise with CUDA, there are __clz functions that take (signed) int and long long int
245-
template <typename T, typename = typename std::enable_if<sizeof(T) <= sizeof(int)>::type>
246-
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
247-
int clz_wrapper (clz_tag, T x) noexcept
248-
{
249-
return __clz((int) x) - (sizeof(int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
250-
}
251-
252-
template <typename T, typename = typename std::enable_if<sizeof(T) <= sizeof(long long int)>::type>
253-
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
254-
int clz_wrapper (clzll_tag, T x) noexcept
255-
{
256-
return __clzll((long long int) x) - (sizeof(long long int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
257243
}
258-
#endif
259244

260-
}
245+
template <class T, typename std::enable_if_t<std::is_same_v<std::decay_t<T>,std::uint8_t> ||
246+
std::is_same_v<std::decay_t<T>,std::uint16_t> ||
247+
std::is_same_v<std::decay_t<T>,std::uint32_t> ||
248+
std::is_same_v<std::decay_t<T>,std::uint64_t>, int> = 0>
249+
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
250+
int clz (T x) noexcept;
261251

262252
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
263-
int clz (std::uint8_t x) noexcept
253+
int clz_generic (std::uint8_t x) noexcept
264254
{
265-
#if (AMREX_DEVICE_COMPILE && defined(AMREX_USE_CUDA)) // all supported cuda versions have __clz
266-
return detail::clz_wrapper(detail::clz_tag{}, x);
267-
#elif (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)
268-
return detail::builtin_clz_wrapper(detail::clz_tag{}, x);
269-
#else
270255
static constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
271256
auto upper = x >> 4;
272257
auto lower = x & 0xF;
273258
return upper ? clz_lookup[upper] : 4 + clz_lookup[lower];
274-
#endif
275259
}
276260

277261
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
278-
int clz (std::uint16_t x) noexcept
262+
int clz_generic (std::uint16_t x) noexcept
279263
{
280-
#if (AMREX_DEVICE_COMPILE && defined(AMREX_USE_CUDA)) // all supported cuda versions have __clz
281-
return detail::clz_wrapper(detail::clz_tag{}, x);
282-
#elif (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)
283-
return detail::builtin_clz_wrapper(detail::clz_tag{}, x);
284-
#else
285264
auto upper = std::uint8_t(x >> 8);
286265
auto lower = std::uint8_t(x & 0xFF);
287266
return upper ? clz(upper) : 8 + clz(lower);
288-
#endif
289267
}
290268

291269
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
292-
int clz (std::uint32_t x) noexcept
270+
int clz_generic (std::uint32_t x) noexcept
293271
{
294-
#if (AMREX_DEVICE_COMPILE && defined(AMREX_USE_CUDA)) // all supported cuda versions have __clz
295-
return detail::clz_wrapper(detail::clz_tag{}, x);
296-
#elif (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)
297-
return detail::builtin_clz_wrapper(detail::clz_tag{}, x);
298-
#else
299272
auto upper = std::uint16_t(x >> 16);
300273
auto lower = std::uint16_t(x & 0xFFFF);
301274
return upper ? clz(upper) : 16 + clz(lower);
302-
#endif
303275
}
304276

305277
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
306-
int clz (std::uint64_t x) noexcept
278+
int clz_generic (std::uint64_t x) noexcept
307279
{
308-
#if (AMREX_DEVICE_COMPILE && defined(AMREX_USE_CUDA)) // all supported cuda versions have __clz
309-
return detail::clz_wrapper(detail::clz_tag{}, x);
310-
#elif (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)
311-
return detail::builtin_clz_wrapper(detail::clz_tag{}, x);
312-
#else
313280
auto upper = std::uint32_t(x >> 32);
314281
auto lower = std::uint32_t(x & 0xFFFFFFFF);
315282
return upper ? clz(upper) : 32 + clz(lower);
283+
}
284+
285+
#if defined AMREX_USE_CUDA
286+
287+
namespace detail {
288+
// likewise with CUDA, there are __clz functions that take (signed) int and long long int
289+
template <typename T, typename = typename std::enable_if<sizeof(T) <= sizeof(int)>::type>
290+
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
291+
int clz_wrapper (clz_tag, T x) noexcept
292+
{
293+
return __clz((int) x) - (sizeof(int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
294+
}
295+
296+
template <typename T, typename = typename std::enable_if<sizeof(T) <= sizeof(long long int)>::type>
297+
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
298+
int clz_wrapper (clzll_tag, T x) noexcept
299+
{
300+
return __clzll((long long int) x) - (sizeof(long long int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
301+
}
302+
}
303+
304+
template <class T, typename std::enable_if_t<std::is_same_v<std::decay_t<T>,std::uint8_t> ||
305+
std::is_same_v<std::decay_t<T>,std::uint16_t> ||
306+
std::is_same_v<std::decay_t<T>,std::uint32_t> ||
307+
std::is_same_v<std::decay_t<T>,std::uint64_t>, int> >
308+
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
309+
int clz (T x) noexcept
310+
{
311+
AMREX_IF_ON_DEVICE((return detail::clz_wrapper(detail::clz_tag{}, x);))
312+
#if AMREX_HAS_BUILTIN_CLZ
313+
AMREX_IF_ON_HOST((return detail::builtin_clz_wrapper(detail::clz_tag{}, x);))
314+
#else
315+
AMREX_IF_ON_HOST((return clz_generic(x);))
316316
#endif
317317
}
318318

319+
#else // !defined AMREX_USE_CUDA
320+
321+
template <class T, typename std::enable_if_t<std::is_same_v<std::decay_t<T>,std::uint8_t> ||
322+
std::is_same_v<std::decay_t<T>,std::uint16_t> ||
323+
std::is_same_v<std::decay_t<T>,std::uint32_t> ||
324+
std::is_same_v<std::decay_t<T>,std::uint64_t>, int> >
325+
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
326+
int clz (T x) noexcept
327+
{
328+
#if (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)
329+
return detail::builtin_clz_wrapper(detail::clz_tag{}, x);
330+
#else
331+
return clz_generic(x);
332+
#endif
333+
}
334+
335+
#endif // defined AMREX_USE_CUDA
336+
319337
}
320338

321339
#endif

0 commit comments

Comments
 (0)