Skip to content

Commit 3097872

Browse files
authored
Merge pull request #684 from ComputationalRadiationPhysics/release-0.3.4
0.3.4: Support for CUDA 10 and bug fixes
2 parents 172b9e2 + af5f004 commit 3097872

File tree

123 files changed

+2420
-809
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

123 files changed

+2420
-809
lines changed

alpakaConfig.cmake

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,12 @@ IF(ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE OR ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE OR A
279279
SET(ALPAKA_ACC_CPU_BT_OMP4_ENABLE OFF CACHE BOOL "Enable the OpenMP 4.0 CPU block and thread back-end" FORCE)
280280

281281
ELSE()
282+
283+
# Check whether OpenMP 4 is supported
284+
IF(OpenMP_CXX_VERSION VERSION_LESS 4.0)
285+
SET(ALPAKA_ACC_CPU_BT_OMP4_ENABLE OFF CACHE BOOL "Enable the OpenMP 4.0 CPU block and thread back-end" FORCE)
286+
ENDIF()
287+
282288
# CUDA requires some special handling
283289
IF(ALPAKA_ACC_GPU_CUDA_ENABLE)
284290
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")

appveyor.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ environment:
4949
matrix:
5050
- ALPAKA_DEBUG: 2
5151
OMP_NUM_THREADS: 4
52-
ALPAKA_BOOST_BRANCH: boost-1.65.1
52+
ALPAKA_BOOST_BRANCH: boost-1.66.0
5353
APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
5454
- OMP_NUM_THREADS: 4
5555
ALPAKA_BOOST_BRANCH: boost-1.64.0
@@ -141,7 +141,7 @@ before_build:
141141
- cmd: if not "%ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE%"=="OFF" set ALPAKA_BOOST_B2=%ALPAKA_BOOST_B2% --with-fiber --with-context --with-thread --with-system --with-atomic --with-chrono --with-date_time
142142

143143
- cmd: if "%APPVEYOR_BUILD_WORKER_IMAGE%"=="Visual Studio 2017" set ALPAKA_BOOST_TOOLSET=msvc-14.1
144-
- cmd: b2 -j2 --toolset=%ALPAKA_BOOST_TOOLSET% --layout=versioned %ALPAKA_BOOST_B2% architecture=x86 address-model=%ALPAKA_BOOST_ADDRESS_MODEL% variant=%ALPAKA_BOOST_VARIANT% link=static threading=multi runtime-link=shared define=_CRT_NONSTDC_NO_DEPRECATE define=_CRT_SECURE_NO_DEPRECATE define=_SCL_SECURE_NO_DEPRECAT define=BOOST_USE_WINFIBERS --stagedir="%ALPAKA_B2_STAGE_DIR%"
144+
- cmd: b2 -j2 --toolset=%ALPAKA_BOOST_TOOLSET% --layout=versioned %ALPAKA_BOOST_B2% architecture=x86 address-model=%ALPAKA_BOOST_ADDRESS_MODEL% variant=%ALPAKA_BOOST_VARIANT% link=static threading=multi runtime-link=shared define=_CRT_NONSTDC_NO_DEPRECATE define=_CRT_SECURE_NO_DEPRECATE define=_SCL_SECURE_NO_DEPRECAT define=BOOST_USE_WINFIBERS define=_ENABLE_EXTENDED_ALIGNED_STORAGE --stagedir="%ALPAKA_B2_STAGE_DIR%"
145145

146146
#-------------------------------------------------------------------------------
147147
# Install TBB

include/alpaka/acc/AccCpuFibers.hpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,15 @@
4545
#include <alpaka/size/Traits.hpp>
4646

4747
// Implementation details.
48+
#include <alpaka/core/BoostPredef.hpp>
49+
#include <alpaka/core/ClipCast.hpp>
50+
#include <alpaka/core/Fibers.hpp>
51+
#include <alpaka/core/Unused.hpp>
4852
#include <alpaka/dev/DevCpu.hpp>
4953

5054
#include <alpaka/core/Fibers.hpp>
5155

5256
#include <boost/core/ignore_unused.hpp>
53-
#include <boost/predef.h>
54-
5557
#include <memory>
5658
#include <typeinfo>
5759

@@ -188,7 +190,7 @@ namespace alpaka
188190
#endif
189191
return {
190192
// m_multiProcessorCount
191-
std::max(static_cast<TSize>(1), static_cast<TSize>(std::thread::hardware_concurrency())), // \TODO: This may be inaccurate.
193+
std::max(static_cast<TSize>(1), alpaka::core::clipCast<TSize>(std::thread::hardware_concurrency())), // \TODO: This may be inaccurate.
192194
// m_gridBlockExtentMax
193195
vec::Vec<TDim, TSize>::all(std::numeric_limits<TSize>::max()),
194196
// m_gridBlockCountMax

include/alpaka/acc/AccCpuOmp2Threads.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949
#include <alpaka/size/Traits.hpp>
5050

5151
// Implementation details.
52+
#include <alpaka/core/ClipCast.hpp>
53+
#include <alpaka/core/Unused.hpp>
5254
#include <alpaka/dev/DevCpu.hpp>
5355

5456
#include <boost/core/ignore_unused.hpp>
@@ -178,9 +180,9 @@ namespace alpaka
178180
boost::ignore_unused(dev);
179181

180182
#ifdef ALPAKA_CI
181-
auto const blockThreadCountMax(static_cast<TSize>(std::min(4, ::omp_get_max_threads())));
183+
auto const blockThreadCountMax(alpaka::core::clipCast<TSize>(std::min(4, ::omp_get_max_threads())));
182184
#else
183-
auto const blockThreadCountMax(static_cast<TSize>(::omp_get_max_threads()));
185+
auto const blockThreadCountMax(alpaka::core::clipCast<TSize>(::omp_get_max_threads()));
184186
#endif
185187
return {
186188
// m_multiProcessorCount

include/alpaka/acc/AccCpuOmp4.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949
#include <alpaka/size/Traits.hpp>
5050

5151
// Implementation details.
52+
#include <alpaka/core/ClipCast.hpp>
53+
#include <alpaka/core/Unused.hpp>
5254
#include <alpaka/dev/DevCpu.hpp>
5355

5456
#include <boost/core/ignore_unused.hpp>
@@ -178,9 +180,9 @@ namespace alpaka
178180
boost::ignore_unused(dev);
179181

180182
#ifdef ALPAKA_CI
181-
auto const blockThreadCountMax(static_cast<TSize>(std::min(4, ::omp_get_max_threads())));
183+
auto const blockThreadCountMax(alpaka::core::clipCast<TSize>(std::min(4, ::omp_get_max_threads())));
182184
#else
183-
auto const blockThreadCountMax(static_cast<TSize>(::omp_get_max_threads()));
185+
auto const blockThreadCountMax(alpaka::core::clipCast<TSize>(::omp_get_max_threads()));
184186
#endif
185187
return {
186188
// m_multiProcessorCount

include/alpaka/acc/AccCpuThreads.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,12 @@
4444
#include <alpaka/size/Traits.hpp>
4545

4646
// Implementation details.
47+
#include <alpaka/core/BoostPredef.hpp>
48+
#include <alpaka/core/ClipCast.hpp>
49+
#include <alpaka/core/Unused.hpp>
4750
#include <alpaka/dev/DevCpu.hpp>
4851

4952
#include <boost/core/ignore_unused.hpp>
50-
#include <boost/predef.h>
51-
5253
#include <memory>
5354
#include <thread>
5455
#include <typeinfo>
@@ -183,7 +184,7 @@ namespace alpaka
183184
#else
184185
// \TODO: Magic number. What is the maximum? Just set a reasonable value? There is an implementation-defined maximum where the creation of a new thread crashes.
185186
// std::thread::hardware_concurrency can return 0, so 1 is the default case?
186-
auto const blockThreadCountMax(std::max(static_cast<TSize>(1), static_cast<TSize>(std::thread::hardware_concurrency() * 8)));
187+
auto const blockThreadCountMax(std::max(static_cast<TSize>(1), alpaka::core::clipCast<TSize>(std::thread::hardware_concurrency() * 8)));
187188
#endif
188189
return {
189190
// m_multiProcessorCount

include/alpaka/acc/AccGpuCudaRt.hpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,10 @@
5050
#include <alpaka/size/Traits.hpp>
5151

5252
// Implementation details.
53-
#include <alpaka/dev/DevCudaRt.hpp>
53+
#include <alpaka/core/BoostPredef.hpp>
54+
#include <alpaka/core/ClipCast.hpp>
5455
#include <alpaka/core/Cuda.hpp>
55-
56-
#include <boost/predef.h>
56+
#include <alpaka/dev/DevCudaRt.hpp>
5757

5858
#include <typeinfo>
5959

@@ -161,23 +161,23 @@ namespace alpaka
161161

162162
return {
163163
// m_multiProcessorCount
164-
static_cast<TSize>(cudaDevProp.multiProcessorCount),
164+
alpaka::core::clipCast<TSize>(cudaDevProp.multiProcessorCount),
165165
// m_gridBlockExtentMax
166166
extent::getExtentVecEnd<TDim>(
167167
vec::Vec<dim::DimInt<3u>, TSize>(
168-
static_cast<TSize>(cudaDevProp.maxGridSize[2]),
169-
static_cast<TSize>(cudaDevProp.maxGridSize[1]),
170-
static_cast<TSize>(cudaDevProp.maxGridSize[0]))),
168+
alpaka::core::clipCast<TSize>(cudaDevProp.maxGridSize[2u]),
169+
alpaka::core::clipCast<TSize>(cudaDevProp.maxGridSize[1u]),
170+
alpaka::core::clipCast<TSize>(cudaDevProp.maxGridSize[0u]))),
171171
// m_gridBlockCountMax
172172
std::numeric_limits<TSize>::max(),
173173
// m_blockThreadExtentMax
174174
extent::getExtentVecEnd<TDim>(
175175
vec::Vec<dim::DimInt<3u>, TSize>(
176-
static_cast<TSize>(cudaDevProp.maxThreadsDim[2]),
177-
static_cast<TSize>(cudaDevProp.maxThreadsDim[1]),
178-
static_cast<TSize>(cudaDevProp.maxThreadsDim[0]))),
176+
alpaka::core::clipCast<TSize>(cudaDevProp.maxThreadsDim[2u]),
177+
alpaka::core::clipCast<TSize>(cudaDevProp.maxThreadsDim[1u]),
178+
alpaka::core::clipCast<TSize>(cudaDevProp.maxThreadsDim[0u]))),
179179
// m_blockThreadCountMax
180-
static_cast<TSize>(cudaDevProp.maxThreadsPerBlock),
180+
alpaka::core::clipCast<TSize>(cudaDevProp.maxThreadsPerBlock),
181181
// m_threadElemExtentMax
182182
vec::Vec<TDim, TSize>::all(std::numeric_limits<TSize>::max()),
183183
// m_threadElemCountMax

include/alpaka/alpaka.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@
7676
#include <alpaka/core/Assert.hpp>
7777
#include <alpaka/core/Align.hpp>
7878
#include <alpaka/core/BarrierThread.hpp>
79+
#include <alpaka/core/BoostPredef.hpp>
80+
#include <alpaka/core/ClipCast.hpp>
7981
#include <alpaka/core/Common.hpp>
8082
#include <alpaka/core/ConcurrentExecPool.hpp>
8183
#include <alpaka/core/Cuda.hpp>
@@ -158,7 +160,7 @@
158160
#include <alpaka/meta/Fold.hpp>
159161
#include <alpaka/meta/ForEachType.hpp>
160162
#include <alpaka/meta/IntegerSequence.hpp>
161-
#include <alpaka/meta/IsIntegralSuperset.hpp>
163+
#include <alpaka/meta/Integral.hpp>
162164
#include <alpaka/meta/IsStrictBase.hpp>
163165
#include <alpaka/meta/Metafunctions.hpp>
164166
#include <alpaka/meta/NdLoop.hpp>

include/alpaka/core/Align.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
#pragma once
2323

24-
#include <boost/predef.h>
24+
#include <alpaka/core/BoostPredef.hpp>
2525

2626
#include <cstddef>
2727
#include <type_traits>

include/alpaka/core/Assert.hpp

Lines changed: 68 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,7 @@
2222
#pragma once
2323

2424
#include <alpaka/core/Common.hpp>
25-
26-
#if !BOOST_ARCH_CUDA_DEVICE
27-
#include <boost/core/ignore_unused.hpp>
28-
#endif
29-
#include <boost/predef.h>
25+
#include <alpaka/core/Unused.hpp>
3026

3127
#include <cassert>
3228
#include <type_traits>
@@ -51,21 +47,11 @@ namespace alpaka
5147
{
5248
ALPAKA_NO_HOST_ACC_WARNING
5349
ALPAKA_FN_HOST_ACC static auto assertValueUnsigned(
54-
#ifdef NDEBUG
55-
#if !BOOST_ARCH_CUDA_DEVICE
5650
TArg const & arg)
57-
#else
58-
TArg const &)
59-
#endif
60-
#else
61-
TArg const & arg)
62-
#endif
6351
-> void
6452
{
6553
#ifdef NDEBUG
66-
#if !BOOST_ARCH_CUDA_DEVICE
67-
boost::ignore_unused(arg);
68-
#endif
54+
alpaka::ignore_unused(arg);
6955
#else
7056
assert(arg >= 0);
7157
#endif
@@ -80,16 +66,10 @@ namespace alpaka
8066
{
8167
ALPAKA_NO_HOST_ACC_WARNING
8268
ALPAKA_FN_HOST_ACC static auto assertValueUnsigned(
83-
#if !BOOST_ARCH_CUDA_DEVICE
8469
TArg const & arg)
85-
#else
86-
TArg const &)
87-
#endif
8870
-> void
8971
{
90-
#if !BOOST_ARCH_CUDA_DEVICE
91-
boost::ignore_unused(arg);
92-
#endif
72+
alpaka::ignore_unused(arg);
9373
// Nothing to do for unsigned types.
9474
}
9575
};
@@ -109,5 +89,70 @@ namespace alpaka
10989
::assertValueUnsigned(
11090
arg);
11191
}
92+
93+
namespace detail
94+
{
95+
//#############################################################################
96+
template<
97+
typename TLhs,
98+
typename TRhs,
99+
typename TSfinae = void>
100+
struct AssertGreaterThan;
101+
//#############################################################################
102+
template<
103+
typename TLhs,
104+
typename TRhs>
105+
struct AssertGreaterThan<
106+
TLhs,
107+
TRhs,
108+
typename std::enable_if<!std::is_unsigned<TRhs>::value || (TLhs::value != 0u)>::type>
109+
{
110+
ALPAKA_NO_HOST_ACC_WARNING
111+
ALPAKA_FN_HOST_ACC static auto assertGreaterThan(
112+
TRhs const & lhs)
113+
-> void
114+
{
115+
#ifdef NDEBUG
116+
alpaka::ignore_unused(lhs);
117+
#else
118+
assert(TLhs::value > lhs);
119+
#endif
120+
}
121+
};
122+
//#############################################################################
123+
template<
124+
typename TLhs,
125+
typename TRhs>
126+
struct AssertGreaterThan<
127+
TLhs,
128+
TRhs,
129+
typename std::enable_if<std::is_unsigned<TRhs>::value && (TLhs::value == 0u)>::type>
130+
{
131+
ALPAKA_NO_HOST_ACC_WARNING
132+
ALPAKA_FN_HOST_ACC static auto assertGreaterThan(
133+
TRhs const & lhs)
134+
-> void
135+
{
136+
alpaka::ignore_unused(lhs);
137+
// Nothing to do for unsigned types comparing to zero.
138+
}
139+
};
140+
}
141+
//-----------------------------------------------------------------------------
142+
//! This function asserts that the compile-time constant TLhs::value is greater than the given value.
143+
ALPAKA_NO_HOST_ACC_WARNING
144+
template<
145+
typename TLhs,
146+
typename TRhs>
147+
ALPAKA_FN_HOST_ACC auto assertGreaterThan(
148+
TRhs const & lhs)
149+
-> void
150+
{
151+
detail::AssertGreaterThan<
152+
TLhs,
153+
TRhs>
154+
::assertGreaterThan(
155+
lhs);
156+
}
112157
}
113158
}

0 commit comments

Comments
 (0)