Skip to content

Commit 3886cfe

Browse files
authored
Merge pull request #232 from sjsprecious/enable_kokkos_cuda_target
Enable the CUDA target of Kokkos library
2 parents 2df4bbd + 9b59c40 commit 3886cfe

File tree

4 files changed

+36
-52
lines changed

4 files changed

+36
-52
lines changed

machines/casper/gnu_casper.cmake

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,20 @@ if (USE_KOKKOS)
1414
# Generic settings that are used regardless of architecture or Kokkos backend
1515
string(APPEND KOKKOS_OPTIONS " -DKokkos_ENABLE_DEPRECATED_CODE=OFF -DKokkos_ENABLE_EXPLICIT_INSTANTIATION=OFF")
1616
if (KOKKOS_GPU_OFFLOAD)
17-
string(APPEND CPPDEFS " -DGPU -DTHRUST_IGNORE_CUB_VERSION_CHECK")
18-
string(APPEND CMAKE_CUDA_FLAGS " -ccbin CC -O2 -arch=sm_80 --use_fast_math")
19-
string(APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_AMPERE80=On -DKokkos_ENABLE_CUDA=On -DKokkos_ENABLE_CUDA_LAMBDA=On -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=Off -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=Off")
20-
# Enable A100 arch in kokkos
21-
option(Kokkos_ARCH_AMPERE80 "" ON)
22-
# This var is needed by relatively recent CMake when CUDA language is enabled
23-
# If not defined, CMake issues a warning
24-
set(CMAKE_CUDA_ARCHITECTURES 80 CACHE STRING "")
25-
# Settings used when Cuda is the Kokkos backend
26-
set(Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION FALSE CACHE BOOL "")
27-
set(Kokkos_ENABLE_CUDA TRUE CACHE BOOL "")
28-
set(Kokkos_ENABLE_CUDA_LAMBDA TRUE CACHE BOOL "")
29-
set(CMAKE_CXX_FLAGS "-DTHRUST_IGNORE_CUB_VERSION_CHECK" CACHE STRING "" FORCE)
17+
string(APPEND CPPDEFS " -DGPU -DTHRUST_IGNORE_CUB_VERSION_CHECK -DHOMMEXX_ENABLE_GPU")
18+
string(APPEND KOKKOS_OPTIONS " -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=OFF -DKokkos_ENABLE_AGGRESSIVE_VECTORIZATION=OFF")
19+
if (GPU_TYPE STREQUAL v100)
20+
string(APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_VOLTA70=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF") # v100 branch: CMake variable names are case-sensitive, so the arch option must use the same Kokkos_ prefix as the a100/h100 branches
21+
string(APPEND CXXFLAGS " -extended-lambda -Wext-lambda-captures-this -std=c++17 -arch=sm_70")
22+
elseif(GPU_TYPE STREQUAL a100)
23+
string(APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF")
24+
string(APPEND CXXFLAGS " -extended-lambda -Wext-lambda-captures-this -std=c++17 -arch=sm_80")
25+
elseif(GPU_TYPE STREQUAL h100)
26+
string(APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_HOPPER90=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF")
27+
string(APPEND CXXFLAGS " -extended-lambda -Wext-lambda-captures-this -std=c++17 -arch=sm_90")
28+
else()
29+
message(FATAL_ERROR "GPU_TYPE ${GPU_TYPE} not supported")
30+
endif()
3031
else()
3132
# Enable EPYC arch in kokkos
3233
if (compile_threaded)

machines/casper/nvhpc_casper.cmake

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,20 @@ if (USE_KOKKOS)
3030
# Generic settings that are used regardless of architecture or Kokkos backend
3131
string(APPEND KOKKOS_OPTIONS " -DKokkos_ENABLE_DEPRECATED_CODE=OFF -DKokkos_ENABLE_EXPLICIT_INSTANTIATION=OFF")
3232
if (KOKKOS_GPU_OFFLOAD)
33-
string(APPEND CPPDEFS " -DGPU -DTHRUST_IGNORE_CUB_VERSION_CHECK")
34-
string(APPEND CMAKE_CUDA_FLAGS " -ccbin CC -O2 -arch=sm_80 --use_fast_math")
35-
string(APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_AMPERE80=On -DKokkos_ENABLE_CUDA=On -DKokkos_ENABLE_CUDA_LAMBDA=On -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=Off -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=Off")
36-
# Enable A100 arch in kokkos
37-
option(Kokkos_ARCH_AMPERE80 "" ON)
38-
# This var is needed by relatively recent CMake when CUDA language is enabled
39-
# If not defined, CMake issues a warning
40-
set(CMAKE_CUDA_ARCHITECTURES 80 CACHE STRING "")
41-
# Settings used when Cuda is the Kokkos backend
42-
set(Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION FALSE CACHE BOOL "")
43-
set(Kokkos_ENABLE_CUDA TRUE CACHE BOOL "")
44-
set(Kokkos_ENABLE_CUDA_LAMBDA TRUE CACHE BOOL "")
45-
set(CMAKE_CXX_FLAGS "-DTHRUST_IGNORE_CUB_VERSION_CHECK" CACHE STRING "" FORCE)
33+
string(APPEND CPPDEFS " -DGPU -DTHRUST_IGNORE_CUB_VERSION_CHECK -DHOMMEXX_ENABLE_GPU")
34+
string(APPEND KOKKOS_OPTIONS " -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=OFF -DKokkos_ENABLE_AGGRESSIVE_VECTORIZATION=OFF")
35+
if (GPU_TYPE STREQUAL v100)
36+
string(APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_VOLTA70=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF") # v100 branch: CMake variable names are case-sensitive, so the arch option must use the same Kokkos_ prefix as the a100/h100 branches
37+
string(APPEND CXXFLAGS " -extended-lambda -Wext-lambda-captures-this -std=c++17 -arch=sm_70")
38+
elseif(GPU_TYPE STREQUAL a100)
39+
string(APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF")
40+
string(APPEND CXXFLAGS " -extended-lambda -Wext-lambda-captures-this -std=c++17 -arch=sm_80")
41+
elseif(GPU_TYPE STREQUAL h100)
42+
string(APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_HOPPER90=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF")
43+
string(APPEND CXXFLAGS " -extended-lambda -Wext-lambda-captures-this -std=c++17 -arch=sm_90")
44+
else()
45+
message(FATAL_ERROR "GPU_TYPE ${GPU_TYPE} not supported")
46+
endif()
4647
else()
4748
string(APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_ZEN4=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=OFF") # work-around for nvidia as kokkos is not passing "-mp" for threaded build
4849
endif()

machines/derecho/gnu_derecho.cmake

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,10 @@ if (USE_KOKKOS)
1515
# Generic settings that are used regardless of architecture or Kokkos backend
1616
string(APPEND KOKKOS_OPTIONS " -DKokkos_ENABLE_DEPRECATED_CODE=OFF -DKokkos_ENABLE_EXPLICIT_INSTANTIATION=OFF")
1717
if (KOKKOS_GPU_OFFLOAD)
18-
string(APPEND CPPDEFS " -DGPU -DTHRUST_IGNORE_CUB_VERSION_CHECK")
19-
string(APPEND CMAKE_CUDA_FLAGS " -ccbin CC -O2 -arch=sm_80 --use_fast_math")
20-
string(APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_AMPERE80=On -DKokkos_ENABLE_CUDA=On -DKokkos_ENABLE_CUDA_LAMBDA=On -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=Off -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=Off")
21-
# Enable A100 arch in kokkos
22-
option(Kokkos_ARCH_AMPERE80 "" ON)
23-
# This var is needed by relatively recent CMake when CUDA language is enabled
24-
# If not defined, CMake issues a warning
25-
set(CMAKE_CUDA_ARCHITECTURES 80 CACHE STRING "")
26-
# Settings used when Cuda is the Kokkos backend
27-
set(Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION FALSE CACHE BOOL "")
28-
set(Kokkos_ENABLE_CUDA TRUE CACHE BOOL "")
29-
set(Kokkos_ENABLE_CUDA_LAMBDA TRUE CACHE BOOL "")
30-
set(CMAKE_CXX_FLAGS "-DTHRUST_IGNORE_CUB_VERSION_CHECK" CACHE STRING "" FORCE)
18+
string(APPEND CPPDEFS " -DGPU -DTHRUST_IGNORE_CUB_VERSION_CHECK -DHOMMEXX_ENABLE_GPU")
19+
string(APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF")
20+
string(APPEND KOKKOS_OPTIONS " -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=OFF -DKokkos_ENABLE_AGGRESSIVE_VECTORIZATION=OFF")
21+
string(APPEND CXXFLAGS " -extended-lambda -Wext-lambda-captures-this -std=c++17 -arch=sm_80")
3122
else()
3223
# Enable EPYC arch in kokkos
3324
if (compile_threaded)

machines/derecho/nvhpc_derecho.cmake

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,10 @@ if (USE_KOKKOS)
1616
# Generic settings that are used regardless of architecture or Kokkos backend
1717
string(APPEND KOKKOS_OPTIONS " -DKokkos_ENABLE_DEPRECATED_CODE=OFF -DKokkos_ENABLE_EXPLICIT_INSTANTIATION=OFF")
1818
if (KOKKOS_GPU_OFFLOAD)
19-
string(APPEND CPPDEFS " -DGPU -DTHRUST_IGNORE_CUB_VERSION_CHECK")
20-
string(APPEND CMAKE_CUDA_FLAGS " -ccbin CC -O2 -arch=sm_80 --use_fast_math")
21-
string(APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_AMPERE80=On -DKokkos_ENABLE_CUDA=On -DKokkos_ENABLE_CUDA_LAMBDA=On -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=Off -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=Off")
22-
# Enable A100 arch in kokkos
23-
option(Kokkos_ARCH_AMPERE80 "" ON)
24-
# This var is needed by relatively recent CMake when CUDA language is enabled
25-
# If not defined, CMake issues a warning
26-
set(CMAKE_CUDA_ARCHITECTURES 80 CACHE STRING "")
27-
# Settings used when Cuda is the Kokkos backend
28-
set(Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION FALSE CACHE BOOL "")
29-
set(Kokkos_ENABLE_CUDA TRUE CACHE BOOL "")
30-
set(Kokkos_ENABLE_CUDA_LAMBDA TRUE CACHE BOOL "")
31-
set(CMAKE_CXX_FLAGS "-DTHRUST_IGNORE_CUB_VERSION_CHECK" CACHE STRING "" FORCE)
19+
string(APPEND CPPDEFS " -DGPU -DTHRUST_IGNORE_CUB_VERSION_CHECK -DHOMMEXX_ENABLE_GPU")
20+
string(APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF")
21+
string(APPEND KOKKOS_OPTIONS " -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=OFF -DKokkos_ENABLE_AGGRESSIVE_VECTORIZATION=OFF")
22+
string(APPEND CXXFLAGS " -extended-lambda -Wext-lambda-captures-this -std=c++17 -arch=sm_80")
3223
else()
3324
# Enable EPYC arch in kokkos
3425
string(APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_ZEN3=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=OFF") # work-around for nvidia as kokkos is not passing "-mp" for threaded build

0 commit comments

Comments
 (0)