# Extra preprocessor defines that are added to CPPDEFS only when the component
# being built is gptl. Each define is passed as its own argument; string(APPEND)
# concatenates them in order.
if (COMP_NAME STREQUAL gptl)
  string (APPEND CPPDEFS
    " -DHAVE_NANOTIME"
    " -DBIT64"
    " -DHAVE_SLASHPROC"
    " -DHAVE_GETTIMEOFDAY")
endif ()
# NOTE(review): the `-` marker on each of the next eight lines suggests they are
# the removed side of a diff (release "-O2 -g" flags for C and Fortran, plus the
# MPI and serial compiler names mpicc/mpicxx/mpif90 and gcc/g++/gfortran).
# Confirm against the post-change file before relying on any of these settings.
4- string (APPEND CMAKE_C_FLAGS_RELEASE " -O2 -g" )
5- string (APPEND CMAKE_Fortran_FLAGS_RELEASE " -O2 -g" )
6- set (MPICC "mpicc" )
7- set (MPICXX "mpicxx" )
8- set (MPIFC "mpif90" )
9- set (SCC "gcc" )
10- set (SCXX "g++" )
11- set (SFC "gfortran" )
4+
# For non-debug builds, tune the generated code for the host CPU that goes with
# the selected GPU_TYPE. The architecture name is chosen once and then applied
# to CFLAGS, CXXFLAGS, FFLAGS and LDFLAGS, so the four flag sets cannot drift
# apart. FFLAGS additionally gets -fstack-arrays.
if (NOT DEBUG)
  if (GPU_TYPE STREQUAL "none" OR GPU_TYPE STREQUAL "mi300a")
    # Large-memory HTC nodes (AMD EPYC 9554P CPU) or MI300A GPU
    set (_host_cpu_arch "znver4")
  elseif (GPU_TYPE STREQUAL "a100")
    # AMD EPYC Milan 7763 CPU
    set (_host_cpu_arch "znver3")
  elseif (GPU_TYPE STREQUAL "h100")
    # Intel Xeon Gold 6430 CPU
    set (_host_cpu_arch "sapphirerapids")
  else ()
    # V100 GPU nodes or small-memory HTC nodes
    set (_host_cpu_arch "cascadelake")
  endif ()

  set (_host_arch_flags " -march=${_host_cpu_arch} -mtune=${_host_cpu_arch}")
  string (APPEND CFLAGS "${_host_arch_flags}")
  string (APPEND CXXFLAGS "${_host_arch_flags}")
  string (APPEND FFLAGS "${_host_arch_flags} -fstack-arrays")
  string (APPEND LDFLAGS "${_host_arch_flags}")

  # This file runs in the including scope, so do not leak the helper variables.
  unset (_host_arch_flags)
  unset (_host_cpu_arch)
endif ()
28+
# Libraries appended to SLIBS.
if (GPU_TYPE STREQUAL "mi300a")
  # The original author's note lists "-llapack -lblas" as the alternative here.
  string (APPEND SLIBS " -lopenblas")
endif ()
if (MPILIB STREQUAL "mpi-serial")
  string (APPEND SLIBS " -ldl")
endif ()
# ${NETCDF_PATH} and /lib must be adjacent: with whitespace between them the
# linker would get a bare "-L<prefix>" plus "/lib" as a separate argument, and
# the NetCDF libraries would not be found under the intended directory.
string (APPEND SLIBS " -L${NETCDF_PATH}/lib -lnetcdf -lnetcdff")
# Report the GPU/offload configuration variables at configure time, one line per
# variable in the form "<NAME> is <value> ".
foreach (_reported_var GPU_TYPE OPENACC_GPU_OFFLOAD OPENMP_GPU_OFFLOAD)
  message ("${_reported_var} is ${${_reported_var}} ")
endforeach ()
1239
1340if (USE_KOKKOS)
1441 # Generic settings that are used regardless of architecture or Kokkos backend
@@ -17,14 +44,19 @@ if (USE_KOKKOS)
1744 string (APPEND CPPDEFS " -DGPU -DTHRUST_IGNORE_CUB_VERSION_CHECK -DHOMMEXX_ENABLE_GPU" )
1845 string (APPEND KOKKOS_OPTIONS " -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=OFF -DKokkos_ENABLE_AGGRESSIVE_VECTORIZATION=OFF" )
# Select the Kokkos backend/architecture options and the matching C++ compiler
# flags for the requested GPU_TYPE. Unsupported values abort configuration.
#
# Kokkos options are spelled "Kokkos_ARCH_<name>=ON" with no whitespace before
# "=ON"; a space there would split the -D definition into two arguments.

# Options shared by the three CUDA targets (v100, a100, h100).
set (_cuda_kokkos_options " -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF")
set (_cuda_cxxflags " -extended-lambda -Wext-lambda-captures-this -std=c++17")

if (GPU_TYPE STREQUAL "v100")
  string (APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_VOLTA70=ON${_cuda_kokkos_options}")
  string (APPEND CXXFLAGS "${_cuda_cxxflags} -arch=sm_70")
elseif (GPU_TYPE STREQUAL "a100")
  string (APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_AMPERE80=ON${_cuda_kokkos_options}")
  string (APPEND CXXFLAGS "${_cuda_cxxflags} -arch=sm_80")
elseif (GPU_TYPE STREQUAL "h100")
  string (APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_HOPPER90=ON${_cuda_kokkos_options}")
  string (APPEND CXXFLAGS "${_cuda_cxxflags} -arch=sm_90")
elseif (GPU_TYPE STREQUAL "mi300a")
  # Currently theta-l_kokkos dycore does not handle APU correctly
  # (i.e., -DKokkos_ARCH_AMD_GFX942_APU=ON), so the non-APU GFX942 target is used.
  string (APPEND KOKKOS_OPTIONS " -DKokkos_ENABLE_HIP=ON -DKokkos_ARCH_ZEN4=ON -DAMDGPU_TARGETS=GFX942 -DKokkos_ARCH_AMD_GFX942=ON")
  string (APPEND CXXFLAGS " -D__HIP_PLATFORM_AMD__ -std=c++17 -Wno-mismatched-tags --offload-arch=gfx942 -munsafe-fp-atomics -fno-gpu-rdc -x hip -I$ENV{NCAR_INC_OPENMPI} ")
  # C++ sources are compiled with hipcc for both the serial and the MPI wrapper.
  set (SCXX "hipcc")
  set (MPICXX "${SCXX}")
else ()
  message (FATAL_ERROR "GPU_TYPE ${GPU_TYPE} not supported")
endif ()

# This file runs in the including scope, so do not leak the helper variables.
unset (_cuda_cxxflags)
unset (_cuda_kokkos_options)
@@ -42,4 +74,7 @@ if (USE_KOKKOS)
4274 set (CMAKE_Fortran_FLAGS "-fallow-argument-mismatch" CACHE STRING "" FORCE) # only works with gnu v10 and above
4375 endif ()
4476 string (APPEND LDFLAGS " -lstdc++ -lkokkoscontainers -lkokkoscore -lkokkossimd " )
# On the mi300a target, also link the HIP runtime-compilation and HIP runtime
# libraries. The two inputs are concatenated by string(APPEND) in order.
if (GPU_TYPE STREQUAL "mi300a")
  string (APPEND LDFLAGS
    " -lhiprtc"
    " -lamdhip64 ")
endif ()
4580endif ()
0 commit comments