|
1 | | -find_package(CUDA REQUIRED) |
2 | | - |
3 | 1 | if(NOT DEFINED CUDA_ARCHS) |
4 | 2 | ############################### Autodetect CUDA Arch ##################################################### |
5 | 3 | #Auto-detect cuda arch. Inspired by https://wagonhelm.github.io/articles/2018-03/detecting-cuda-capability-with-cmake |
6 | | - # This will define and populate CUDA_ARCHS and put it in the cache |
7 | | - set(cuda_arch_autodetect_file ${CMAKE_BINARY_DIR}/autodetect_cuda_archs.cu) |
8 | | - file(WRITE ${cuda_arch_autodetect_file} [[ |
9 | | - #include <stdio.h> |
10 | | - int main() { |
11 | | - int count = 0; |
12 | | - if (cudaSuccess != cudaGetDeviceCount(&count)) { return -1; } |
13 | | - if (count == 0) { return -1; } |
14 | | - for (int device = 0; device < count; ++device) { |
15 | | - cudaDeviceProp prop; |
16 | | - bool is_unique = true; |
17 | | - if (cudaSuccess == cudaGetDeviceProperties(&prop, device)) { |
18 | | - for (int device_1 = device - 1; device_1 >= 0; --device_1) { |
19 | | - cudaDeviceProp prop_1; |
20 | | - if (cudaSuccess == cudaGetDeviceProperties(&prop_1, device_1)) { |
21 | | - if (prop.major == prop_1.major && prop.minor == prop_1.minor) { |
22 | | - is_unique = false; |
23 | | - break; |
24 | | - } |
| 4 | + # This will define and populate CUDA_ARCHS and put it in the cache |
| 5 | + #Windows users (especially on VS2017 and VS2015) might need to run this |
| 6 | + #>> "C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 |
| 7 | + # and change Enterprise to the right edition. More about this here https://stackoverflow.com/a/47746461/1608232 |
| 8 | + if(CMAKE_CUDA_COMPILER_ID STREQUAL NVIDIA) |
| 9 | + set(cuda_arch_autodetect_file ${CMAKE_BINARY_DIR}/autodetect_cuda_archs.cu) |
| 10 | + |
| 11 | + file(WRITE ${cuda_arch_autodetect_file} [[ |
| 12 | +#include <stdio.h> |
| 13 | +int main() { |
| 14 | + int count = 0; |
| 15 | + if (cudaSuccess != cudaGetDeviceCount(&count)) { return -1; } |
| 16 | + if (count == 0) { return -1; } |
| 17 | + for (int device = 0; device < count; ++device) { |
| 18 | + cudaDeviceProp prop; |
| 19 | + bool is_unique = true; |
| 20 | + if (cudaSuccess == cudaGetDeviceProperties(&prop, device)) { |
| 21 | + for (int device_1 = device - 1; device_1 >= 0; --device_1) { |
| 22 | + cudaDeviceProp prop_1; |
| 23 | + if (cudaSuccess == cudaGetDeviceProperties(&prop_1, device_1)) { |
| 24 | + if (prop.major == prop_1.major && prop.minor == prop_1.minor) { |
| 25 | + is_unique = false; |
| 26 | + break; |
25 | 27 | } |
26 | | - else { return -1; } |
27 | | - } |
28 | | - if (is_unique) { |
29 | | - fprintf(stderr, "%d%d", prop.major, prop.minor); |
30 | 28 | } |
| 29 | + else { return -1; } |
| 30 | + } |
| 31 | + if (is_unique) { |
| 32 | + fprintf(stderr, "%d%d", prop.major, prop.minor); |
31 | 33 | } |
32 | | - else { return -1; } |
33 | | - } |
34 | | - return 0; |
35 | 34 | } |
| 35 | + else { return -1; } |
| 36 | + } |
| 37 | + return 0; |
| 38 | +} |
36 | 39 | ]]) |
37 | | - |
38 | | - set(cuda_detect_cmd "${CUDA_NVCC_EXECUTABLE} -ccbin ${CMAKE_CXX_COMPILER} --run ${cuda_arch_autodetect_file}") |
39 | | - message(STATUS "Executing: ${cuda_detect_cmd}") |
40 | | - execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}" "-ccbin" "${CMAKE_CXX_COMPILER}" "--run" "${cuda_arch_autodetect_file}" |
41 | | - #WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/CMakeFiles/" |
42 | | - RESULT_VARIABLE CUDA_RETURN_CODE |
43 | | - OUTPUT_VARIABLE dummy |
44 | | - ERROR_VARIABLE fprintf_output |
45 | | - OUTPUT_STRIP_TRAILING_WHITESPACE) |
46 | | - if(CUDA_RETURN_CODE EQUAL 0) |
47 | | - set(CMAKE_CUDA_ARCHITECTURES ${fprintf_output}) |
48 | | - else() |
49 | | - message(STATUS "GPU architectures auto-detect failed. Will build for all possible architectures.") |
50 | | - set(CMAKE_CUDA_ARCHITECTURES all) |
51 | | - endif() |
| 40 | + |
| 41 | + execute_process(COMMAND "${CMAKE_CUDA_COMPILER}" "-ccbin" "${CMAKE_CXX_COMPILER}" "--run" "${cuda_arch_autodetect_file}" |
| 42 | + WORKING_DIRECTORY "${CMAKE_BINARY_DIR}" |
| 43 | + RESULT_VARIABLE CUDA_RETURN_CODE |
| 44 | + OUTPUT_VARIABLE dummy |
| 45 | + ERROR_VARIABLE fprintf_output |
| 46 | + OUTPUT_STRIP_TRAILING_WHITESPACE) |
| 47 | + |
| 48 | + if(CUDA_RETURN_CODE EQUAL 0) |
| 49 | + set(CMAKE_CUDA_ARCHITECTURES ${fprintf_output}) |
| 50 | + else() |
| 51 | + message(STATUS "GPU architectures auto-detect failed. Will build for sm_70.") |
| 52 | + set(CMAKE_CUDA_ARCHITECTURES 70) |
| 53 | + endif() |
| 54 | + endif() |
52 | 55 | message(STATUS "CUDA architectures= " ${CMAKE_CUDA_ARCHITECTURES}) |
53 | 56 | endif() |
54 | 57 | ################################################################################### |
| 58 | + |
0 commit comments