Skip to content

Commit dd3949b

Browse files
author
Carsten Griwodz
committed
[cmake] add vocal parameters, handle cached CCs
1 parent ec84e9c commit dd3949b

File tree

2 files changed

+119
-47
lines changed

2 files changed

+119
-47
lines changed

CMakeLists.txt

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -88,68 +88,72 @@ endif()
8888

8989
message(STATUS "CUDA Version is ${CUDA_VERSION}")
9090

91+
include(ChooseCudaCC)
9192
if(NOT DEFINED PopSift_CUDA_CC_LIST)
92-
include(ChooseCudaCC)
93-
set(PopSift_MIN_CC 30)
94-
set(PopSift_MIN_CUDA_VERSION 7.0)
95-
chooseCudaCC(PopSift_CUDA_CC_LIST_BASIC ${PopSift_MIN_CC} ${PopSift_MIN_CUDA_VERSION})
93+
chooseCudaCC(PopSift_CUDA_CC_LIST_BASIC
94+
PopSift_CUDA_GENCODE_FLAGS
95+
MIN_CC 30
96+
MIN_CUDA_VERSION 7.0)
9697
set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile")
98+
else()
99+
getFlagsForCudaCCList(PopSift_CUDA_CC_LIST
100+
PopSift_CUDA_GENCODE_FLAGS)
97101
endif()
98-
message(STATUS "Compiling for CUDA CCs ${PopSift_CUDA_CC_LIST}")
102+
list(APPEND CUDA_NVCC_FLAGS "${PopSift_CUDA_GENCODE_FLAGS}")
99103

100104
if(PopSift_USE_NVTX_PROFILING)
101105
message(STATUS "PROFILING CPU CODE: NVTX is in use")
102106
endif()
103107

104108
if(PopSift_ERRCHK_AFTER_KERNEL)
105109
message(STATUS "Synchronizing and checking errors after every kernel call")
106-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-DERRCHK_AFTER_KERNEL")
110+
list(APPEND CUDA_NVCC_FLAGS "-DERRCHK_AFTER_KERNEL")
107111
endif()
108112

109113
set(CUDA_SEPARABLE_COMPILATION ON)
110114

111115
if(UNIX AND NOT APPLE)
112-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler;-rdynamic")
113-
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-v")
114-
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-warn-double-usage")
115-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--keep")
116-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--source-in-ptx")
116+
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-rdynamic")
117+
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-v")
118+
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-warn-double-usage")
119+
list(APPEND CUDA_NVCC_FLAGS_DEBUG "--keep")
120+
list(APPEND CUDA_NVCC_FLAGS_DEBUG "--source-in-ptx")
117121
endif()
118122

119123
# The following if should not be necessary, but apparently there is a bug in FindCUDA.cmake that
120124
# generates an empty string in the nvcc command line causing the compilation to fail.
121125
# see https://gitlab.kitware.com/cmake/cmake/issues/16411
122126
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
123127
message(STATUS "Building in debug mode")
124-
set(CUDA_NVCC_FLAGS_DEBUG "${CUDA_NVCC_FLAGS_DEBUG};-G")
128+
list(APPEND CUDA_NVCC_FLAGS_DEBUG "-G")
125129
endif()
126-
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-O3")
130+
list(APPEND CUDA_NVCC_FLAGS_RELEASE "-O3")
127131

128132
if(PopSift_USE_POSITION_INDEPENDENT_CODE)
129-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler;-fPIC")
133+
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-fPIC")
130134
endif()
131135

132136
# default stream per-thread implies that each host thread has one non-synchronizing 0-stream
133137
# currently, the code requires legacy mode
134-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;legacy")
138+
list(APPEND CUDA_NVCC_FLAGS "--default-stream;legacy")
135139
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;per-thread")
136140

137-
if( ( CUDA_VERSION VERSION_EQUAL "7.5" ) OR ( CUDA_VERSION VERSION_GREATER "7.5") )
141+
if(CUDA_VERSION VERSION_GREATER_EQUAL "7.5")
138142
if(PopSift_NVCC_WARNINGS)
139-
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-lmem-usage")
140-
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-spills")
141-
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;--warn-on-local-memory-usage")
142-
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;--warn-on-spills")
143+
list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;-warn-lmem-usage")
144+
list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;-warn-spills")
145+
list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;--warn-on-local-memory-usage")
146+
list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;--warn-on-spills")
143147
endif()
144148
endif()
145149

146-
if(PopSift_USE_NORMF AND CUDA_VERSION VERSION_GREATER "7.4")
150+
if(PopSift_USE_NORMF AND CUDA_VERSION VERSION_GREATER_EQUAL "7.5")
147151
set(PopSift_HAVE_NORMF 1)
148152
else()
149153
set(PopSift_HAVE_NORMF 0)
150154
endif()
151155

152-
if( ( CUDA_VERSION VERSION_EQUAL "9.0" ) OR ( CUDA_VERSION VERSION_GREATER "9.0") )
156+
if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
153157
set(HAVE_SHFL_DOWN_SYNC 1)
154158
else()
155159
set(HAVE_SHFL_DOWN_SYNC 0)

cmake/ChooseCudaCC.cmake

Lines changed: 93 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,46 @@
11
#
2-
# after returning from this function, do not forget to call the following:
3-
# set(RESULT_NAME ${RESULT_NAME} CACHE STRING "CUDA CC versions to compile")
4-
# replacing your own variable for RESULT_NAME
2+
# CUDA hardware and SDKs are developing over time, different SDK support different
3+
# hardware, and supported hardware differs depending on platform even for the same
4+
# SDK version.
5+
# This file attempts to provide a function that returns a valid selection of hardware
6+
# for the current SDK and platform.
57
#
6-
# We assume that MINCC default to 20
7-
# We assume that MINCUDAVERSION defaults to 7.0
8+
# It will require updates as CUDA develops, and it is currently not complete in terms
9+
# of existing platforms that support CUDA.
810
#
9-
function(chooseCudaCC RESULT_NAME MINCC MINCUDAVERSION)
10-
if(NOT DEFINED MINCC)
11-
set(MINCC 20)
11+
# This function does not edit cache entries or variables in the parent scope
12+
# except for the variables whose names are supplied for SUPPORTED_CC and
13+
# SUPPORTED_GENCODE_FLAGS
14+
#
15+
# You may want to cache SUPPORTED_CC and append SUPPORTED_GENCODE_FLAGS to
16+
# CUDA_NVCC_FLAGS.
17+
# Like this:
18+
# set(MYCC ${MYCC} CACHE STRING "CUDA CC versions to compile")
19+
# and
20+
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${MY_GENCODE_FLAGS}")
21+
#
22+
# We assume that ${SUPPORTED_CC} can be overwritten.
23+
# We assume that ${SUPPORTED_GENCODE_FLAGS} can be overwritten.
24+
# We assume that MIN_CC defaults to 20
25+
# We assume that MIN_CUDA_VERSION defaults to 7.0
26+
#
27+
function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS)
28+
set(options "")
29+
set(oneValueArgs MIN_CUDA_VERSION MIN_CC)
30+
set(multipleValueArgs "")
31+
cmake_parse_arguments(CHOOSE_CUDA "${options}" "${oneValueArgs}" "${multipleValueArgs}" ${ARGN})
32+
33+
if(NOT DEFINED CHOOSE_CUDA_MIN_CC)
34+
set(CHOOSE_CUDA_MIN_CC 20)
1235
endif()
13-
if(NOT DEFINED MINCUDAVERSION)
14-
set(MINCUDAVERSION 7.0)
36+
if(NOT DEFINED CHOOSE_CUDA_MIN_CUDA_VERSION)
37+
set(CHOOSE_CUDA_MIN_CUDA_VERSION 7.0)
1538
endif()
1639

17-
find_package(CUDA ${MINCUDAVERSION} REQUIRED)
40+
find_package(CUDA ${CHOOSE_CUDA_MIN_CUDA_VERSION} REQUIRED)
1841

1942
if(NOT CUDA_FOUND)
20-
message(FATAL_ERROR "Could not find CUDA >= ${MINCUDAVERSION}")
43+
message(FATAL_ERROR "Could not find CUDA >= ${CHOOSE_CUDA_MIN_CUDA_VERSION}")
2144
endif()
2245

2346
#
@@ -26,11 +49,17 @@ function(chooseCudaCC RESULT_NAME MINCC MINCUDAVERSION)
2649
# it is possible that non-Tegra ARM systems exist as well.
2750
# For now, this is my best guess.
2851
#
29-
if((CMAKE_SYSTEM_PROCESSOR STREQUAL "i686") OR (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64"))
30-
set(CC_LIST_BY_SYSTEM_PROCESSOR 20 21 30 35 50 52 60 61 70 75)
31-
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
32-
set(CC_LIST_BY_SYSTEM_PROCESSOR 32 53 62 72)
33-
else()
52+
# CMAKE_SYSTEM_PROCESSOR values treated as Tegra/ARM platforms.
# 32-bit ARM Linux reports "armv7l" (lowercase letter L, from `uname -m`);
# the earlier spelling "armv71" (digit one) is kept so existing setups that
# force that value keep working.
set(TEGRA_SUPPORTED_PROCESSORS "armv7l;armv71;arm;aarch64")
53+
set(OTHER_SUPPORTED_PROCESSORS "i686;x86_64;AMD64")
54+
55+
set(CC_LIST_BY_SYSTEM_PROCESSOR "")
56+
if(CMAKE_SYSTEM_PROCESSOR IN_LIST OTHER_SUPPORTED_PROCESSORS)
57+
list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "20;21;30;35;50;52;60;61;70;75")
58+
endif()
59+
if(CMAKE_SYSTEM_PROCESSOR IN_LIST TEGRA_SUPPORTED_PROCESSORS)
60+
list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "32;53;62;72")
61+
endif()
62+
if(NOT CC_LIST_BY_SYSTEM_PROCESSOR)
3463
message(FATAL_ERROR "Unknown how to build for ${CMAKE_SYSTEM_PROCESSOR}")
3564
endif()
3665

@@ -52,11 +81,13 @@ function(chooseCudaCC RESULT_NAME MINCC MINCUDAVERSION)
5281
else()
5382
message(FATAL_ERROR "We do not support a CUDA SDK below version 7.0")
5483
endif()
84+
if(${CHOOSE_CUDA_MIN_CC} GREATER ${CUDA_MIN_CC})
85+
set(CUDA_MIN_CC ${CHOOSE_CUDA_MIN_CC})
86+
endif()
5587

5688
set(CC_LIST "")
5789
foreach(CC ${CC_LIST_BY_SYSTEM_PROCESSOR})
58-
if( (${CC} GREATER ${MINCC}) AND
59-
(${CC} GREATER_EQUAL ${CUDA_MIN_CC}) AND
90+
if( (${CC} GREATER_EQUAL ${CUDA_MIN_CC}) AND
6091
(${CC} LESS_EQUAL ${CUDA_MAX_CC}) )
6192
list(APPEND CC_LIST ${CC})
6293
endif()
@@ -65,10 +96,46 @@ function(chooseCudaCC RESULT_NAME MINCC MINCUDAVERSION)
6596
#
6697
# Add all requested CUDA CCs to the command line for offline compilation
6798
#
68-
set(GENCODE_FLAGS "${CUDA_NVCC_FLAGS}")
99+
set(GENCODE_FLAGS "")
100+
list(SORT CC_LIST)
101+
foreach(CC_VERSION ${CC_LIST})
102+
list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_VERSION},code=sm_${CC_VERSION}")
103+
endforeach()
104+
105+
#
106+
# Use the highest requested CUDA CC for CUDA JIT compilation
107+
#
108+
list(LENGTH CC_LIST CC_LIST_LEN)
109+
MATH(EXPR CC_LIST_LEN "${CC_LIST_LEN}-1")
110+
list(GET CC_LIST ${CC_LIST_LEN} CC_LIST_LAST)
111+
list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_LIST_LAST},code=compute_${CC_LIST_LAST}")
112+
113+
#
114+
# Two variables are exported to the parent scope, both by name: the gencode
115+
# flags (SUPPORTED_GENCODE_FLAGS) and the list of selected CCs (SUPPORTED_CC)
116+
#
117+
set(${SUPPORTED_GENCODE_FLAGS} "${GENCODE_FLAGS}" PARENT_SCOPE)
118+
set(${SUPPORTED_CC} "${CC_LIST}" PARENT_SCOPE)
119+
endfunction()
120+
121+
#
122+
# This function is used to create a list of gencode instructions for a given list
123+
# of CCs.
124+
# It takes as arguments the name of a list of CCs and the name of a list variable that can be filled with
125+
# gencode strings.
126+
#
127+
# We assume that ${SUPPORTED_GENCODE_FLAGS} can be overwritten.
128+
#
129+
function(getFlagsForCudaCCList INPUT_CC_LIST SUPPORTED_GENCODE_FLAGS)
130+
set(CC_LIST "${${INPUT_CC_LIST}}")
131+
132+
#
133+
# Add all requested CUDA CCs to the command line for offline compilation
134+
#
135+
set(GENCODE_FLAGS "")
69136
list(SORT CC_LIST)
70137
foreach(CC_VERSION ${CC_LIST})
71-
set(GENCODE_FLAGS "${GENCODE_FLAGS};-gencode;arch=compute_${CC_VERSION},code=sm_${CC_VERSION}")
138+
list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_VERSION},code=sm_${CC_VERSION}")
72139
endforeach()
73140

74141
#
@@ -77,13 +144,14 @@ function(chooseCudaCC RESULT_NAME MINCC MINCUDAVERSION)
77144
list(LENGTH CC_LIST CC_LIST_LEN)
78145
MATH(EXPR CC_LIST_LEN "${CC_LIST_LEN}-1")
79146
list(GET CC_LIST ${CC_LIST_LEN} CC_LIST_LAST)
80-
set(GENCODE_FLAGS "${GENCODE_FLAGS};-gencode;arch=compute_${CC_LIST_LAST},code=compute_${CC_LIST_LAST}")
147+
list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_LIST_LAST},code=compute_${CC_LIST_LAST}")
148+
149+
message(STATUS "Setting gencode flags: ${GENCODE_FLAGS}")
81150

82151
#
83152
# One variable is exported to the parent scope: the gencode flags, which are
84-
# environment (CUDA_NVCC_FLAGS), the other is passed by name (RESULT_NAME)
153+
# passed by name (SUPPORTED_GENCODE_FLAGS)
85154
#
86-
set(CUDA_NVCC_FLAGS ${GENCODE_FLAGS} PARENT_SCOPE)
87-
set(${RESULT_NAME} ${CC_LIST} PARENT_SCOPE)
155+
set(${SUPPORTED_GENCODE_FLAGS} "${GENCODE_FLAGS}" PARENT_SCOPE)
88156
endfunction()
89157

0 commit comments

Comments
 (0)