-
Notifications
You must be signed in to change notification settings - Fork 124
[cmake] CMake script for choosing CCs #75
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
9a7c9ec
a12d236
6856846
21baa3e
717f108
ec84e9c
dd3949b
af77e93
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -86,89 +86,74 @@ if(NOT CUDA_FOUND) | |
| message(FATAL_ERROR "Could not find CUDA >= 7.0") | ||
| endif() | ||
|
|
||
| # | ||
| # Default setting of the CUDA CC versions to compile. | ||
| # Shortening the lists saves a lot of compile time. | ||
| # | ||
| if(CUDA_VERSION_MAJOR GREATER 7) | ||
| set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 60 61 62) | ||
| message(STATUS "CUDA Version is ${CUDA_VERSION}") | ||
|
|
||
| include(ChooseCudaCC) | ||
| if(NOT DEFINED PopSift_CUDA_CC_LIST) | ||
| chooseCudaCC(PopSift_CUDA_CC_LIST_BASIC | ||
| PopSift_CUDA_GENCODE_FLAGS | ||
| MIN_CC 30 | ||
| MIN_CUDA_VERSION 7.0) | ||
| set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile") | ||
| else() | ||
| set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 ) | ||
| getFlagsForCudaCCList(PopSift_CUDA_CC_LIST | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So I guess we want to give the possibility to the user to provide a
# we always need to know which are all the supported CC
chooseCudaCC(PopSift_CUDA_CC_LIST_BASIC
PopSift_CUDA_GENCODE_FLAGS
MIN_CC 30
MIN_CUDA_VERSION 7.0)
# if PopSift_CUDA_CC_LIST is provided
if(DEFINED PopSift_CUDA_CC_LIST)
# check each provided element is supported
# ideally this function checks the provided cc are in the list generating an error if not
check_if_supported(PopSift_CUDA_CC_LIST PopSift_CUDA_CC_LIST_BASIC)
getFlagsForCudaCCList(PopSift_CUDA_CC_LIST PopSift_CUDA_GENCODE_FLAGS)
else()
set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile")
endif()
list(APPEND CUDA_NVCC_FLAGS "${PopSift_CUDA_GENCODE_FLAGS}")and function(check_if_supported PROVIDED_CC SUPPORTED_CC)
foreach(cc IN LISTS PROVIDED_CC)
if(NOT ${cc} IN_LIST SUPPORTED_CC)
message(FATAL_ERROR "Compute capability ${cc} not supported. Supported CC are ${SUPPORTED_CC}")
endforeach()
endfunction()What do you think?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is that a really good thing? Without that check, a user can configure for a brand new CC that we haven't put into the ChooseCudaCC.cmake file yet.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, you have a point! |
||
| PopSift_CUDA_GENCODE_FLAGS) | ||
| endif() | ||
| set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile") | ||
| list(APPEND CUDA_NVCC_FLAGS "${PopSift_CUDA_GENCODE_FLAGS}") | ||
|
|
||
| if(PopSift_USE_NVTX_PROFILING) | ||
| message(STATUS "PROFILING CPU CODE: NVTX is in use") | ||
| endif() | ||
|
|
||
| if(PopSift_ERRCHK_AFTER_KERNEL) | ||
| message(STATUS "Synchronizing and checking errors after every kernel call") | ||
| set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-DERRCHK_AFTER_KERNEL") | ||
| list(APPEND CUDA_NVCC_FLAGS "-DERRCHK_AFTER_KERNEL") | ||
| endif() | ||
|
|
||
| set(CUDA_SEPARABLE_COMPILATION ON) | ||
|
|
||
| if(UNIX AND NOT APPLE) | ||
| set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler;-rdynamic;-lineinfo") | ||
| # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-v") | ||
| # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-warn-double-usage") | ||
| set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--keep") | ||
| set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--source-in-ptx") | ||
| list(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-rdynamic") | ||
| # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-v") | ||
| # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-warn-double-usage") | ||
| list(APPEND CUDA_NVCC_FLAGS_DEBUG "--keep") | ||
| list(APPEND CUDA_NVCC_FLAGS_DEBUG "--source-in-ptx") | ||
| endif() | ||
|
|
||
| # The following if should not be necessary, but apparently there is a bug in FindCUDA.cmake that | ||
| # generate an empty string in the nvcc command line causing the compilation to fail. | ||
| # see https://gitlab.kitware.com/cmake/cmake/issues/16411 | ||
| if(CMAKE_BUILD_TYPE STREQUAL "Debug") | ||
| message(STATUS "Building in debug mode") | ||
| set(CUDA_NVCC_FLAGS_DEBUG "${CUDA_NVCC_FLAGS_DEBUG};-G") | ||
| list(APPEND CUDA_NVCC_FLAGS_DEBUG "-G") | ||
| endif() | ||
| set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-O3") | ||
| list(APPEND CUDA_NVCC_FLAGS_RELEASE "-O3") | ||
|
|
||
| if(PopSift_USE_POSITION_INDEPENDENT_CODE) | ||
| set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler;-fPIC") | ||
| endif() | ||
|
|
||
| # | ||
| # Add all requested CUDA CCs to the command line for offline compilation | ||
| # | ||
| list(SORT PopSift_CUDA_CC_LIST) | ||
| foreach(PopSift_CC_VERSION ${PopSift_CUDA_CC_LIST}) | ||
| set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_${PopSift_CC_VERSION},code=sm_${PopSift_CC_VERSION}") | ||
| endforeach() | ||
|
|
||
| # | ||
| # Use the highest request CUDA CC for CUDA JIT compilation | ||
| # | ||
| list(LENGTH PopSift_CUDA_CC_LIST PopSift_CC_LIST_LEN) | ||
| MATH(EXPR PopSift_CC_LIST_LEN "${PopSift_CC_LIST_LEN}-1") | ||
| list(GET PopSift_CUDA_CC_LIST ${PopSift_CC_LIST_LEN} PopSift_CUDA_CC_LIST_LAST) | ||
| set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_${PopSift_CUDA_CC_LIST_LAST},code=compute_${PopSift_CUDA_CC_LIST_LAST}") | ||
|
|
||
| # default stream legacy implies that the 0 stream synchronizes all streams | ||
| list(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-fPIC") | ||
| endif() | ||
|
|
||
| # default stream per-thread implies that each host thread has one non-synchronizing 0-stream | ||
| # currently, the code requires legacy mode | ||
| set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;legacy") | ||
| list(APPEND CUDA_NVCC_FLAGS "--default-stream;legacy") | ||
| # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;per-thread") | ||
|
|
||
| message(STATUS "CUDA Version is ${CUDA_VERSION}") | ||
| message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}") | ||
| if( ( CUDA_VERSION VERSION_EQUAL "7.5" ) OR ( CUDA_VERSION VERSION_GREATER "7.5") ) | ||
| if(CUDA_VERSION VERSION_GREATER_EQUAL "7.5") | ||
| if(PopSift_NVCC_WARNINGS) | ||
| set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-lmem-usage") | ||
| set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-spills") | ||
| set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;--warn-on-local-memory-usage") | ||
| set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;--warn-on-spills") | ||
| list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;-warn-lmem-usage") | ||
| list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;-warn-spills") | ||
| list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;--warn-on-local-memory-usage") | ||
| list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;--warn-on-spills") | ||
| endif() | ||
| endif() | ||
|
|
||
| if(PopSift_USE_NORMF AND CUDA_VERSION VERSION_GREATER "7.4") | ||
| if(PopSift_USE_NORMF AND CUDA_VERSION VERSION_GREATER_EQUAL "7.5") | ||
| set(PopSift_HAVE_NORMF 1) | ||
| else() | ||
| set(PopSift_HAVE_NORMF 0) | ||
| endif() | ||
|
|
||
| if( ( CUDA_VERSION VERSION_EQUAL "9.0" ) OR ( CUDA_VERSION VERSION_GREATER "9.0") ) | ||
| if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0") | ||
| set(HAVE_SHFL_DOWN_SYNC 1) | ||
| else() | ||
| set(HAVE_SHFL_DOWN_SYNC 0) | ||
|
|
@@ -227,4 +212,4 @@ message(STATUS "Additional warning for CUDA nvcc: " ${PopSift_NVCC_WARNINGS}) | |
| message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}") | ||
| message(STATUS "Install path: " ${CMAKE_INSTALL_PREFIX}) | ||
| message("\n******************************************") | ||
| message("\n") | ||
| message("\n") | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,157 @@ | ||
| # | ||
| # CUDA hardware and SDKs are developing over time, different SDK support different | ||
| # hardware, and supported hardware differs depending on platform even for the same | ||
| # SDK version. | ||
| # This file attempts to provide a function that returns a valid selection of hardware | ||
| # for the current SDK and platform. | ||
| # | ||
| # It will require updates as CUDA develops, and it is currently not complete in terms | ||
| # of existing platforms that support CUDA. | ||
| # | ||
| # This function does not edit cache entries or variables in the parent scope | ||
| # except for the variables whose names are supplied for SUPPORTED_CC and | ||
| # SUPPORTED_GENCODE_FLAGS | ||
| # | ||
| # You may want to cache SUPPORTED_CC and append SUPPORTED_GENCODE_FLAGS to | ||
| # CUDA_NVCC_FLAGS. | ||
| # Like this: | ||
| # set(MYCC ${MYCC} CACHE STRING "CUDA CC versions to compile") | ||
| # end | ||
| # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${MY_GENCODE_FLAGS}") | ||
| # | ||
| # We assume that ${SUPPORTED_CC} can be overwritten. | ||
|
simogasp marked this conversation as resolved.
Outdated
|
||
| # We assume that ${SUPPORTED_GENCODE_FLAGS} can be overwritten. | ||
| # We assume that MIN_CC default to 20 | ||
| # We assume that MIN_CUDA_VERSION defaults to 7.0 | ||
| # | ||
| function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS) | ||
| set(options "") | ||
| set(oneValueArgs MIN_CUDA_VERSION MIN_CC) | ||
| set(multipleValueArgs "") | ||
| cmake_parse_arguments(CHOOSE_CUDA "${options}" "${oneValueArgs}" "${multipleValueArgs}" ${ARGN}) | ||
|
|
||
| if(NOT DEFINED CHOOSE_CUDA_MIN_CC) | ||
| set(CHOOSE_CUDA_MIN_CC 20) | ||
| endif() | ||
| if(NOT DEFINED CHOOSE_CUDA_MIN_CUDA_VERSION) | ||
| set(CHOOSE_CUDA_MIN_CUDA_VERSION 7.0) | ||
| endif() | ||
|
|
||
| find_package(CUDA ${CHOOSE_CUDA_MIN_CUDA_VERSION} REQUIRED) | ||
|
|
||
| if(NOT CUDA_FOUND) | ||
| message(FATAL_ERROR "Could not find CUDA >= ${CHOOSE_CUDA_MIN_CUDA_VERSION}") | ||
| endif() | ||
|
|
||
| # | ||
| # Create a list of possible CCs for each host processor. | ||
| # This may require tuning: CUDA cards exist in AIX machines with POWER CPUs, | ||
| # it is possible that non-Tegra ARM systems exist as well. | ||
| # For now, this is my best guess. | ||
| # | ||
| set(TEGRA_SUPPORTED_PROCESSORS "armv71;arm;aarch64") | ||
| set(OTHER_SUPPORTED_PROCESSORS "i686;x86_64;AMD64") | ||
|
|
||
| set(CC_LIST_BY_SYSTEM_PROCESSOR "") | ||
| if(CMAKE_SYSTEM_PROCESSOR IN_LIST OTHER_SUPPORTED_PROCESSORS) | ||
| list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "20;21;30;35;50;52;60;61;70;75") | ||
| endif() | ||
| if(CMAKE_SYSTEM_PROCESSOR IN_LIST TEGRA_SUPPORTED_PROCESSORS) | ||
| list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "32;53;62;72") | ||
| endif() | ||
| if(NOT CC_LIST_BY_SYSTEM_PROCESSOR) | ||
| message(FATAL_ERROR "Unknown how to build for ${CMAKE_SYSTEM_PROCESSOR}") | ||
| endif() | ||
|
|
||
| # | ||
| # Default setting of the CUDA CC versions to compile. | ||
| # Shortening the lists saves a lot of compile time. | ||
| # | ||
| set(CUDA_MIN_CC 20) | ||
| set(CUDA_MAX_CC 75) | ||
| if(CUDA_VERSION_MAJOR GREATER_EQUAL 10) | ||
| set(CUDA_MIN_CC 30) | ||
| elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 9) | ||
| set(CUDA_MIN_CC 30) | ||
| set(CUDA_MAX_CC 72) | ||
| elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 8) | ||
| set(CUDA_MAX_CC 62) | ||
| elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 7) | ||
| set(CUDA_MAX_CC 53) | ||
| else() | ||
| message(FATAL_ERROR "We do not support a CUDA SDK below version 7.0") | ||
| endif() | ||
| if(${CHOOSE_CUDA_MIN_CC} GREATER ${CUDA_MIN_CC}) | ||
| set(CUDA_MIN_CC ${CHOOSE_CUDA_MIN_CC}) | ||
| endif() | ||
|
|
||
| set(CC_LIST "") | ||
| foreach(CC ${CC_LIST_BY_SYSTEM_PROCESSOR}) | ||
| if( (${CC} GREATER_EQUAL ${CUDA_MIN_CC}) AND | ||
| (${CC} LESS_EQUAL ${CUDA_MAX_CC}) ) | ||
| list(APPEND CC_LIST ${CC}) | ||
| endif() | ||
| endforeach() | ||
|
|
||
| # | ||
| # Add all requested CUDA CCs to the command line for offline compilation | ||
| # | ||
| set(GENCODE_FLAGS "") | ||
| list(SORT CC_LIST) | ||
| foreach(CC_VERSION ${CC_LIST}) | ||
| list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_VERSION},code=sm_${CC_VERSION}") | ||
| endforeach() | ||
|
|
||
| # | ||
| # Use the highest request CUDA CC for CUDA JIT compilation | ||
| # | ||
| list(LENGTH CC_LIST CC_LIST_LEN) | ||
| MATH(EXPR CC_LIST_LEN "${CC_LIST_LEN}-1") | ||
| list(GET CC_LIST ${CC_LIST_LEN} CC_LIST_LAST) | ||
| list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_LIST_LAST},code=compute_${CC_LIST_LAST}") | ||
|
|
||
| # | ||
| # Two variables are exported to the parent scope. One is passed through the | ||
| # environment (CUDA_NVCC_FLAGS), the other is passed by name (SUPPORTED_CC) | ||
| # | ||
| set(${SUPPORTED_GENCODE_FLAGS} "${GENCODE_FLAGS}" PARENT_SCOPE) | ||
| set(${SUPPORTED_CC} "${CC_LIST}" PARENT_SCOPE) | ||
| endfunction() | ||
|
|
||
| # | ||
| # This function is used to create a list of gencode instructions for a given list | ||
| # of CCs. | ||
| # It takes as arguments is list of CCs and a list variable that can be filled with | ||
| # gencode strings. | ||
| # | ||
| # We assume that ${SUPPORTED_GENCODE_FLAGS} can be overwritten. | ||
| # | ||
| function(getFlagsForCudaCCList INPUT_CC_LIST SUPPORTED_GENCODE_FLAGS) | ||
| set(CC_LIST "${${INPUT_CC_LIST}}") | ||
|
|
||
| # | ||
| # Add all requested CUDA CCs to the command line for offline compilation | ||
| # | ||
| set(GENCODE_FLAGS "") | ||
| list(SORT CC_LIST) | ||
| foreach(CC_VERSION ${CC_LIST}) | ||
| list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_VERSION},code=sm_${CC_VERSION}") | ||
| endforeach() | ||
|
|
||
| # | ||
| # Use the highest request CUDA CC for CUDA JIT compilation | ||
| # | ||
| list(LENGTH CC_LIST CC_LIST_LEN) | ||
| MATH(EXPR CC_LIST_LEN "${CC_LIST_LEN}-1") | ||
| list(GET CC_LIST ${CC_LIST_LEN} CC_LIST_LAST) | ||
| list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_LIST_LAST},code=compute_${CC_LIST_LAST}") | ||
|
|
||
| message(STATUS "Setting gencode flags: ${GENCODE_FLAGS}") | ||
|
|
||
| # | ||
| # Two variables are exported to the parent scope. One is passed through the | ||
| # environment (CUDA_NVCC_FLAGS), the other is passed by name (SUPPORTED_CC) | ||
| # | ||
| set(${SUPPORTED_GENCODE_FLAGS} "${GENCODE_FLAGS}" PARENT_SCOPE) | ||
| endfunction() | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.