Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 32 additions & 47 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -86,89 +86,74 @@ if(NOT CUDA_FOUND)
message(FATAL_ERROR "Could not find CUDA >= 7.0")
endif()

#
# Default setting of the CUDA CC versions to compile.
# Shortening the lists saves a lot of compile time.
#
if(CUDA_VERSION_MAJOR GREATER 7)
set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 60 61 62)
message(STATUS "CUDA Version is ${CUDA_VERSION}")

include(ChooseCudaCC)
if(NOT DEFINED PopSift_CUDA_CC_LIST)
chooseCudaCC(PopSift_CUDA_CC_LIST_BASIC
PopSift_CUDA_GENCODE_FLAGS
MIN_CC 30
MIN_CUDA_VERSION 7.0)
Comment thread
simogasp marked this conversation as resolved.
Outdated
set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile")
else()
set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 )
getFlagsForCudaCCList(PopSift_CUDA_CC_LIST
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I guess we want to give the possibility to the user to provide a -DPopSift_CUDA_CC_LIST=30;70 at cmake stage, am I correct?
In that case:

  1. we should document it in the README in the build section

  2. more importantly, we should check that those provided are good/supported. One way to go it could be something like:

# we always need to know which are all the supported CC
chooseCudaCC(PopSift_CUDA_CC_LIST_BASIC 
            PopSift_CUDA_GENCODE_FLAGS
            MIN_CC 30
            MIN_CUDA_VERSION 7.0)
# if PopSift_CUDA_CC_LIST is provided
if(DEFINED PopSift_CUDA_CC_LIST)
    # check each provided element is supported
    # ideally this function checks the provided cc are in the list generating an error if not
    check_if_supported(PopSift_CUDA_CC_LIST  PopSift_CUDA_CC_LIST_BASIC)
    getFlagsForCudaCCList(PopSift_CUDA_CC_LIST PopSift_CUDA_GENCODE_FLAGS)
else()
    set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile")
endif()
list(APPEND CUDA_NVCC_FLAGS "${PopSift_CUDA_GENCODE_FLAGS}")

and

function(check_if_supported PROVIDED_CC  SUPPORTED_CC)
    foreach(cc IN LISTS PROVIDED_CC)
        if(NOT ${cc} IN_LIST SUPPORTED_CC)
            message(FATAL_ERROR "Compute capability ${cc} not supported. Supported CC are ${SUPPORTED_CC}")
    endforeach()
endfunction()

What do you think?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that a really good thing? Without that check, a user can configure for a brand new CC that we haven't put into the ChooseCudaCC.cmake file yet.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, you have a point!
We can then leave it like that and assume that it is an advanced feature and the user knows what is doing.

PopSift_CUDA_GENCODE_FLAGS)
endif()
set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile")
list(APPEND CUDA_NVCC_FLAGS "${PopSift_CUDA_GENCODE_FLAGS}")

if(PopSift_USE_NVTX_PROFILING)
message(STATUS "PROFILING CPU CODE: NVTX is in use")
endif()

if(PopSift_ERRCHK_AFTER_KERNEL)
message(STATUS "Synchronizing and checking errors after every kernel call")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-DERRCHK_AFTER_KERNEL")
list(APPEND CUDA_NVCC_FLAGS "-DERRCHK_AFTER_KERNEL")
endif()

set(CUDA_SEPARABLE_COMPILATION ON)

if(UNIX AND NOT APPLE)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler;-rdynamic;-lineinfo")
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-v")
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-warn-double-usage")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--keep")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--source-in-ptx")
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-rdynamic")
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-v")
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-warn-double-usage")
list(APPEND CUDA_NVCC_FLAGS_DEBUG "--keep")
list(APPEND CUDA_NVCC_FLAGS_DEBUG "--source-in-ptx")
endif()

# The following if should not be necessary, but apparently there is a bug in FindCUDA.cmake that
# generate an empty string in the nvcc command line causing the compilation to fail.
# see https://gitlab.kitware.com/cmake/cmake/issues/16411
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
message(STATUS "Building in debug mode")
set(CUDA_NVCC_FLAGS_DEBUG "${CUDA_NVCC_FLAGS_DEBUG};-G")
list(APPEND CUDA_NVCC_FLAGS_DEBUG "-G")
endif()
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-O3")
list(APPEND CUDA_NVCC_FLAGS_RELEASE "-O3")

if(PopSift_USE_POSITION_INDEPENDENT_CODE)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler;-fPIC")
endif()

#
# Add all requested CUDA CCs to the command line for offline compilation
#
list(SORT PopSift_CUDA_CC_LIST)
foreach(PopSift_CC_VERSION ${PopSift_CUDA_CC_LIST})
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_${PopSift_CC_VERSION},code=sm_${PopSift_CC_VERSION}")
endforeach()

#
# Use the highest request CUDA CC for CUDA JIT compilation
#
list(LENGTH PopSift_CUDA_CC_LIST PopSift_CC_LIST_LEN)
MATH(EXPR PopSift_CC_LIST_LEN "${PopSift_CC_LIST_LEN}-1")
list(GET PopSift_CUDA_CC_LIST ${PopSift_CC_LIST_LEN} PopSift_CUDA_CC_LIST_LAST)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_${PopSift_CUDA_CC_LIST_LAST},code=compute_${PopSift_CUDA_CC_LIST_LAST}")

# default stream legacy implies that the 0 stream synchronizes all streams
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-fPIC")
endif()

# default stream per-thread implies that each host thread has one non-synchronizing 0-stream
# currently, the code requires legacy mode
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;legacy")
list(APPEND CUDA_NVCC_FLAGS "--default-stream;legacy")
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;per-thread")

message(STATUS "CUDA Version is ${CUDA_VERSION}")
message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}")
if( ( CUDA_VERSION VERSION_EQUAL "7.5" ) OR ( CUDA_VERSION VERSION_GREATER "7.5") )
if(CUDA_VERSION VERSION_GREATER_EQUAL "7.5")
if(PopSift_NVCC_WARNINGS)
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-lmem-usage")
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-spills")
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;--warn-on-local-memory-usage")
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;--warn-on-spills")
list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;-warn-lmem-usage")
list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;-warn-spills")
list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;--warn-on-local-memory-usage")
list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;--warn-on-spills")
endif()
endif()

if(PopSift_USE_NORMF AND CUDA_VERSION VERSION_GREATER "7.4")
if(PopSift_USE_NORMF AND CUDA_VERSION VERSION_GREATER_EQUAL "7.5")
set(PopSift_HAVE_NORMF 1)
else()
set(PopSift_HAVE_NORMF 0)
endif()

if( ( CUDA_VERSION VERSION_EQUAL "9.0" ) OR ( CUDA_VERSION VERSION_GREATER "9.0") )
if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
set(HAVE_SHFL_DOWN_SYNC 1)
else()
set(HAVE_SHFL_DOWN_SYNC 0)
Expand Down Expand Up @@ -227,4 +212,4 @@ message(STATUS "Additional warning for CUDA nvcc: " ${PopSift_NVCC_WARNINGS})
message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}")
message(STATUS "Install path: " ${CMAKE_INSTALL_PREFIX})
message("\n******************************************")
message("\n")
message("\n")
157 changes: 157 additions & 0 deletions cmake/ChooseCudaCC.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
#
# CUDA hardware and SDKs are developing over time, different SDK support different
# hardware, and supported hardware differs depending on platform even for the same
# SDK version.
# This file attempts to provide a function that returns a valid selection of hardware
# for the current SDK and platform.
#
# It will require updates as CUDA develops, and it is currently not complete in terms
# of existing platforms that support CUDA.
#
# This function does not edit cache entries or variables in the parent scope
# except for the variables whose names are supplied for SUPPORTED_CC and
# SUPPORTED_GENCODE_FLAGS
#
# You may want to cache SUPPORTED_CC and append SUPPORTED_GENCODE_FLAGS to
# CUDA_NVCC_FLAGS.
# Like this:
# set(MYCC ${MYCC} CACHE STRING "CUDA CC versions to compile")
# end
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${MY_GENCODE_FLAGS}")
#
# We assume that ${SUPPORTED_CC} can be overwritten.
Comment thread
simogasp marked this conversation as resolved.
Outdated
# We assume that ${SUPPORTED_GENCODE_FLAGS} can be overwritten.
# We assume that MIN_CC default to 20
# We assume that MIN_CUDA_VERSION defaults to 7.0
#
function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS)
set(options "")
set(oneValueArgs MIN_CUDA_VERSION MIN_CC)
set(multipleValueArgs "")
cmake_parse_arguments(CHOOSE_CUDA "${options}" "${oneValueArgs}" "${multipleValueArgs}" ${ARGN})

if(NOT DEFINED CHOOSE_CUDA_MIN_CC)
set(CHOOSE_CUDA_MIN_CC 20)
endif()
if(NOT DEFINED CHOOSE_CUDA_MIN_CUDA_VERSION)
set(CHOOSE_CUDA_MIN_CUDA_VERSION 7.0)
endif()

find_package(CUDA ${CHOOSE_CUDA_MIN_CUDA_VERSION} REQUIRED)

if(NOT CUDA_FOUND)
message(FATAL_ERROR "Could not find CUDA >= ${CHOOSE_CUDA_MIN_CUDA_VERSION}")
endif()

#
# Create a list of possible CCs for each host processor.
# This may require tuning: CUDA cards exist in AIX machines with POWER CPUs,
# it is possible that non-Tegra ARM systems exist as well.
# For now, this is my best guess.
#
set(TEGRA_SUPPORTED_PROCESSORS "armv71;arm;aarch64")
set(OTHER_SUPPORTED_PROCESSORS "i686;x86_64;AMD64")

set(CC_LIST_BY_SYSTEM_PROCESSOR "")
if(CMAKE_SYSTEM_PROCESSOR IN_LIST OTHER_SUPPORTED_PROCESSORS)
list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "20;21;30;35;50;52;60;61;70;75")
endif()
if(CMAKE_SYSTEM_PROCESSOR IN_LIST TEGRA_SUPPORTED_PROCESSORS)
list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "32;53;62;72")
endif()
if(NOT CC_LIST_BY_SYSTEM_PROCESSOR)
message(FATAL_ERROR "Unknown how to build for ${CMAKE_SYSTEM_PROCESSOR}")
endif()

#
# Default setting of the CUDA CC versions to compile.
# Shortening the lists saves a lot of compile time.
#
set(CUDA_MIN_CC 20)
set(CUDA_MAX_CC 75)
if(CUDA_VERSION_MAJOR GREATER_EQUAL 10)
set(CUDA_MIN_CC 30)
elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 9)
set(CUDA_MIN_CC 30)
set(CUDA_MAX_CC 72)
elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 8)
set(CUDA_MAX_CC 62)
elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 7)
set(CUDA_MAX_CC 53)
else()
message(FATAL_ERROR "We do not support a CUDA SDK below version 7.0")
endif()
if(${CHOOSE_CUDA_MIN_CC} GREATER ${CUDA_MIN_CC})
set(CUDA_MIN_CC ${CHOOSE_CUDA_MIN_CC})
endif()

set(CC_LIST "")
foreach(CC ${CC_LIST_BY_SYSTEM_PROCESSOR})
if( (${CC} GREATER_EQUAL ${CUDA_MIN_CC}) AND
(${CC} LESS_EQUAL ${CUDA_MAX_CC}) )
list(APPEND CC_LIST ${CC})
endif()
endforeach()

#
# Add all requested CUDA CCs to the command line for offline compilation
#
set(GENCODE_FLAGS "")
list(SORT CC_LIST)
foreach(CC_VERSION ${CC_LIST})
list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_VERSION},code=sm_${CC_VERSION}")
endforeach()

#
# Use the highest request CUDA CC for CUDA JIT compilation
#
list(LENGTH CC_LIST CC_LIST_LEN)
MATH(EXPR CC_LIST_LEN "${CC_LIST_LEN}-1")
list(GET CC_LIST ${CC_LIST_LEN} CC_LIST_LAST)
list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_LIST_LAST},code=compute_${CC_LIST_LAST}")

#
# Two variables are exported to the parent scope. One is passed through the
# environment (CUDA_NVCC_FLAGS), the other is passed by name (SUPPORTED_CC)
#
set(${SUPPORTED_GENCODE_FLAGS} "${GENCODE_FLAGS}" PARENT_SCOPE)
set(${SUPPORTED_CC} "${CC_LIST}" PARENT_SCOPE)
endfunction()

#
# This function is used to create a list of gencode instructions for a given list
# of CCs.
# It takes as arguments is list of CCs and a list variable that can be filled with
# gencode strings.
#
# We assume that ${SUPPORTED_GENCODE_FLAGS} can be overwritten.
#
function(getFlagsForCudaCCList INPUT_CC_LIST SUPPORTED_GENCODE_FLAGS)
set(CC_LIST "${${INPUT_CC_LIST}}")

#
# Add all requested CUDA CCs to the command line for offline compilation
#
set(GENCODE_FLAGS "")
list(SORT CC_LIST)
foreach(CC_VERSION ${CC_LIST})
list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_VERSION},code=sm_${CC_VERSION}")
endforeach()

#
# Use the highest request CUDA CC for CUDA JIT compilation
#
list(LENGTH CC_LIST CC_LIST_LEN)
MATH(EXPR CC_LIST_LEN "${CC_LIST_LEN}-1")
list(GET CC_LIST ${CC_LIST_LEN} CC_LIST_LAST)
list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_LIST_LAST},code=compute_${CC_LIST_LAST}")

message(STATUS "Setting gencode flags: ${GENCODE_FLAGS}")

#
# Two variables are exported to the parent scope. One is passed through the
# environment (CUDA_NVCC_FLAGS), the other is passed by name (SUPPORTED_CC)
#
set(${SUPPORTED_GENCODE_FLAGS} "${GENCODE_FLAGS}" PARENT_SCOPE)
endfunction()

3 changes: 0 additions & 3 deletions src/popsift/s_extrema.cu
Original file line number Diff line number Diff line change
Expand Up @@ -506,9 +506,6 @@ bool find_extrema_in_dog_sub( cudaTextureObject_t dog,

template<int HEIGHT, int sift_mode>
__global__
#ifdef NDEBUG
__launch_bounds__(128,16)
#endif
void find_extrema_in_dog( cudaTextureObject_t dog,
int octave,
int width,
Expand Down