Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 11 additions & 30 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -86,16 +86,16 @@ if(NOT CUDA_FOUND)
message(FATAL_ERROR "Could not find CUDA >= 7.0")
endif()

#
# Default setting of the CUDA CC versions to compile.
# Shortening the lists saves a lot of compile time.
#
if(CUDA_VERSION_MAJOR GREATER 7)
set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 60 61 62)
else()
set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 )
message(STATUS "CUDA Version is ${CUDA_VERSION}")

# Compute a default list of CUDA compute capabilities (CCs) only when
# PopSift_CUDA_CC_LIST has not been defined already.
# NOTE(review): chooseCudaCC() is also what adds the -gencode entries to
# CUDA_NVCC_FLAGS; when PopSift_CUDA_CC_LIST is predefined this branch is
# skipped, so it looks like no -gencode flags get added here - confirm.
if(NOT DEFINED PopSift_CUDA_CC_LIST)
# Makes the chooseCudaCC() function called below available.
include(ChooseCudaCC)
# Values handed to chooseCudaCC() as its MINCC and MINCUDAVERSION arguments.
set(PopSift_MIN_CC 30)
set(PopSift_MIN_CUDA_VERSION 7.0)
# The selected CCs are returned in PopSift_CUDA_CC_LIST_BASIC (passed by name).
chooseCudaCC(PopSift_CUDA_CC_LIST_BASIC ${PopSift_MIN_CC} ${PopSift_MIN_CUDA_VERSION})
# Store the computed list in the cache under the user-visible variable name.
set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile")
endif()
set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile")
message(STATUS "Compiling for CUDA CCs ${PopSift_CUDA_CC_LIST}")

if(PopSift_USE_NVTX_PROFILING)
message(STATUS "PROFILING CPU CODE: NVTX is in use")
Expand All @@ -109,7 +109,7 @@ endif()
set(CUDA_SEPARABLE_COMPILATION ON)

if(UNIX AND NOT APPLE)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler;-rdynamic;-lineinfo")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler;-rdynamic")
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-v")
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-warn-double-usage")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--keep")
Expand All @@ -129,30 +129,11 @@ if(PopSift_USE_POSITION_INDEPENDENT_CODE)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler;-fPIC")
endif()

#
# Add all requested CUDA CCs to the command line for offline compilation
#
list(SORT PopSift_CUDA_CC_LIST)
foreach(PopSift_CC_VERSION ${PopSift_CUDA_CC_LIST})
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_${PopSift_CC_VERSION},code=sm_${PopSift_CC_VERSION}")
endforeach()

#
# Use the highest requested CUDA CC for CUDA JIT compilation
#
list(LENGTH PopSift_CUDA_CC_LIST PopSift_CC_LIST_LEN)
MATH(EXPR PopSift_CC_LIST_LEN "${PopSift_CC_LIST_LEN}-1")
list(GET PopSift_CUDA_CC_LIST ${PopSift_CC_LIST_LEN} PopSift_CUDA_CC_LIST_LAST)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_${PopSift_CUDA_CC_LIST_LAST},code=compute_${PopSift_CUDA_CC_LIST_LAST}")

# default stream legacy implies that the 0 stream synchronizes all streams
# default stream per-thread implies that each host thread has one non-synchronizing 0-stream
# currently, the code requires legacy mode
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;legacy")
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;per-thread")

message(STATUS "CUDA Version is ${CUDA_VERSION}")
message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}")
if( ( CUDA_VERSION VERSION_EQUAL "7.5" ) OR ( CUDA_VERSION VERSION_GREATER "7.5") )
if(PopSift_NVCC_WARNINGS)
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-lmem-usage")
Expand Down Expand Up @@ -227,4 +208,4 @@ message(STATUS "Additional warning for CUDA nvcc: " ${PopSift_NVCC_WARNINGS})
message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}")
message(STATUS "Install path: " ${CMAKE_INSTALL_PREFIX})
message("\n******************************************")
message("\n")
message("\n")
89 changes: 89 additions & 0 deletions cmake/ChooseCudaCC.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#
# chooseCudaCC(RESULT_NAME MINCC MINCUDAVERSION)
#
# Selects the CUDA compute capabilities (CCs) to compile for, based on the
# processor family of the target system and on the major version of the CUDA
# SDK found by find_package(CUDA).
#
# Arguments (all three are positional and must be supplied):
#   RESULT_NAME     name of the variable in the caller's scope that receives
#                   the sorted list of selected CCs
#   MINCC           lowest CC to include (inclusive); an empty string selects
#                   the default of 20
#   MINCUDAVERSION  lowest acceptable CUDA SDK version; an empty string
#                   selects the default of 7.0
#
# Side effect in the caller's scope:
#   CUDA_NVCC_FLAGS is extended by one
#   "-gencode;arch=compute_XX,code=sm_XX" entry per selected CC, plus one
#   "-gencode;arch=compute_XX,code=compute_XX" entry for the highest CC.
#
# Configuration stops with a FATAL_ERROR when CUDA is not found, the processor
# family is unknown, the CUDA SDK is older than 7.0, or no CC is left after
# filtering.
#
# After returning from this function, do not forget to call the following,
# substituting your own variable for RESULT_NAME:
#   set(RESULT_NAME ${RESULT_NAME} CACHE STRING "CUDA CC versions to compile")
#
function(chooseCudaCC RESULT_NAME MINCC MINCUDAVERSION)
  # Named parameters are always DEFINED inside a function, so an empty value
  # is the only way a caller can ask for the defaults.
  if("${MINCC}" STREQUAL "")
    set(MINCC 20)
  endif()
  if("${MINCUDAVERSION}" STREQUAL "")
    set(MINCUDAVERSION 7.0)
  endif()

  # REQUIRED already aborts the configuration when CUDA is missing or too old,
  # so no separate CUDA_FOUND check is needed afterwards.
  find_package(CUDA ${MINCUDAVERSION} REQUIRED)

  #
  # Create a list of possible CCs for each host processor.
  # This may require tuning: CUDA cards exist in AIX machines with POWER CPUs,
  # it is possible that non-Tegra ARM systems exist as well.
  # For now, this is my best guess.
  #
  # "AMD64" is what CMake reports for x86-64 on Windows, and "aarch64" is the
  # name of 64-bit ARM on Linux; both are accepted next to the original names.
  #
  if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(i686|x86_64|AMD64)$")
    set(CC_LIST_BY_SYSTEM_PROCESSOR 20 21 30 35 50 52 60 61 70 75)
  elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(arm|aarch64)")
    set(CC_LIST_BY_SYSTEM_PROCESSOR 32 53 62 72)
  else()
    message(FATAL_ERROR "Unknown how to build for ${CMAKE_SYSTEM_PROCESSOR}")
  endif()

  #
  # Range of CCs that the detected CUDA SDK can compile for.
  # Shortening the lists saves a lot of compile time.
  #
  set(CUDA_MIN_CC 20)
  set(CUDA_MAX_CC 75)
  if(CUDA_VERSION_MAJOR GREATER_EQUAL 10)
    set(CUDA_MIN_CC 30)
  elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 9)
    set(CUDA_MIN_CC 30)
    set(CUDA_MAX_CC 72)
  elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 8)
    set(CUDA_MAX_CC 62)
  elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 7)
    set(CUDA_MAX_CC 53)
  else()
    message(FATAL_ERROR "We do not support a CUDA SDK below version 7.0")
  endif()

  #
  # Keep the CCs that satisfy both the caller's minimum and the SDK's range.
  # MINCC is an inclusive lower bound: MINCC=30 keeps CC 30.
  #
  set(CC_LIST "")
  foreach(CC ${CC_LIST_BY_SYSTEM_PROCESSOR})
    if( (${CC} GREATER_EQUAL ${MINCC}) AND
        (${CC} GREATER_EQUAL ${CUDA_MIN_CC}) AND
        (${CC} LESS_EQUAL ${CUDA_MAX_CC}) )
      list(APPEND CC_LIST ${CC})
    endif()
  endforeach()

  # Fail with a readable message instead of an out-of-range list(GET) below.
  list(LENGTH CC_LIST CC_LIST_LEN)
  if(CC_LIST_LEN EQUAL 0)
    message(FATAL_ERROR
      "No CUDA CC >= ${MINCC} can be compiled with CUDA ${CUDA_VERSION} for ${CMAKE_SYSTEM_PROCESSOR}")
  endif()

  #
  # Add all requested CUDA CCs to the command line for offline compilation.
  # list(SORT) compares strings; every CC has two digits, so the order is
  # also the numeric order.
  #
  set(GENCODE_FLAGS "${CUDA_NVCC_FLAGS}")
  list(SORT CC_LIST)
  foreach(CC_VERSION ${CC_LIST})
    list(APPEND GENCODE_FLAGS -gencode "arch=compute_${CC_VERSION},code=sm_${CC_VERSION}")
  endforeach()

  #
  # Use the highest requested CUDA CC for CUDA JIT compilation.
  # Index -1 addresses the last element of the sorted list.
  #
  list(GET CC_LIST -1 CC_LIST_LAST)
  list(APPEND GENCODE_FLAGS -gencode "arch=compute_${CC_LIST_LAST},code=compute_${CC_LIST_LAST}")

  #
  # Two variables are exported to the parent scope. One is set under its
  # fixed name (CUDA_NVCC_FLAGS), the other is passed by name (RESULT_NAME).
  #
  set(CUDA_NVCC_FLAGS ${GENCODE_FLAGS} PARENT_SCOPE)
  set(${RESULT_NAME} ${CC_LIST} PARENT_SCOPE)
endfunction()

3 changes: 0 additions & 3 deletions src/popsift/s_extrema.cu
Original file line number Diff line number Diff line change
Expand Up @@ -506,9 +506,6 @@ bool find_extrema_in_dog_sub( cudaTextureObject_t dog,

template<int HEIGHT, int sift_mode>
__global__
#ifdef NDEBUG
__launch_bounds__(128,16)
#endif
void find_extrema_in_dog( cudaTextureObject_t dog,
int octave,
int width,
Expand Down