Skip to content

Commit df7c387

Browse files
authored
Merge branch 'develop' into ci/xenial
2 parents ce9fa95 + fadb397 commit df7c387

File tree

4 files changed

+205
-52
lines changed

4 files changed

+205
-52
lines changed

CMakeLists.txt

Lines changed: 32 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -86,89 +86,74 @@ if(NOT CUDA_FOUND)
8686
message(FATAL_ERROR "Could not find CUDA >= 7.0")
8787
endif()
8888

89-
#
90-
# Default setting of the CUDA CC versions to compile.
91-
# Shortening the lists saves a lot of compile time.
92-
#
93-
if(CUDA_VERSION_MAJOR GREATER 7)
94-
set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 60 61)
89+
message(STATUS "CUDA Version is ${CUDA_VERSION}")
90+
91+
include(ChooseCudaCC)
92+
if(NOT DEFINED PopSift_CUDA_CC_LIST)
93+
chooseCudaCC(PopSift_CUDA_CC_LIST_BASIC
94+
PopSift_CUDA_GENCODE_FLAGS
95+
MIN_CC 30
96+
MIN_CUDA_VERSION 7.0)
97+
set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile")
9598
else()
96-
set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 )
99+
getFlagsForCudaCCList(PopSift_CUDA_CC_LIST
100+
PopSift_CUDA_GENCODE_FLAGS)
97101
endif()
98-
set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile")
102+
list(APPEND CUDA_NVCC_FLAGS "${PopSift_CUDA_GENCODE_FLAGS}")
99103

100104
if(PopSift_USE_NVTX_PROFILING)
101105
message(STATUS "PROFILING CPU CODE: NVTX is in use")
102106
endif()
103107

104108
if(PopSift_ERRCHK_AFTER_KERNEL)
105109
message(STATUS "Synchronizing and checking errors after every kernel call")
106-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-DERRCHK_AFTER_KERNEL")
110+
list(APPEND CUDA_NVCC_FLAGS "-DERRCHK_AFTER_KERNEL")
107111
endif()
108112

109113
set(CUDA_SEPARABLE_COMPILATION ON)
110114

111115
if(UNIX AND NOT APPLE)
112-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler;-rdynamic;-lineinfo")
113-
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-v")
114-
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-warn-double-usage")
115-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--keep")
116-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--source-in-ptx")
116+
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-rdynamic")
117+
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-v")
118+
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-warn-double-usage")
119+
list(APPEND CUDA_NVCC_FLAGS_DEBUG "--keep")
120+
list(APPEND CUDA_NVCC_FLAGS_DEBUG "--source-in-ptx")
117121
endif()
118122

119123
# The following if should not be necessary, but apparently there is a bug in FindCUDA.cmake that
120124
# generates an empty string in the nvcc command line, causing the compilation to fail.
121125
# see https://gitlab.kitware.com/cmake/cmake/issues/16411
122126
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
123127
message(STATUS "Building in debug mode")
124-
set(CUDA_NVCC_FLAGS_DEBUG "${CUDA_NVCC_FLAGS_DEBUG};-G")
128+
list(APPEND CUDA_NVCC_FLAGS_DEBUG "-G")
125129
endif()
126-
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-O3")
130+
list(APPEND CUDA_NVCC_FLAGS_RELEASE "-O3")
127131

128132
if(PopSift_USE_POSITION_INDEPENDENT_CODE)
129-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler;-fPIC")
130-
endif()
131-
132-
#
133-
# Add all requested CUDA CCs to the command line for offline compilation
134-
#
135-
list(SORT PopSift_CUDA_CC_LIST)
136-
foreach(PopSift_CC_VERSION ${PopSift_CUDA_CC_LIST})
137-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_${PopSift_CC_VERSION},code=sm_${PopSift_CC_VERSION}")
138-
endforeach()
139-
140-
#
141-
# Use the highest request CUDA CC for CUDA JIT compilation
142-
#
143-
list(LENGTH PopSift_CUDA_CC_LIST PopSift_CC_LIST_LEN)
144-
MATH(EXPR PopSift_CC_LIST_LEN "${PopSift_CC_LIST_LEN}-1")
145-
list(GET PopSift_CUDA_CC_LIST ${PopSift_CC_LIST_LEN} PopSift_CUDA_CC_LIST_LAST)
146-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_${PopSift_CUDA_CC_LIST_LAST},code=compute_${PopSift_CUDA_CC_LIST_LAST}")
147-
148-
# default stream legacy implies that the 0 stream synchronizes all streams
133+
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-fPIC")
134+
endif()
135+
149136
# default stream per-thread implies that each host thread has one non-synchronizing 0-stream
150137
# currently, the code requires legacy mode
151-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;legacy")
138+
list(APPEND CUDA_NVCC_FLAGS "--default-stream;legacy")
152139
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;per-thread")
153140

154-
message(STATUS "CUDA Version is ${CUDA_VERSION}")
155-
message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}")
156-
if( ( CUDA_VERSION VERSION_EQUAL "7.5" ) OR ( CUDA_VERSION VERSION_GREATER "7.5") )
141+
if(CUDA_VERSION VERSION_GREATER_EQUAL "7.5")
157142
if(PopSift_NVCC_WARNINGS)
158-
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-lmem-usage")
159-
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-spills")
160-
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;--warn-on-local-memory-usage")
161-
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;--warn-on-spills")
143+
list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;-warn-lmem-usage")
144+
list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;-warn-spills")
145+
list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;--warn-on-local-memory-usage")
146+
list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;--warn-on-spills")
162147
endif()
163148
endif()
164149

165-
if(PopSift_USE_NORMF AND CUDA_VERSION VERSION_GREATER "7.4")
150+
if(PopSift_USE_NORMF AND CUDA_VERSION VERSION_GREATER_EQUAL "7.5")
166151
set(PopSift_HAVE_NORMF 1)
167152
else()
168153
set(PopSift_HAVE_NORMF 0)
169154
endif()
170155

171-
if( ( CUDA_VERSION VERSION_EQUAL "9.0" ) OR ( CUDA_VERSION VERSION_GREATER "9.0") )
156+
if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
172157
set(HAVE_SHFL_DOWN_SYNC 1)
173158
else()
174159
set(HAVE_SHFL_DOWN_SYNC 0)
@@ -227,4 +212,4 @@ message(STATUS "Additional warning for CUDA nvcc: " ${PopSift_NVCC_WARNINGS})
227212
message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}")
228213
message(STATUS "Install path: " ${CMAKE_INSTALL_PREFIX})
229214
message("\n******************************************")
230-
message("\n")
215+
message("\n")

README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,9 @@ We acknowledge that there is at least one SIFT implementation that is vastly fas
109109
## License
110110

111111
PopSift is licensed under [MPL v2 license](COPYING.md).
112-
However, SIFT is patented in the US and perhaps other countries, and this license does not release users of this code from any requirements that may arise from such patents.
112+
SIFT was patented in the United States from 1999-03-08 to 2020-03-28. See the [patent link](https://patents.google.com/patent/US6711293B1/en) for more information.
113+
The PopSift license only concerns the PopSift source code and does not release users of this code from any requirements that may arise from patents.
114+
113115

114116
## Cite Us
115117

@@ -135,7 +137,7 @@ If you use PopSift for your publication, please cite us as:
135137

136138
## Acknowledgements
137139

138-
PopSift was developed within the project [POPART](http://www.popartproject.eu), which has been funded by the European Commission in the Horizon 2020 framework.
140+
PopSift was developed within the project [POPART](https://alicevision.org/popart), which has been funded by the [European Commission in the Horizon 2020](https://cordis.europa.eu/project/id/644874) framework.
139141

140142
___
141143

cmake/ChooseCudaCC.cmake

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
#
2+
# This file contains two functions:
3+
# chooseCudaCC
4+
# getFlagsForCudaCCList
5+
#
6+
# Motivation:
7+
# CUDA hardware and SDKs evolve over time: different SDKs support different
8+
# hardware, and supported hardware differs depending on platform even for the same
9+
# SDK version. This file attempts to provide a function that returns a valid selection
10+
# of hardware for the current SDK and platform. It will require updates as CUDA develops,
11+
# and it is currently not complete in terms of existing platforms that support CUDA.
12+
#
13+
14+
#
# Select the CUDA compute capabilities (CCs) to compile for, together with the
# matching nvcc -gencode flags.
#
# Usage:
#   chooseCudaCC(SUPPORTED_CC SUPPORTED_GENCODE_FLAGS
#                [MIN_CUDA_VERSION X.Y]
#                [MIN_CC XX])
#
#   SUPPORTED_CC             out variable. Receives the sorted list of selected CCs.
#   SUPPORTED_GENCODE_FLAGS  out variable. Receives the list of gencode flags to
#                            append to, e.g., CUDA_NVCC_FLAGS.
#   MIN_CUDA_VERSION         minimal CUDA version handed to find_package
#                            (e.g. 7.5, default 7.0).
#   MIN_CC                   minimal CC supported by the project (e.g. 35, default 20).
#
# Selection rules:
#   * candidate CCs depend on CMAKE_SYSTEM_PROCESSOR
#     (x86: 20 21 30 35 50 52 60 61 70 75, ARM/Tegra: 32 53 62 72);
#   * the range is limited by the major version of the CUDA SDK that was found
#     (>= 10: 30..75, 9: 30..72, 8: 20..62, 7: 20..53);
#   * the lower bound is raised to MIN_CC when MIN_CC is larger.
#
# Configuration stops with a FATAL_ERROR when CUDA is not found, when the host
# processor is unknown, when the SDK is older than 7.0, or when no CC is left
# after filtering.
#
# This function does not edit cache entries or variables in the parent scope
# except for the variables whose names are supplied for SUPPORTED_CC and
# SUPPORTED_GENCODE_FLAGS.
#
# You may want to cache SUPPORTED_CC and append SUPPORTED_GENCODE_FLAGS to
# CUDA_NVCC_FLAGS, like this:
#   if(NOT DEFINED MYCC)
#     chooseCudaCC(MYCC_DEFAULT MY_GENCODE_FLAGS MIN_CC 30 MIN_CUDA_VERSION 7.0)
#     set(MYCC ${MYCC_DEFAULT} CACHE STRING "CUDA CC versions to compile")
#   endif()
#   list(APPEND CUDA_NVCC_FLAGS "${MY_GENCODE_FLAGS}")
#
function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS)
  set(options "")
  set(oneValueArgs MIN_CUDA_VERSION MIN_CC)
  set(multipleValueArgs "")
  cmake_parse_arguments(CHOOSE_CUDA "${options}" "${oneValueArgs}" "${multipleValueArgs}" ${ARGN})

  # Unknown keywords used to be dropped silently; report them, but keep going
  # so that existing callers are not broken.
  if(CHOOSE_CUDA_UNPARSED_ARGUMENTS)
    message(WARNING "chooseCudaCC: ignoring unknown arguments: ${CHOOSE_CUDA_UNPARSED_ARGUMENTS}")
  endif()

  if(NOT DEFINED CHOOSE_CUDA_MIN_CC)
    set(CHOOSE_CUDA_MIN_CC 20)
  endif()
  if(NOT DEFINED CHOOSE_CUDA_MIN_CUDA_VERSION)
    set(CHOOSE_CUDA_MIN_CUDA_VERSION 7.0)
  endif()

  find_package(CUDA ${CHOOSE_CUDA_MIN_CUDA_VERSION} REQUIRED)

  # Defensive check: REQUIRED normally aborts the configuration before this point.
  if(NOT CUDA_FOUND)
    message(FATAL_ERROR "Could not find CUDA >= ${CHOOSE_CUDA_MIN_CUDA_VERSION}")
  endif()

  #
  # Create a list of possible CCs for each host processor.
  # This may require tuning: CUDA cards exist in AIX machines with POWER CPUs,
  # it is possible that non-Tegra ARM systems exist as well.
  #
  # Note: 32-bit ARM Linux reports "armv7l" (lower-case L, not the digit one).
  #
  set(tegra_processors "armv7l;arm;aarch64")
  set(other_processors "i686;x86_64;AMD64")

  set(candidate_ccs "")
  if(CMAKE_SYSTEM_PROCESSOR IN_LIST other_processors)
    list(APPEND candidate_ccs "20;21;30;35;50;52;60;61;70;75")
  endif()
  if(CMAKE_SYSTEM_PROCESSOR IN_LIST tegra_processors)
    list(APPEND candidate_ccs "32;53;62;72")
  endif()
  if(NOT candidate_ccs)
    message(FATAL_ERROR "Unknown how to build for ${CMAKE_SYSTEM_PROCESSOR}")
  endif()

  #
  # Range of CCs that the detected SDK can compile for.
  # Shortening the lists saves a lot of compile time.
  #
  set(sdk_min_cc 20)
  set(sdk_max_cc 75)
  if(CUDA_VERSION_MAJOR GREATER_EQUAL 10)
    set(sdk_min_cc 30)
  elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 9)
    set(sdk_min_cc 30)
    set(sdk_max_cc 72)
  elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 8)
    set(sdk_max_cc 62)
  elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 7)
    set(sdk_max_cc 53)
  else()
    message(FATAL_ERROR "We do not support a CUDA SDK below version 7.0")
  endif()

  # The project may require a higher minimum than the SDK does.
  if(CHOOSE_CUDA_MIN_CC GREATER sdk_min_cc)
    set(sdk_min_cc ${CHOOSE_CUDA_MIN_CC})
  endif()

  # Keep only the candidates inside [sdk_min_cc, sdk_max_cc].
  set(cc_list "")
  foreach(cc IN LISTS candidate_ccs)
    if(cc GREATER_EQUAL sdk_min_cc AND cc LESS_EQUAL sdk_max_cc)
      list(APPEND cc_list ${cc})
    endif()
  endforeach()

  # Without this check an empty selection fails below in list(GET) with a
  # message that does not point at the real cause.
  list(LENGTH cc_list cc_count)
  if(cc_count EQUAL 0)
    message(FATAL_ERROR
            "chooseCudaCC: no CUDA CC between ${sdk_min_cc} and ${sdk_max_cc} is known for "
            "${CMAKE_SYSTEM_PROCESSOR} with CUDA ${CUDA_VERSION}")
  endif()

  #
  # Add all selected CUDA CCs to the command line for offline compilation.
  # All candidates have two digits, so the string sort is also a numeric sort.
  #
  list(SORT cc_list)
  set(gencode_flags "")
  foreach(cc IN LISTS cc_list)
    list(APPEND gencode_flags "-gencode;arch=compute_${cc},code=sm_${cc}")
  endforeach()

  #
  # Use the highest selected CUDA CC for CUDA JIT compilation (PTX).
  #
  list(GET cc_list -1 highest_cc)
  list(APPEND gencode_flags "-gencode;arch=compute_${highest_cc},code=compute_${highest_cc}")

  #
  # Both results are returned to the caller through the variable names that
  # were passed in as SUPPORTED_GENCODE_FLAGS and SUPPORTED_CC.
  #
  set(${SUPPORTED_GENCODE_FLAGS} "${gencode_flags}" PARENT_SCOPE)
  set(${SUPPORTED_CC} "${cc_list}" PARENT_SCOPE)
endfunction()
131+
132+
#
# Return the gencode parameters for a given list of CCs.
#
# Usage:
#   getFlagsForCudaCCList(INPUT_CC_LIST SUPPORTED_GENCODE_FLAGS)
#
#   INPUT_CC_LIST            in variable. Name of a variable holding the list of
#                            CCs to compile for (e.g. "30;52;61"). Must not be empty.
#   SUPPORTED_GENCODE_FLAGS  out variable. Receives the list of gencode flags to
#                            append to, e.g., CUDA_NVCC_FLAGS.
#
# One "-gencode arch=compute_XX,code=sm_XX" pair is generated per CC, followed
# by one "-gencode arch=compute_YY,code=compute_YY" pair for the highest CC YY.
#
function(getFlagsForCudaCCList INPUT_CC_LIST SUPPORTED_GENCODE_FLAGS)
  set(cc_list "${${INPUT_CC_LIST}}")

  # Without this check an empty list fails below in list(GET) with a message
  # that does not point at the real cause.
  list(LENGTH cc_list cc_count)
  if(cc_count EQUAL 0)
    message(FATAL_ERROR "getFlagsForCudaCCList: the CC list '${INPUT_CC_LIST}' is empty")
  endif()

  #
  # Add all requested CUDA CCs to the command line for offline compilation
  #
  list(SORT cc_list)
  set(gencode_flags "")
  foreach(cc IN LISTS cc_list)
    list(APPEND gencode_flags "-gencode;arch=compute_${cc},code=sm_${cc}")
  endforeach()

  #
  # Use the highest requested CUDA CC for CUDA JIT compilation.
  # list(SORT) compares strings, so "100" sorts before "90". Start from the
  # last sorted entry and then correct it with a numeric comparison.
  #
  list(GET cc_list -1 highest_cc)
  foreach(cc IN LISTS cc_list)
    if(cc GREATER highest_cc)
      set(highest_cc "${cc}")
    endif()
  endforeach()
  list(APPEND gencode_flags "-gencode;arch=compute_${highest_cc},code=compute_${highest_cc}")

  message(STATUS "Setting gencode flags: ${gencode_flags}")

  #
  # The result is returned to the caller through the variable name that was
  # passed in as SUPPORTED_GENCODE_FLAGS.
  #
  set(${SUPPORTED_GENCODE_FLAGS} "${gencode_flags}" PARENT_SCOPE)
endfunction()
169+

src/popsift/s_extrema.cu

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -506,9 +506,6 @@ bool find_extrema_in_dog_sub( cudaTextureObject_t dog,
506506

507507
template<int HEIGHT, int sift_mode>
508508
__global__
509-
#ifdef NDEBUG
510-
__launch_bounds__(128,16)
511-
#endif
512509
void find_extrema_in_dog( cudaTextureObject_t dog,
513510
int octave,
514511
int width,

0 commit comments

Comments
 (0)