Skip to content

Commit de86c1c

Browse files
authored
Merge branch 'develop' into doc/first
2 parents 387face + d14aa17 commit de86c1c

File tree

11 files changed

+118
-79
lines changed

11 files changed

+118
-79
lines changed

.travis.yml

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ env:
1616
- CUDA_VERSION_MAJOR="8" CUDA_VERSION_MINOR="0" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.61-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}"
1717
- CUDA_VERSION_MAJOR="9" CUDA_VERSION_MINOR="2" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.148-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}"
1818
- CUDA_VERSION_MAJOR="10" CUDA_VERSION_MINOR="2" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.89-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}"
19+
- CUDA_VERSION_MAJOR="11" CUDA_VERSION_MINOR="0" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.2-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}"
1920

2021

2122
global:
@@ -57,21 +58,33 @@ before_install:
5758
5859
install:
5960
- UBUNTU_VERSION=ubuntu1604
60-
- CUDA_REPO_PKG=cuda-repo-${UBUNTU_VERSION}_${CUDA_PKG_LONGVERSION}_amd64.deb
61-
- wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/$CUDA_REPO_PKG
62-
- travis_retry sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub
63-
- sudo dpkg -i $CUDA_REPO_PKG
64-
- rm ${CUDA_REPO_PKG}
65-
- travis_retry sudo apt-get -y update
66-
# cuda > 10.0 changed cublas naming
6761
- >
68-
if [ ${CUDA_VERSION_MAJOR} -lt 10 ]; then
69-
CUBLAS_PKG=cuda-cublas-dev-$CUDA_PKG_VERSION
62+
if [ ${CUDA_VERSION_MAJOR} -lt 11 ]; then
63+
CUDA_REPO_PKG=cuda-repo-${UBUNTU_VERSION}_${CUDA_PKG_LONGVERSION}_amd64.deb
64+
wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/$CUDA_REPO_PKG
65+
travis_retry sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub
66+
sudo dpkg -i $CUDA_REPO_PKG
67+
rm ${CUDA_REPO_PKG}
68+
travis_retry sudo apt-get -y update
69+
# cuda > 10.0 changed cublas naming
70+
if [ ${CUDA_VERSION_MAJOR} -lt 10 ]; then
71+
CUBLAS_PKG=cuda-cublas-dev-$CUDA_PKG_VERSION
72+
else
73+
CUBLAS_PKG=libcublas-dev
74+
fi
75+
travis_retry sudo apt-get install -y --no-install-recommends --allow-unauthenticated cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION ${CUBLAS_PKG} cuda-curand-dev-$CUDA_PKG_VERSION
76+
sudo ln -s /usr/local/cuda-${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} /usr/local/cuda
7077
else
71-
CUBLAS_PKG=libcublas-dev
78+
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-ubuntu1604.pin
79+
travis_retry sudo mv cuda-ubuntu1604.pin /etc/apt/preferences.d/cuda-repository-pin-600
80+
travis_retry sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub
81+
travis_retry sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/ /"
82+
sudo apt-get update && sudo apt-get -y install cuda
7283
fi
73-
- travis_retry sudo apt-get install -y --no-install-recommends --allow-unauthenticated cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION ${CUBLAS_PKG} cuda-curand-dev-$CUDA_PKG_VERSION
74-
- sudo ln -s /usr/local/cuda-${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} /usr/local/cuda
84+
# - CUDA_REPO_PKG=cuda-repo-${UBUNTU_VERSION}_${CUDA_PKG_LONGVERSION}_amd64.deb
85+
# - wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/$CUDA_REPO_PKG
86+
87+
7588

7689
before_script:
7790
# Classic release build

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2020
### Added
2121
- Improved checks for CUDA textures [PR](https://github.com/alicevision/popsift/pull/89)
2222
- CMake: Improved support for all Cuda CC [PR](https://github.com/alicevision/popsift/pull/75)
23+
- CMake: support for CUDA 11 [PR](https://github.com/alicevision/popsift/pull/103)
2324
- Support for Cuda CC 7 cards (RTX 2080) [PR](https://github.com/alicevision/popsift/pull/67)
2425
- Support for Boost 1.70 [PR](https://github.com/alicevision/popsift/pull/65)
2526

CMakeLists.txt

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,10 @@ option(PopSift_ERRCHK_AFTER_KERNEL "Synchronize and check CUDA error after e
1414
option(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." ON)
1515
option(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." ON)
1616
option(PopSift_USE_NORMF "The __normf function computes Euclidean distance on large arrays. Fast but stability is uncertain." OFF)
17-
option(PopSift_USE_TEST_CMD "Add testing step for functional verification" OFF)
1817
option(PopSift_NVCC_WARNINGS "Switch on several additional warning for CUDA nvcc" OFF)
18+
option(PopSift_USE_TEST_CMD "Add testing step for functional verification" OFF)
1919
option(BUILD_SHARED_LIBS "Build shared libraries" ON)
2020

21-
2221
if(PopSift_USE_POSITION_INDEPENDENT_CODE AND NOT MSVC)
2322
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
2423
endif()
@@ -45,16 +44,6 @@ if(MSVC AND NOT BUILD_SHARED_LIBS)
4544
endforeach()
4645
endif()
4746

48-
# for some reason this line is necessary to propagate the standard to nvcc
49-
# On MSVC this is not necessary / nvcc does not recognize the flag for MSVC
50-
if(NOT MSVC)
51-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
52-
endif()
53-
set(CMAKE_CXX_STANDARD 11)
54-
set(CMAKE_CXX_STANDARD_REQUIRED ON)
55-
set(CMAKE_CUDA_STANDARD 11)
56-
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
57-
5847
# ==============================================================================
5948
# GNUInstallDirs CMake module
6049
# - Define GNU standard installation directories
@@ -165,6 +154,21 @@ if(CUDA_VERSION VERSION_GREATER_EQUAL "7.5")
165154
endif()
166155
endif()
167156

157+
set(PopSift_CXX_STANDARD 14) # Thrust/CUB requires C++14 starting with CUDA SDK 11
158+
if(CUDA_VERSION_MAJOR LESS_EQUAL 8)
159+
set(PopSift_CXX_STANDARD 11)
160+
endif()
161+
162+
if(NOT MSVC)
163+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++${PopSift_CXX_STANDARD}")
164+
list(APPEND CUDA_NVCC_FLAGS "-std=c++${PopSift_CXX_STANDARD}")
165+
endif()
166+
set(CMAKE_CXX_STANDARD ${PopSift_CXX_STANDARD})
167+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
168+
set(CMAKE_CUDA_STANDARD ${PopSift_CXX_STANDARD})
169+
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
170+
171+
168172
if(PopSift_USE_NORMF AND CUDA_VERSION VERSION_GREATER_EQUAL "7.5")
169173
set(PopSift_HAVE_NORMF 1)
170174
else()
@@ -201,7 +205,13 @@ if(PopSift_BUILD_DOCS)
201205
add_subdirectory(doc)
202206
endif()
203207

208+
set(PopSift_TESTFILE_PATH "popsift-samples/datasets/sample/big_set/" CACHE STRING "Base directory where your test files are stored")
204209
if(PopSift_USE_TEST_CMD)
210+
if(NOT IS_ABSOLUTE("${PopSift_TESTFILE_PATH}"))
211+
get_filename_component(PopSift_TESTFILES "${PopSift_TESTFILE_PATH}" ABSOLUTE)
212+
set(PopSift_TESTFILE_PATH "${PopSift_TESTFILES}")
213+
endif()
214+
205215
add_subdirectory(testScripts)
206216
endif()
207217

@@ -229,9 +239,12 @@ message(STATUS "Generate position independent code: " ${CMAKE_POSITION_INDEPENDE
229239
message(STATUS "Use CUDA NVTX for profiling: " ${PopSift_USE_NVTX_PROFILING})
230240
message(STATUS "Synchronize and check CUDA error after every kernel: " ${PopSift_ERRCHK_AFTER_KERNEL})
231241
message(STATUS "Grid filtering: " ${PopSift_USE_GRID_FILTER})
232-
message(STATUS "Testing step: " ${PopSift_USE_TEST_CMD})
233242
message(STATUS "Additional warning for CUDA nvcc: " ${PopSift_NVCC_WARNINGS})
234243
message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}")
235244
message(STATUS "Install path: " ${CMAKE_INSTALL_PREFIX})
245+
message(STATUS "Testing step: " ${PopSift_USE_TEST_CMD})
246+
if(PopSift_USE_TEST_CMD)
247+
message(STATUS "Path for test input: " ${PopSift_TESTFILE_PATH})
248+
endif()
236249
message("\n******************************************")
237250
message("\n")

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,15 @@ PopSift tries to stick as closely as possible to David Lowe's famous paper [1],
1111

1212
PopSift compiles and works with NVidia cards of compute capability >= 3.0 (including the GT 650M), but the code is developed with the compute capability 5.2 card GTX 980 Ti in mind.
1313

14+
CUDA SDK 11 no longer supports compute capability 3.0. Compute capability 3.5 is still supported, but with a deprecation warning.
15+
1416
## Dependencies
1517

1618
PopSift depends on:
1719

18-
* CUDA >= 7.0
20+
* A host compiler supporting C++14 (for CUDA SDK >= 9.0) or C++11 (for CUDA SDK 8)
21+
22+
* CUDA >= 8.0
1923

2024
Optionally, for the provided applications:
2125

cmake/ChooseCudaCC.cmake

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS)
6565

6666
set(CC_LIST_BY_SYSTEM_PROCESSOR "")
6767
if(CMAKE_SYSTEM_PROCESSOR IN_LIST OTHER_SUPPORTED_PROCESSORS)
68-
list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "20;21;30;35;50;52;60;61;70;75")
68+
list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "20;21;30;35;50;52;60;61;70;75;80")
6969
endif()
7070
if(CMAKE_SYSTEM_PROCESSOR IN_LIST TEGRA_SUPPORTED_PROCESSORS)
7171
list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "32;53;62;72")
@@ -79,9 +79,12 @@ function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS)
7979
# Shortening the lists saves a lot of compile time.
8080
#
8181
set(CUDA_MIN_CC 20)
82-
set(CUDA_MAX_CC 75)
83-
if(CUDA_VERSION_MAJOR GREATER_EQUAL 10)
82+
set(CUDA_MAX_CC 80)
83+
if(CUDA_VERSION_MAJOR GREATER_EQUAL 11)
84+
set(CUDA_MIN_CC 35)
85+
elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 10)
8486
set(CUDA_MIN_CC 30)
87+
set(CUDA_MAX_CC 75)
8588
elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 9)
8689
set(CUDA_MIN_CC 30)
8790
set(CUDA_MAX_CC 72)

src/popsift/popsift.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ SiftJob::SiftJob( int w, int h, const float* imageData )
380380

381381
SiftJob::~SiftJob( )
382382
{
383-
delete [] _imageData;
383+
free( _imageData );
384384
}
385385

386386
void SiftJob::setImg( popsift::ImageBase* img )

src/popsift/s_orientation.cu

Lines changed: 43 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,21 @@ inline float compute_angle( int bin, float hc, float hn, float hp )
5252
return th;
5353
}
5454

55+
/*
56+
* Histogram smoothing helper
57+
*/
58+
template<int D>
59+
__device__
60+
inline static float smoothe( const float* const src, const int bin )
61+
{
62+
const int prev = (bin == 0) ? ORI_NBINS-1 : bin-1;
63+
const int next = (bin == ORI_NBINS-1) ? 0 : bin+1;
64+
65+
const float f = ( src[prev] + src[bin] + src[next] ) / 3.0f;
66+
67+
return f;
68+
}
69+
5570
/*
5671
* Compute the keypoint orientations for each extremum
5772
* using 16 threads for each of them.
@@ -66,16 +81,18 @@ void ori_par( const int octave,
6681
{
6782
const int extremum_index = blockIdx.x * blockDim.y;
6883

69-
if( extremum_index >= dct.ext_ct[octave] ) return; // a few trailing warps
84+
if( popsift::all( extremum_index >= dct.ext_ct[octave] ) ) return; // a few trailing warps
7085

7186
const int iext_off = dobuf.i_ext_off[octave][extremum_index];
7287
const InitialExtremum* iext = &dobuf.i_ext_dat[octave][iext_off];
7388

74-
__shared__ float hist [ORI_NBINS];
75-
__shared__ float sm_hist[ORI_NBINS];
89+
__shared__ float hist [64];
90+
__shared__ float sm_hist [64];
91+
__shared__ float refined_angle[64];
92+
__shared__ float yval [64];
7693

77-
for( int i = threadIdx.x; i < ORI_NBINS; i += blockDim.x ) hist[i] = 0.0f;
78-
__syncthreads();
94+
hist[threadIdx.x+ 0] = 0.0f;
95+
hist[threadIdx.x+32] = 0.0f;
7996

8097
/* keypoint fractional geometry */
8198
const float x = iext->xpos;
@@ -84,11 +101,11 @@ void ori_par( const int octave,
84101
const float sig = iext->sigma;
85102

86103
/* orientation histogram radius */
87-
float sigw = ORI_WINFACTOR * sig;
88-
int32_t rad = (int)roundf((3.0f * sigw));
104+
const float sigw = ORI_WINFACTOR * sig;
105+
const int32_t rad = (int)roundf((3.0f * sigw));
89106

90-
float factor = __fdividef( -0.5f, (sigw * sigw) );
91-
int sq_thres = rad * rad;
107+
const float factor = __fdividef( -0.5f, (sigw * sigw) );
108+
const int sq_thres = rad * rad;
92109

93110
// int xmin = max(1, (int)floor(x - rad));
94111
// int xmax = min(w - 2, (int)floor(x + rad));
@@ -103,6 +120,7 @@ void ori_par( const int octave,
103120
int hy = ymax - ymin + 1;
104121
int loops = wx * hy;
105122

123+
__syncthreads();
106124
for( int i = threadIdx.x; popsift::any(i < loops); i += blockDim.x )
107125
{
108126
if( i < loops ) {
@@ -122,7 +140,8 @@ void ori_par( const int octave,
122140
float dy = yy - y;
123141

124142
int sq_dist = dx * dx + dy * dy;
125-
if (sq_dist <= sq_thres) {
143+
if (sq_dist <= sq_thres)
144+
{
126145
float weight = grad * expf(sq_dist * factor);
127146

128147
// int bidx = (int)rintf( __fdividef( ORI_NBINS * (theta + M_PI), M_PI2 ) );
@@ -131,33 +150,31 @@ void ori_par( const int octave,
131150
if( bidx > ORI_NBINS ) {
132151
printf("Crashing: bin %d theta %f :-)\n", bidx, theta);
133152
}
153+
if( bidx < 0 ) {
154+
printf("Crashing: bin %d theta %f :-)\n", bidx, theta);
155+
}
134156

135157
bidx = (bidx == ORI_NBINS) ? 0 : bidx;
136158

137159
atomicAdd( &hist[bidx], weight );
138160
}
139161
}
140-
__syncthreads();
141162
}
163+
__syncthreads();
142164

143165
#ifdef WITH_VLFEAT_SMOOTHING
144-
for( int i=0; i<3; i++ ) {
145-
for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) {
146-
int prev = bin == 0 ? ORI_NBINS-1 : bin-1;
147-
int next = bin == ORI_NBINS-1 ? 0 : bin+1;
148-
sm_hist[bin] = ( hist[prev] + hist[bin] + hist[next] ) / 3.0f;
149-
}
166+
for( int i=0; i<3 ; i++ )
167+
{
168+
sm_hist[threadIdx.x+ 0] = smoothe<0>( hist, threadIdx.x+ 0 );
169+
sm_hist[threadIdx.x+32] = smoothe<1>( hist, threadIdx.x+32 );
150170
__syncthreads();
151-
for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) {
152-
int prev = bin == 0 ? ORI_NBINS-1 : bin-1;
153-
int next = bin == ORI_NBINS-1 ? 0 : bin+1;
154-
hist[bin] = ( sm_hist[prev] + sm_hist[bin] + sm_hist[next] ) / 3.0f;
155-
}
171+
hist[threadIdx.x+ 0] = smoothe<2>( sm_hist, threadIdx.x+ 0 );
172+
hist[threadIdx.x+32] = smoothe<3>( sm_hist, threadIdx.x+32 );
156173
__syncthreads();
157174
}
158-
for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) {
159-
sm_hist[bin] = hist[bin];
160-
}
175+
176+
sm_hist[threadIdx.x+ 0] = hist[threadIdx.x+ 0];
177+
sm_hist[threadIdx.x+32] = hist[threadIdx.x+32];
161178
__syncthreads();
162179
#else // not WITH_VLFEAT_SMOOTHING
163180
for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) {
@@ -178,8 +195,6 @@ void ori_par( const int octave,
178195

179196
// sub-cell refinement of the histogram cell index, yielding the angle
180197
// not necessary to initialize, every cell is computed
181-
__shared__ float refined_angle[64];
182-
__shared__ float yval [64];
183198

184199
for( int bin = threadIdx.x; popsift::any( bin < ORI_NBINS ); bin += blockDim.x ) {
185200
const int prev = bin == 0 ? ORI_NBINS-1 : bin-1;
@@ -349,11 +364,8 @@ void ori_prefix_sum( const int total_ext_ct, const int num_octaves )
349364
__host__
350365
void Pyramid::orientation( const Config& conf )
351366
{
352-
nvtxRangePushA( "reading extrema count" );
353367
readDescCountersFromDevice( );
354-
nvtxRangePop( );
355368

356-
nvtxRangePushA( "filtering grid" );
357369
int ext_total = 0;
358370
for(int o : hct.ext_ct)
359371
{
@@ -369,11 +381,8 @@ void Pyramid::orientation( const Config& conf )
369381
{
370382
ext_total = extrema_filter_grid( conf, ext_total );
371383
}
372-
nvtxRangePop( );
373384

374-
nvtxRangePushA( "reallocating extrema arrays" );
375385
reallocExtrema( ext_total );
376-
nvtxRangePop( );
377386

378387
int ext_ct_prefix_sum = 0;
379388
for( int octave=0; octave<_num_octaves; octave++ ) {
@@ -402,7 +411,7 @@ void Pyramid::orientation( const Config& conf )
402411
grid.x = num;
403412

404413
ori_par
405-
<<<grid,block,0,oct_str>>>
414+
<<<grid,block,4*64*sizeof(float),oct_str>>>
406415
( octave,
407416
hct.ext_ps[octave],
408417
oct_obj.getDataTexPoint( ),

src/popsift/sift_pyramid.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ FeaturesHost* Pyramid::get_descriptors( const Config& conf )
290290
nvtxRangePushA( "download descriptors" );
291291
FeaturesHost* features = new FeaturesHost( hct.ext_total, hct.ori_total );
292292

293-
if( hct.ext_total == 0 )
293+
if( hct.ext_total == 0 || hct.ori_total == 0 )
294294
{
295295
nvtxRangePop();
296296
return features;

testScripts/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/testOxfordDataset.sh.in
55
${CMAKE_CURRENT_BINARY_DIR}/testOxfordDataset.sh )
66

77
configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/TEST.sh.in
8-
${CMAKE_CURRENT_BINARY_DIR}/TEST.sh )
8+
${CMAKE_CURRENT_BINARY_DIR}/TEST.sh )
99

1010
add_custom_target(
1111
prepare-test

0 commit comments

Comments
 (0)