Skip to content

Commit d143285

Browse files
authored
Merge pull request #67 from alicevision/fix/cc7support
Fix crashes appearing on CUDA CC 7 cards.
2 parents 0c7ce9d + 5218f79 commit d143285

3 files changed

Lines changed: 9 additions & 7 deletions

File tree

.travis.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -61,8 +61,8 @@ install:
6161
- wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/$CUDA_REPO_PKG
6262
- sudo dpkg -i $CUDA_REPO_PKG
6363
- rm ${CUDA_REPO_PKG}
64-
- sudo apt-get -y update
65-
- sudo apt-get install -y --no-install-recommends cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION cuda-cublas-dev-$CUDA_PKG_VERSION cuda-curand-dev-$CUDA_PKG_VERSION
64+
- travis_retry sudo apt-get -y update
65+
- travis_retry sudo apt-get install -y --no-install-recommends cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION cuda-cublas-dev-$CUDA_PKG_VERSION cuda-curand-dev-$CUDA_PKG_VERSION
6666
- sudo ln -s /usr/local/cuda-${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} /usr/local/cuda
6767

6868
before_script:

src/popsift/common/warp_bitonic_sort.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -66,8 +66,8 @@ class Warp32
6666
: ( my_val < other_val );
6767
const bool must_swap = not ( my_more ^ reverse ^ increasing );
6868

69-
return ( must_swap ? popsift::shuffle_xor( my_index, 1 << shift )
70-
: my_index );
69+
int lane = must_swap ? ( 1 << shift ) : 0;
70+
return popsift::shuffle_xor( my_index, lane );
7171
}
7272

7373
__device__ inline

src/popsift/s_desc_loop.cu

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -76,8 +76,10 @@ void ext_desc_loop_sub( const float ang,
7676

7777
float dpt[9] = { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
7878

79-
for( int i = threadIdx.x; i < loops; i+=blockDim.x )
79+
for( int i = threadIdx.x; popsift::any(i < loops); i+=blockDim.x )
8080
{
81+
if( i >= loops ) continue;
82+
8183
const int ii = i / wx + ymin;
8284
const int jj = i % wx + xmin;
8385

@@ -111,14 +113,14 @@ void ext_desc_loop_sub( const float ang,
111113
const float wgt2 = do0;
112114

113115
int fo = fo0 % DESC_BINS;
114-
116+
115117
// maf: multiply-add
116118
// _ru - round to positive infinity equiv to froundf since always >=0
117119
dpt[fo] = __fmaf_ru( wgt1, wgt, dpt[fo] ); // dpt[fo] += (wgt1*wgt);
118120
dpt[fo+1] = __fmaf_ru( wgt2, wgt, dpt[fo+1] ); // dpt[fo+1] += (wgt2*wgt);
119121
}
120-
__syncthreads();
121122
}
123+
__syncthreads();
122124

123125
dpt[0] += dpt[8];
124126

0 commit comments

Comments
 (0)