Skip to content

Commit b23338e

Browse files
authored
Merge pull request #109 from alicevision/fix/misalignBugOri
Pass correct shared memory size to orientation kernel
2 parents 069570d + 913b358 commit b23338e

File tree

2 files changed

+44
-35
lines changed

2 files changed

+44
-35
lines changed

src/popsift/s_orientation.cu

Lines changed: 43 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,21 @@ inline float compute_angle( int bin, float hc, float hn, float hp )
5252
return th;
5353
}
5454

55+
/*
56+
* Histogram smoothing helper
57+
*/
58+
template<int D>
59+
__device__
60+
inline static float smoothe( const float* const src, const int bin )
61+
{
62+
const int prev = (bin == 0) ? ORI_NBINS-1 : bin-1;
63+
const int next = (bin == ORI_NBINS-1) ? 0 : bin+1;
64+
65+
const float f = ( src[prev] + src[bin] + src[next] ) / 3.0f;
66+
67+
return f;
68+
}
69+
5570
/*
5671
* Compute the keypoint orientations for each extremum
5772
* using 16 threads for each of them.
@@ -66,16 +81,18 @@ void ori_par( const int octave,
6681
{
6782
const int extremum_index = blockIdx.x * blockDim.y;
6883

69-
if( extremum_index >= dct.ext_ct[octave] ) return; // a few trailing warps
84+
if( popsift::all( extremum_index >= dct.ext_ct[octave] ) ) return; // a few trailing warps
7085

7186
const int iext_off = dobuf.i_ext_off[octave][extremum_index];
7287
const InitialExtremum* iext = &dobuf.i_ext_dat[octave][iext_off];
7388

74-
__shared__ float hist [ORI_NBINS];
75-
__shared__ float sm_hist[ORI_NBINS];
89+
__shared__ float hist [64];
90+
__shared__ float sm_hist [64];
91+
__shared__ float refined_angle[64];
92+
__shared__ float yval [64];
7693

77-
for( int i = threadIdx.x; i < ORI_NBINS; i += blockDim.x ) hist[i] = 0.0f;
78-
__syncthreads();
94+
hist[threadIdx.x+ 0] = 0.0f;
95+
hist[threadIdx.x+32] = 0.0f;
7996

8097
/* keypoint fractional geometry */
8198
const float x = iext->xpos;
@@ -84,11 +101,11 @@ void ori_par( const int octave,
84101
const float sig = iext->sigma;
85102

86103
/* orientation histogram radius */
87-
float sigw = ORI_WINFACTOR * sig;
88-
int32_t rad = (int)roundf((3.0f * sigw));
104+
const float sigw = ORI_WINFACTOR * sig;
105+
const int32_t rad = (int)roundf((3.0f * sigw));
89106

90-
float factor = __fdividef( -0.5f, (sigw * sigw) );
91-
int sq_thres = rad * rad;
107+
const float factor = __fdividef( -0.5f, (sigw * sigw) );
108+
const int sq_thres = rad * rad;
92109

93110
// int xmin = max(1, (int)floor(x - rad));
94111
// int xmax = min(w - 2, (int)floor(x + rad));
@@ -103,6 +120,7 @@ void ori_par( const int octave,
103120
int hy = ymax - ymin + 1;
104121
int loops = wx * hy;
105122

123+
__syncthreads();
106124
for( int i = threadIdx.x; popsift::any(i < loops); i += blockDim.x )
107125
{
108126
if( i < loops ) {
@@ -122,7 +140,8 @@ void ori_par( const int octave,
122140
float dy = yy - y;
123141

124142
int sq_dist = dx * dx + dy * dy;
125-
if (sq_dist <= sq_thres) {
143+
if (sq_dist <= sq_thres)
144+
{
126145
float weight = grad * expf(sq_dist * factor);
127146

128147
// int bidx = (int)rintf( __fdividef( ORI_NBINS * (theta + M_PI), M_PI2 ) );
@@ -131,33 +150,31 @@ void ori_par( const int octave,
131150
if( bidx > ORI_NBINS ) {
132151
printf("Crashing: bin %d theta %f :-)\n", bidx, theta);
133152
}
153+
if( bidx < 0 ) {
154+
printf("Crashing: bin %d theta %f :-)\n", bidx, theta);
155+
}
134156

135157
bidx = (bidx == ORI_NBINS) ? 0 : bidx;
136158

137159
atomicAdd( &hist[bidx], weight );
138160
}
139161
}
140-
__syncthreads();
141162
}
163+
__syncthreads();
142164

143165
#ifdef WITH_VLFEAT_SMOOTHING
144-
for( int i=0; i<3; i++ ) {
145-
for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) {
146-
int prev = bin == 0 ? ORI_NBINS-1 : bin-1;
147-
int next = bin == ORI_NBINS-1 ? 0 : bin+1;
148-
sm_hist[bin] = ( hist[prev] + hist[bin] + hist[next] ) / 3.0f;
149-
}
166+
for( int i=0; i<3 ; i++ )
167+
{
168+
sm_hist[threadIdx.x+ 0] = smoothe<0>( hist, threadIdx.x+ 0 );
169+
sm_hist[threadIdx.x+32] = smoothe<1>( hist, threadIdx.x+32 );
150170
__syncthreads();
151-
for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) {
152-
int prev = bin == 0 ? ORI_NBINS-1 : bin-1;
153-
int next = bin == ORI_NBINS-1 ? 0 : bin+1;
154-
hist[bin] = ( sm_hist[prev] + sm_hist[bin] + sm_hist[next] ) / 3.0f;
155-
}
171+
hist[threadIdx.x+ 0] = smoothe<2>( sm_hist, threadIdx.x+ 0 );
172+
hist[threadIdx.x+32] = smoothe<3>( sm_hist, threadIdx.x+32 );
156173
__syncthreads();
157174
}
158-
for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) {
159-
sm_hist[bin] = hist[bin];
160-
}
175+
176+
sm_hist[threadIdx.x+ 0] = hist[threadIdx.x+ 0];
177+
sm_hist[threadIdx.x+32] = hist[threadIdx.x+32];
161178
__syncthreads();
162179
#else // not WITH_VLFEAT_SMOOTHING
163180
for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) {
@@ -178,8 +195,6 @@ void ori_par( const int octave,
178195

179196
// sub-cell refinement of the histogram cell index, yielding the angle
180197
// not necessary to initialize, every cell is computed
181-
__shared__ float refined_angle[64];
182-
__shared__ float yval [64];
183198

184199
for( int bin = threadIdx.x; popsift::any( bin < ORI_NBINS ); bin += blockDim.x ) {
185200
const int prev = bin == 0 ? ORI_NBINS-1 : bin-1;
@@ -349,11 +364,8 @@ void ori_prefix_sum( const int total_ext_ct, const int num_octaves )
349364
__host__
350365
void Pyramid::orientation( const Config& conf )
351366
{
352-
nvtxRangePushA( "reading extrema count" );
353367
readDescCountersFromDevice( );
354-
nvtxRangePop( );
355368

356-
nvtxRangePushA( "filtering grid" );
357369
int ext_total = 0;
358370
for(int o : hct.ext_ct)
359371
{
@@ -369,11 +381,8 @@ void Pyramid::orientation( const Config& conf )
369381
{
370382
ext_total = extrema_filter_grid( conf, ext_total );
371383
}
372-
nvtxRangePop( );
373384

374-
nvtxRangePushA( "reallocating extrema arrays" );
375385
reallocExtrema( ext_total );
376-
nvtxRangePop( );
377386

378387
int ext_ct_prefix_sum = 0;
379388
for( int octave=0; octave<_num_octaves; octave++ ) {
@@ -402,7 +411,7 @@ void Pyramid::orientation( const Config& conf )
402411
grid.x = num;
403412

404413
ori_par
405-
<<<grid,block,0,oct_str>>>
414+
<<<grid,block,4*64*sizeof(float),oct_str>>>
406415
( octave,
407416
hct.ext_ps[octave],
408417
oct_obj.getDataTexPoint( ),

src/popsift/sift_pyramid.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ FeaturesHost* Pyramid::get_descriptors( const Config& conf )
290290
nvtxRangePushA( "download descriptors" );
291291
FeaturesHost* features = new FeaturesHost( hct.ext_total, hct.ori_total );
292292

293-
if( hct.ext_total == 0 )
293+
if( hct.ext_total == 0 || hct.ori_total == 0 )
294294
{
295295
nvtxRangePop();
296296
return features;

0 commit comments

Comments
 (0)