Skip to content

Commit b28eaf4

Browse files
author
Carsten Griwodz
committed
[bugfix] use helper function for histogram smoothing
1 parent 800b7b1 commit b28eaf4

File tree

1 file changed

+31
-19
lines changed

1 file changed

+31
-19
lines changed

src/popsift/s_orientation.cu

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,21 @@ inline float compute_angle( int bin, float hc, float hn, float hp )
5252
return th;
5353
}
5454

55+
/*
 * Histogram smoothing helper.
 *
 * Returns the arithmetic mean of src[bin] and its two neighbours, treating
 * the first ORI_NBINS entries of src as a circular histogram: bin 0 uses
 * ORI_NBINS-1 as its predecessor and bin ORI_NBINS-1 uses 0 as its successor.
 *
 * D   : not referenced in the body; call sites pass a distinct value each.
 * src : histogram to read from; must hold at least ORI_NBINS floats.
 * bin : index of the entry to smooth. ori_par also calls this helper with
 *       threadIdx.x+32 on buffers declared with 64 entries, so bin may be
 *       >= ORI_NBINS. Those entries are treated as padding here and yield
 *       0.0f (assumes nothing downstream reads smoothed padding values -
 *       TODO confirm against the rest of ori_par).
 */
template<int D>
__device__
inline static float smoothe( const float* const src, const int bin )
{
    // Padding slots have no circular neighbours. Without this guard the
    // wrap test below never fires for them and the last slot of a 64-entry
    // buffer would read src[bin+1], one element past the end of the array.
    if( bin >= ORI_NBINS ) return 0.0f;

    const int prev = (bin == 0)           ? ORI_NBINS-1 : bin-1;
    const int next = (bin == ORI_NBINS-1) ? 0           : bin+1;

    return ( src[prev] + src[bin] + src[next] ) / 3.0f;
}
69+
5570
/*
5671
* Compute the keypoint orientations for each extremum
5772
* using 16 threads for each of them.
@@ -71,13 +86,13 @@ void ori_par( const int octave,
7186
const int iext_off = dobuf.i_ext_off[octave][extremum_index];
7287
const InitialExtremum* iext = &dobuf.i_ext_dat[octave][iext_off];
7388

74-
__shared__ float hist [ORI_NBINS];
75-
__shared__ float sm_hist[ORI_NBINS];
89+
__shared__ float hist [64];
90+
__shared__ float sm_hist [64];
7691
__shared__ float refined_angle[64];
7792
__shared__ float yval [64];
7893

79-
for( int i = threadIdx.x; i < ORI_NBINS; i += blockDim.x ) hist[i] = 0.0f;
80-
__syncthreads();
94+
hist[threadIdx.x+ 0] = 0.0f;
95+
hist[threadIdx.x+32] = 0.0f;
8196

8297
/* keypoint fractional geometry */
8398
const float x = iext->xpos;
@@ -105,6 +120,7 @@ void ori_par( const int octave,
105120
int hy = ymax - ymin + 1;
106121
int loops = wx * hy;
107122

123+
__syncthreads();
108124
for( int i = threadIdx.x; popsift::any(i < loops); i += blockDim.x )
109125
{
110126
if( i < loops ) {
@@ -124,7 +140,8 @@ void ori_par( const int octave,
124140
float dy = yy - y;
125141

126142
int sq_dist = dx * dx + dy * dy;
127-
if (sq_dist <= sq_thres) {
143+
if (sq_dist <= sq_thres)
144+
{
128145
float weight = grad * expf(sq_dist * factor);
129146

130147
// int bidx = (int)rintf( __fdividef( ORI_NBINS * (theta + M_PI), M_PI2 ) );
@@ -146,23 +163,18 @@ void ori_par( const int octave,
146163
__syncthreads();
147164

148165
#ifdef WITH_VLFEAT_SMOOTHING
149-
for( int i=0; i<3; i++ ) {
150-
for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) {
151-
int prev = bin == 0 ? ORI_NBINS-1 : bin-1;
152-
int next = bin == ORI_NBINS-1 ? 0 : bin+1;
153-
sm_hist[bin] = ( hist[prev] + hist[bin] + hist[next] ) / 3.0f;
154-
}
166+
for( int i=0; i<3 ; i++ )
167+
{
168+
sm_hist[threadIdx.x+ 0] = smoothe<0>( hist, threadIdx.x+ 0 );
169+
sm_hist[threadIdx.x+32] = smoothe<1>( hist, threadIdx.x+32 );
155170
__syncthreads();
156-
for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) {
157-
int prev = bin == 0 ? ORI_NBINS-1 : bin-1;
158-
int next = bin == ORI_NBINS-1 ? 0 : bin+1;
159-
hist[bin] = ( sm_hist[prev] + sm_hist[bin] + sm_hist[next] ) / 3.0f;
160-
}
171+
hist[threadIdx.x+ 0] = smoothe<2>( sm_hist, threadIdx.x+ 0 );
172+
hist[threadIdx.x+32] = smoothe<3>( sm_hist, threadIdx.x+32 );
161173
__syncthreads();
162174
}
163-
for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) {
164-
sm_hist[bin] = hist[bin];
165-
}
175+
176+
sm_hist[threadIdx.x+ 0] = hist[threadIdx.x+ 0];
177+
sm_hist[threadIdx.x+32] = hist[threadIdx.x+32];
166178
__syncthreads();
167179
#else // not WITH_VLFEAT_SMOOTHING
168180
for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) {

0 commit comments

Comments
 (0)