Skip to content

Commit 2956630

Browse files
author
Carsten Griwodz
committed
[popsift] change orientation computation to get closer to VLFeat
1 parent b13d259 commit 2956630

1 file changed

Lines changed: 41 additions & 77 deletions

File tree

src/popsift/s_orientation.cu

Lines changed: 41 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -28,36 +28,14 @@
2828
using namespace popsift;
2929
using namespace std;
3030

31-
/* Smoothing like VLFeat is the default mode.
32-
* If you choose to undefine it, you get the smoothing approach taken by OpenCV
33-
*/
34-
#define WITH_VLFEAT_SMOOTHING
35-
3631
namespace popsift
3732
{
3833

39-
__device__
40-
inline float compute_angle( int bin, float hc, float hn, float hp )
41-
{
42-
/* interpolate */
43-
float di = bin + 0.5f * (hn - hp) / (hc+hc-hn-hp);
44-
45-
/* clamp */
46-
di = (di < 0) ?
47-
(di + ORI_NBINS) :
48-
((di >= ORI_NBINS) ? (di - ORI_NBINS) : (di));
49-
50-
float th = __fdividef( M_PI2 * di, ORI_NBINS ) - M_PI;
51-
// float th = ((M_PI2 * di) / ORI_NBINS);
52-
return th;
53-
}
54-
5534
/*
5635
* Histogram smoothing helper
5736
*/
58-
template<int D>
59-
__device__
60-
inline static float smoothe( const float* const src, const int bin )
37+
__device__ inline static
38+
float smoothe( const float* const src, const int bin )
6139
{
6240
const int prev = (bin == 0) ? ORI_NBINS-1 : bin-1;
6341
const int next = (bin == ORI_NBINS-1) ? 0 : bin+1;
@@ -102,7 +80,7 @@ void ori_par( const int octave,
10280

10381
/* orientation histogram radius */
10482
const float sigw = ORI_WINFACTOR * sig;
105-
const int32_t rad = (int)roundf((3.0f * sigw));
83+
const int32_t rad = max( (int)floorf((3.0f * sigw)), 1 );
10684

10785
const float factor = __fdividef( -0.5f, (sigw * sigw) );
10886
const int sq_thres = rad * rad;
@@ -111,10 +89,10 @@ void ori_par( const int octave,
11189
// int xmax = min(w - 2, (int)floor(x + rad));
11290
// int ymin = max(1, (int)floor(y - rad));
11391
// int ymax = min(h - 2, (int)floor(y + rad));
114-
int xmin = max(1, (int)roundf(x) - rad);
115-
int xmax = min(w - 2, (int)roundf(x) + rad);
116-
int ymin = max(1, (int)roundf(y) - rad);
117-
int ymax = min(h - 2, (int)roundf(y) + rad);
92+
int xmin = max(0, (int)roundf(x) - rad);
93+
int xmax = min(w - 1, (int)roundf(x) + rad);
94+
int ymin = max(0, (int)roundf(y) - rad);
95+
int ymax = min(h - 1, (int)roundf(y) + rad);
11896

11997
int wx = xmax - xmin + 1;
12098
int hy = ymax - ymin + 1;
@@ -136,88 +114,74 @@ void ori_par( const int octave,
136114
layer,
137115
level );
138116

117+
grad /= 2; // our grad is twice that of VLFeat - weird
118+
if( theta < 0 ) theta += M_PI2;
119+
139120
float dx = xx - x;
140121
float dy = yy - y;
141122

142-
int sq_dist = dx * dx + dy * dy;
143-
if (sq_dist <= sq_thres)
123+
float sq_dist = dx * dx + dy * dy;
124+
if (sq_dist <= sq_thres + 0.6f)
144125
{
145126
float weight = grad * expf(sq_dist * factor);
146127

147-
// int bidx = (int)rintf( __fdividef( ORI_NBINS * (theta + M_PI), M_PI2 ) );
148128
int bidx = (int)roundf( __fdividef( float(ORI_NBINS) * (theta + M_PI), M_PI2 ) );
149129

150-
if( bidx > ORI_NBINS ) {
151-
printf("Crashing: bin %d theta %f :-)\n", bidx, theta);
152-
}
153-
if( bidx < 0 ) {
154-
printf("Crashing: bin %d theta %f :-)\n", bidx, theta);
155-
}
156-
157-
bidx = (bidx == ORI_NBINS) ? 0 : bidx;
130+
while( bidx < 0 ) bidx += ORI_NBINS;
131+
while( bidx >= ORI_NBINS ) bidx -= ORI_NBINS;
158132

159133
atomicAdd( &hist[bidx], weight );
160134
}
161135
}
162136
}
163137
__syncthreads();
164138

165-
#ifdef WITH_VLFEAT_SMOOTHING
166139
for( int i=0; i<3 ; i++ )
167140
{
168-
sm_hist[threadIdx.x+ 0] = smoothe<0>( hist, threadIdx.x+ 0 );
169-
sm_hist[threadIdx.x+32] = smoothe<1>( hist, threadIdx.x+32 );
141+
sm_hist[threadIdx.x+ 0] = smoothe( hist, threadIdx.x+ 0 );
142+
sm_hist[threadIdx.x+32] = smoothe( hist, threadIdx.x+32 );
170143
__syncthreads();
171-
hist[threadIdx.x+ 0] = smoothe<2>( sm_hist, threadIdx.x+ 0 );
172-
hist[threadIdx.x+32] = smoothe<3>( sm_hist, threadIdx.x+32 );
144+
hist[threadIdx.x+ 0] = smoothe( sm_hist, threadIdx.x+ 0 );
145+
hist[threadIdx.x+32] = smoothe( sm_hist, threadIdx.x+32 );
173146
__syncthreads();
174147
}
175148

176149
sm_hist[threadIdx.x+ 0] = hist[threadIdx.x+ 0];
177150
sm_hist[threadIdx.x+32] = hist[threadIdx.x+32];
178151
__syncthreads();
179-
#else // not WITH_VLFEAT_SMOOTHING
180-
for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) {
181-
int prev2 = bin - 2;
182-
int prev1 = bin - 1;
183-
int next1 = bin + 1;
184-
int next2 = bin + 2;
185-
if( prev2 < 0 ) prev2 += ORI_NBINS;
186-
if( prev1 < 0 ) prev1 += ORI_NBINS;
187-
if( next1 >= ORI_NBINS ) next1 -= ORI_NBINS;
188-
if( next2 >= ORI_NBINS ) next2 -= ORI_NBINS;
189-
sm_hist[bin] = ( hist[prev2] + hist[next2]
190-
+ ( hist[prev1] + hist[next1] ) * 4.0f
191-
+ hist[bin] * 6.0f ) / 16.0f;
192-
}
193-
__syncthreads();
194-
#endif // not WITH_VLFEAT_SMOOTHING
152+
153+
if( threadIdx.x+32 >= ORI_NBINS ) sm_hist[threadIdx.x+32] = -INFINITY;
154+
float maxval = fmaxf( sm_hist[threadIdx.x+ 0], sm_hist[threadIdx.x+32] );
155+
float neigh;
156+
neigh = popsift::shuffle_down( maxval, 16 ); maxval = fmaxf( maxval, neigh );
157+
neigh = popsift::shuffle_down( maxval, 8 ); maxval = fmaxf( maxval, neigh );
158+
neigh = popsift::shuffle_down( maxval, 4 ); maxval = fmaxf( maxval, neigh );
159+
neigh = popsift::shuffle_down( maxval, 2 ); maxval = fmaxf( maxval, neigh );
160+
neigh = popsift::shuffle_down( maxval, 1 ); maxval = fmaxf( maxval, neigh );
161+
maxval = popsift::shuffle( maxval, 0 );
195162

196163
// sub-cell refinement of the histogram cell index, yielding the angle
197164
// not necessary to initialize, every cell is computed
198165

199-
for( int bin = threadIdx.x; popsift::any( bin < ORI_NBINS ); bin += blockDim.x ) {
200-
const int prev = bin == 0 ? ORI_NBINS-1 : bin-1;
201-
const int next = bin == ORI_NBINS-1 ? 0 : bin+1;
166+
for( int bin = threadIdx.x; popsift::any( bin < ORI_NBINS ); bin += blockDim.x )
167+
{
168+
const int prev = ( bin - 1 + ORI_NBINS ) % ORI_NBINS;
169+
const int next = ( bin + 1 ) % ORI_NBINS;
202170

203-
bool predicate = ( bin < ORI_NBINS ) && ( sm_hist[bin] > max( sm_hist[prev], sm_hist[next] ) );
171+
bool predicate = ( bin < ORI_NBINS ) &&
172+
( sm_hist[bin] > max( sm_hist[prev], sm_hist[next] ) ) &&
173+
( sm_hist[bin] > 0.8f * maxval );
204174

205175
const float num = predicate ? 3.0f * sm_hist[prev]
206176
- 4.0f * sm_hist[bin]
207177
+ 1.0f * sm_hist[next]
208178
: 0.0f;
209-
// const float num = predicate ? 2.0f * sm_hist[prev]
210-
// - 4.0f * sm_hist[bin]
211-
// + 2.0f * sm_hist[next]
212-
// : 0.0f;
213179
const float denB = predicate ? 2.0f * ( sm_hist[prev] - 2.0f * sm_hist[bin] + sm_hist[next] ) : 1.0f;
214180

215181
const float newbin = __fdividef( num, denB ); // verified: accuracy OK
216182

217-
predicate = ( predicate && newbin >= 0.0f && newbin <= 2.0f );
218-
219183
refined_angle[bin] = predicate ? prev + newbin : -1;
220-
yval[bin] = predicate ? -(num*num) / (4.0f * denB) + sm_hist[prev] : -INFINITY;
184+
yval[bin] = predicate ? -(num*num) / (4.0f * denB) + hist[prev] : -INFINITY;
221185
}
222186
__syncthreads();
223187

@@ -229,9 +193,7 @@ void ori_par( const int octave,
229193

230194
// All threads retrieve the yval of thread 0, the largest
231195
// of all yvals.
232-
const float best_val = yval[best_index.x];
233-
const float yval_ref = 0.8f * popsift::shuffle( best_val, 0 );
234-
const bool valid = ( best_val >= yval_ref );
196+
const bool valid = ( yval[best_index.x] > 0 );
235197
bool written = false;
236198

237199
Extremum* ext = &dobuf.extrema[ext_ct_prefix_sum + extremum_index];
@@ -240,8 +202,10 @@ void ori_par( const int octave,
240202
if( valid ) {
241203
float chosen_bin = refined_angle[best_index.x];
242204
if( chosen_bin >= ORI_NBINS ) chosen_bin -= ORI_NBINS;
243-
// float th = __fdividef(M_PI2 * chosen_bin , ORI_NBINS) - M_PI;
244-
float th = ::fmaf( M_PI2 * chosen_bin, 1.0f/ORI_NBINS, - M_PI );
205+
if( chosen_bin < 0 ) chosen_bin += ORI_NBINS;
206+
float th = __fdividef(M_PI2 * chosen_bin , ORI_NBINS); // - M_PI;
207+
th -= M_PI;
208+
if( th < 0.0f ) th += M_PI2;
245209
ext->orientation[threadIdx.x] = th;
246210
written = true;
247211
}

0 commit comments

Comments
 (0)