Skip to content

Commit 04abcc8

Browse files
author
Carsten Griwodz
committed
rename shared mem variables for faster reading
1 parent 6d973e7 commit 04abcc8

1 file changed

Lines changed: 25 additions & 25 deletions

File tree

src/popsift/s_orientation.cu

Lines changed: 25 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -71,10 +71,10 @@ void ori_par( const int octave,
7171
const int iext_off = dobuf.i_ext_off[octave][extremum_index];
7272
const InitialExtremum* iext = &dobuf.i_ext_dat[octave][iext_off];
7373

74-
__shared__ float hist [ORI_NBINS];
75-
__shared__ float sm_hist[ORI_NBINS];
74+
__shared__ float _sh_hist [ORI_NBINS];
75+
__shared__ float _sh_sm_hist[ORI_NBINS];
7676

77-
for( int i = threadIdx.x; i < ORI_NBINS; i += blockDim.x ) hist[i] = 0.0f;
77+
for( int i = threadIdx.x; i < ORI_NBINS; i += blockDim.x ) _sh_hist[i] = 0.0f;
7878

7979
/* keypoint fractional geometry */
8080
const float x = iext->xpos;
@@ -133,7 +133,7 @@ void ori_par( const int octave,
133133

134134
bidx = (bidx == ORI_NBINS) ? 0 : bidx;
135135

136-
atomicAdd( &hist[bidx], weight );
136+
atomicAdd( &_sh_hist[bidx], weight );
137137
}
138138
}
139139
__syncthreads();
@@ -144,18 +144,18 @@ void ori_par( const int octave,
144144
for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) {
145145
int prev = bin == 0 ? ORI_NBINS-1 : bin-1;
146146
int next = bin == ORI_NBINS-1 ? 0 : bin+1;
147-
sm_hist[bin] = ( hist[prev] + hist[bin] + hist[next] ) / 3.0f;
147+
_sh_sm_hist[bin] = ( _sh_hist[prev] + _sh_hist[bin] + _sh_hist[next] ) / 3.0f;
148148
}
149149
__syncthreads();
150150
for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) {
151151
int prev = bin == 0 ? ORI_NBINS-1 : bin-1;
152152
int next = bin == ORI_NBINS-1 ? 0 : bin+1;
153-
hist[bin] = ( sm_hist[prev] + sm_hist[bin] + sm_hist[next] ) / 3.0f;
153+
_sh_hist[bin] = ( _sh_sm_hist[prev] + _sh_sm_hist[bin] + _sh_sm_hist[next] ) / 3.0f;
154154
}
155155
__syncthreads();
156156
}
157157
for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) {
158-
sm_hist[bin] = hist[bin];
158+
_sh_sm_hist[bin] = _sh_hist[bin];
159159
}
160160
__syncthreads();
161161
#else // not WITH_VLFEAT_SMOOTHING
@@ -168,51 +168,51 @@ void ori_par( const int octave,
168168
if( prev1 < 0 ) prev1 += ORI_NBINS;
169169
if( next1 >= ORI_NBINS ) next1 -= ORI_NBINS;
170170
if( next2 >= ORI_NBINS ) next2 -= ORI_NBINS;
171-
sm_hist[bin] = ( hist[prev2] + hist[next2]
172-
+ ( hist[prev1] + hist[next1] ) * 4.0f
173-
+ hist[bin] * 6.0f ) / 16.0f;
171+
_sh_sm_hist[bin] = ( _sh_hist[prev2] + _sh_hist[next2]
172+
+ ( _sh_hist[prev1] + _sh_hist[next1] ) * 4.0f
173+
+ _sh_hist[bin] * 6.0f ) / 16.0f;
174174
}
175175
__syncthreads();
176176
#endif // not WITH_VLFEAT_SMOOTHING
177177

178178
// sub-cell refinement of the histogram cell index, yielding the angle
179179
// not necessary to initialize, every cell is computed
180-
__shared__ float refined_angle[64];
181-
__shared__ float yval [64];
180+
__shared__ float _sh_refined_angle[64];
181+
__shared__ float _sh_yval [64];
182182

183183
for( int bin = threadIdx.x; popsift::any( bin < ORI_NBINS ); bin += blockDim.x ) {
184184
const int prev = bin == 0 ? ORI_NBINS-1 : bin-1;
185185
const int next = bin == ORI_NBINS-1 ? 0 : bin+1;
186186

187-
bool predicate = ( bin < ORI_NBINS ) && ( sm_hist[bin] > max( sm_hist[prev], sm_hist[next] ) );
187+
bool predicate = ( bin < ORI_NBINS ) && ( _sh_sm_hist[bin] > max( _sh_sm_hist[prev], _sh_sm_hist[next] ) );
188188

189-
const float num = predicate ? 3.0f * sm_hist[prev]
190-
- 4.0f * sm_hist[bin]
191-
+ 1.0f * sm_hist[next]
189+
const float num = predicate ? 3.0f * _sh_sm_hist[prev]
190+
- 4.0f * _sh_sm_hist[bin]
191+
+ 1.0f * _sh_sm_hist[next]
192192
: 0.0f;
193-
// const float num = predicate ? 2.0f * sm_hist[prev]
194-
// - 4.0f * sm_hist[bin]
195-
// + 2.0f * sm_hist[next]
193+
// const float num = predicate ? 2.0f * _sh_sm_hist[prev]
194+
// - 4.0f * _sh_sm_hist[bin]
195+
// + 2.0f * _sh_sm_hist[next]
196196
// : 0.0f;
197-
const float denB = predicate ? 2.0f * ( sm_hist[prev] - 2.0f * sm_hist[bin] + sm_hist[next] ) : 1.0f;
197+
const float denB = predicate ? 2.0f * ( _sh_sm_hist[prev] - 2.0f * _sh_sm_hist[bin] + _sh_sm_hist[next] ) : 1.0f;
198198

199199
const float newbin = __fdividef( num, denB ); // verified: accuracy OK
200200

201201
predicate = ( predicate && newbin >= 0.0f && newbin <= 2.0f );
202202

203-
refined_angle[bin] = predicate ? prev + newbin : -1;
204-
yval[bin] = predicate ? -(num*num) / (4.0f * denB) + sm_hist[prev] : -INFINITY;
203+
_sh_refined_angle[bin] = predicate ? prev + newbin : -1;
204+
_sh_yval[bin] = predicate ? -(num*num) / (4.0f * denB) + _sh_sm_hist[prev] : -INFINITY;
205205
}
206206

207207
int2 best_index = make_int2( threadIdx.x, threadIdx.x + 32 );
208208

209-
BitonicSort::Warp32<float> sorter( yval );
209+
BitonicSort::Warp32<float> sorter( _sh_yval );
210210
sorter.sort64( best_index );
211211
__syncthreads();
212212

213213
// All threads retrieve the yval of thread 0, the largest
214214
// of all yvals.
215-
const float best_val = yval[best_index.x];
215+
const float best_val = _sh_yval[best_index.x];
216216
const float yval_ref = 0.8f * popsift::shuffle( best_val, 0 );
217217
const bool valid = ( best_val >= yval_ref );
218218
bool written = false;
@@ -221,7 +221,7 @@ void ori_par( const int octave,
221221

222222
if( threadIdx.x < ORIENTATION_MAX_COUNT ) {
223223
if( valid ) {
224-
float chosen_bin = refined_angle[best_index.x];
224+
float chosen_bin = _sh_refined_angle[best_index.x];
225225
if( chosen_bin >= ORI_NBINS ) chosen_bin -= ORI_NBINS;
226226
// float th = __fdividef(M_PI2 * chosen_bin , ORI_NBINS) - M_PI;
227227
float th = ::fmaf( M_PI2 * chosen_bin, 1.0f/ORI_NBINS, - M_PI );

0 commit comments

Comments
 (0)