@@ -71,10 +71,10 @@ void ori_par( const int octave,
7171 const int iext_off = dobuf.i_ext_off [octave][extremum_index];
7272 const InitialExtremum* iext = &dobuf.i_ext_dat [octave][iext_off];
7373
74- __shared__ float hist [ORI_NBINS];
75- __shared__ float sm_hist [ORI_NBINS];
74+ __shared__ float _sh_hist [ORI_NBINS];
75+ __shared__ float _sh_sm_hist [ORI_NBINS];
7676
77- for ( int i = threadIdx .x ; i < ORI_NBINS; i += blockDim .x ) hist [i] = 0 .0f ;
77+ for ( int i = threadIdx .x ; i < ORI_NBINS; i += blockDim .x ) _sh_hist [i] = 0 .0f ;
7878
7979 /* keypoint fractional geometry */
8080 const float x = iext->xpos ;
@@ -133,7 +133,7 @@ void ori_par( const int octave,
133133
134134 bidx = (bidx == ORI_NBINS) ? 0 : bidx;
135135
136- atomicAdd ( &hist [bidx], weight );
136+ atomicAdd ( &_sh_hist [bidx], weight );
137137 }
138138 }
139139 __syncthreads ();
@@ -144,18 +144,18 @@ void ori_par( const int octave,
144144 for ( int bin = threadIdx .x ; bin < ORI_NBINS; bin += blockDim .x ) {
145145 int prev = bin == 0 ? ORI_NBINS-1 : bin-1 ;
146146 int next = bin == ORI_NBINS-1 ? 0 : bin+1 ;
147- sm_hist [bin] = ( hist [prev] + hist [bin] + hist [next] ) / 3 .0f ;
147+ _sh_sm_hist [bin] = ( _sh_hist [prev] + _sh_hist [bin] + _sh_hist [next] ) / 3 .0f ;
148148 }
149149 __syncthreads ();
150150 for ( int bin = threadIdx .x ; bin < ORI_NBINS; bin += blockDim .x ) {
151151 int prev = bin == 0 ? ORI_NBINS-1 : bin-1 ;
152152 int next = bin == ORI_NBINS-1 ? 0 : bin+1 ;
153- hist [bin] = ( sm_hist [prev] + sm_hist [bin] + sm_hist [next] ) / 3 .0f ;
153+ _sh_hist [bin] = ( _sh_sm_hist [prev] + _sh_sm_hist [bin] + _sh_sm_hist [next] ) / 3 .0f ;
154154 }
155155 __syncthreads ();
156156 }
157157 for ( int bin = threadIdx .x ; bin < ORI_NBINS; bin += blockDim .x ) {
158- sm_hist [bin] = hist [bin];
158+ _sh_sm_hist [bin] = _sh_hist [bin];
159159 }
160160 __syncthreads ();
161161#else // not WITH_VLFEAT_SMOOTHING
@@ -168,51 +168,51 @@ void ori_par( const int octave,
168168 if ( prev1 < 0 ) prev1 += ORI_NBINS;
169169 if ( next1 >= ORI_NBINS ) next1 -= ORI_NBINS;
170170 if ( next2 >= ORI_NBINS ) next2 -= ORI_NBINS;
171- sm_hist [bin] = ( hist [prev2] + hist [next2]
172- + ( hist [prev1] + hist [next1] ) * 4 .0f
173- + hist [bin] * 6 .0f ) / 16 .0f ;
171+ _sh_sm_hist [bin] = ( _sh_hist [prev2] + _sh_hist [next2]
172+ + ( _sh_hist [prev1] + _sh_hist [next1] ) * 4 .0f
173+ + _sh_hist [bin] * 6 .0f ) / 16 .0f ;
174174 }
175175 __syncthreads ();
176176#endif // not WITH_VLFEAT_SMOOTHING
177177
178178 // sub-cell refinement of the histogram cell index, yielding the angle
179179 // not necessary to initialize, every cell is computed
180- __shared__ float refined_angle [64 ];
181- __shared__ float yval [64 ];
180+ __shared__ float _sh_refined_angle [64 ];
181+ __shared__ float _sh_yval [64 ];
182182
183183 for ( int bin = threadIdx .x ; popsift::any ( bin < ORI_NBINS ); bin += blockDim .x ) {
184184 const int prev = bin == 0 ? ORI_NBINS-1 : bin-1 ;
185185 const int next = bin == ORI_NBINS-1 ? 0 : bin+1 ;
186186
187- bool predicate = ( bin < ORI_NBINS ) && ( sm_hist [bin] > max ( sm_hist [prev], sm_hist [next] ) );
187+ bool predicate = ( bin < ORI_NBINS ) && ( _sh_sm_hist [bin] > max ( _sh_sm_hist [prev], _sh_sm_hist [next] ) );
188188
189- const float num = predicate ? 3 .0f * sm_hist [prev]
190- - 4 .0f * sm_hist [bin]
191- + 1 .0f * sm_hist [next]
189+ const float num = predicate ? 3 .0f * _sh_sm_hist [prev]
190+ - 4 .0f * _sh_sm_hist [bin]
191+ + 1 .0f * _sh_sm_hist [next]
192192 : 0 .0f ;
193- // const float num = predicate ? 2.0f * sm_hist [prev]
194- // - 4.0f * sm_hist [bin]
195- // + 2.0f * sm_hist [next]
193+ // const float num = predicate ? 2.0f * _sh_sm_hist [prev]
194+ // - 4.0f * _sh_sm_hist [bin]
195+ // + 2.0f * _sh_sm_hist [next]
196196 // : 0.0f;
197- const float denB = predicate ? 2 .0f * ( sm_hist [prev] - 2 .0f * sm_hist [bin] + sm_hist [next] ) : 1 .0f ;
197+ const float denB = predicate ? 2 .0f * ( _sh_sm_hist [prev] - 2 .0f * _sh_sm_hist [bin] + _sh_sm_hist [next] ) : 1 .0f ;
198198
199199 const float newbin = __fdividef ( num, denB ); // verified: accuracy OK
200200
201201 predicate = ( predicate && newbin >= 0 .0f && newbin <= 2 .0f );
202202
203- refined_angle [bin] = predicate ? prev + newbin : -1 ;
204- yval [bin] = predicate ? -(num*num) / (4 .0f * denB) + sm_hist [prev] : -INFINITY;
203+ _sh_refined_angle [bin] = predicate ? prev + newbin : -1 ;
204+ _sh_yval [bin] = predicate ? -(num*num) / (4 .0f * denB) + _sh_sm_hist [prev] : -INFINITY;
205205 }
206206
207207 int2 best_index = make_int2 ( threadIdx .x , threadIdx .x + 32 );
208208
209- BitonicSort::Warp32<float > sorter ( yval );
209+ BitonicSort::Warp32<float > sorter ( _sh_yval );
210210 sorter.sort64 ( best_index );
211211 __syncthreads ();
212212
213213 // Each thread reads the yval at its own sorted index; all threads then
214214 // retrieve the yval of thread 0, the largest of all yvals, as the reference.
215- const float best_val = yval [best_index.x ];
215+ const float best_val = _sh_yval [best_index.x ];
216216 const float yval_ref = 0 .8f * popsift::shuffle ( best_val, 0 );
217217 const bool valid = ( best_val >= yval_ref );
218218 bool written = false ;
@@ -221,7 +221,7 @@ void ori_par( const int octave,
221221
222222 if ( threadIdx .x < ORIENTATION_MAX_COUNT ) {
223223 if ( valid ) {
224- float chosen_bin = refined_angle [best_index.x ];
224+ float chosen_bin = _sh_refined_angle [best_index.x ];
225225 if ( chosen_bin >= ORI_NBINS ) chosen_bin -= ORI_NBINS;
226226 // float th = __fdividef(M_PI2 * chosen_bin , ORI_NBINS) - M_PI;
227227 float th = ::fmaf ( M_PI2 * chosen_bin, 1 .0f /ORI_NBINS, - M_PI );
0 commit comments