Skip to content

Commit 8fb65a3

Browse files
author
Carsten Griwodz
committed
introduce CUDA managed memory and use it for a matching function
1 parent a19d394 commit 8fb65a3

File tree

5 files changed

+141
-8
lines changed

5 files changed

+141
-8
lines changed

src/application/match.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,24 @@ int main(int argc, char **argv)
252252
cout << "Number of features: " << rFeatures->getFeatureCount() << endl;
253253
cout << "Number of descriptors: " << rFeatures->getDescriptorCount() << endl;
254254

255-
lFeatures->match( rFeatures );
255+
int3* matches = lFeatures->matchAndReturn( rFeatures );
256+
// lFeatures->match( rFeatures );
257+
cudaDeviceSynchronize();
258+
259+
for( int i=0; i<lFeatures->getDescriptorCount(); i++ )
260+
{
261+
int3& match = matches[i];
262+
if( match.z )
263+
{
264+
const popsift::Feature* l_f = lFeatures->getFeatureForDescriptor( i );
265+
const popsift::Feature* r_f = rFeatures->getFeatureForDescriptor( match.x );
266+
cout << setprecision(5) << showpoint
267+
<< "point (" << l_f->xpos << "," << l_f->ypos << ") in l matches "
268+
<< "point (" << r_f->xpos << "," << r_f->ypos << ") in r" << endl;
269+
}
270+
}
271+
272+
lFeatures->freeMatches( matches );
256273

257274
delete lFeatures;
258275
delete rFeatures;

src/popsift/common/debug_macros.cu

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,23 @@ void malloc_hst( void** ptr, int sz,
4949
}
5050
} }
5151

52+
namespace popsift { namespace cuda {
53+
void malloc_mgd( void** ptr, int sz,
54+
const char* file, int line )
55+
{
56+
cudaError_t err;
57+
err = cudaMallocManaged( ptr, sz );
58+
if( err != cudaSuccess ) {
59+
std::cerr << file << ":" << line << std::endl
60+
<< " cudaMallocManaged failed: " << cudaGetErrorString(err) << std::endl;
61+
exit( -__LINE__ );
62+
}
63+
#ifdef DEBUG_INIT_DEVICE_ALLOCATIONS
64+
memset( *ptr, 0, sz );
65+
#endif // NDEBUG
66+
}
67+
} }
68+
5269
namespace popsift { namespace cuda {
5370
void memcpy_async( void* dst, const void* src, size_t sz,
5471
cudaMemcpyKind type, cudaStream_t stream,

src/popsift/common/debug_macros.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ void malloc_dev( void** ptr, int sz,
3939
void malloc_hst( void** ptr, int sz,
4040
const char* file, int line );
4141

42+
void malloc_mgd( void** ptr, int sz,
43+
const char* file, int line );
44+
4245
template<class T>
4346
T* malloc_devT( int num, const char* file, int line ) {
4447
void* ptr;
@@ -53,6 +56,17 @@ T* malloc_hstT( int num, const char* file, int line ) {
5356
return (T*)ptr;
5457
}
5558

59+
template<class T>
60+
T* malloc_mgdT( int num, const char* file, int line ) {
61+
void* ptr;
62+
malloc_mgd( &ptr, num*sizeof(T), file, line );
63+
return (T*)ptr;
64+
}
65+
66+
inline void free_mgd( void* ptr ) {
67+
cudaFree( ptr );
68+
}
69+
5670
void memcpy_sync( void* dst, const void* src, size_t sz,
5771
cudaMemcpyKind type,
5872
const char* file, size_t line );

src/popsift/features.cu

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,9 @@ void FeaturesDev::reset( int num_ext, int num_ori )
154154
if( _ori != nullptr ) { cudaFree( _ori ); _ori = nullptr; }
155155
if( _rev != nullptr ) { cudaFree( _rev ); _rev = nullptr; }
156156

157-
_ext = popsift::cuda::malloc_devT<Feature> ( num_ext, __FILE__, __LINE__ );
158-
_ori = popsift::cuda::malloc_devT<Descriptor>( num_ori, __FILE__, __LINE__ );
159-
_rev = popsift::cuda::malloc_devT<int> ( num_ori, __FILE__, __LINE__ );
157+
_ext = popsift::cuda::malloc_mgdT<Feature> ( num_ext, __FILE__, __LINE__ );
158+
_ori = popsift::cuda::malloc_mgdT<Descriptor>( num_ori, __FILE__, __LINE__ );
159+
_rev = popsift::cuda::malloc_mgdT<int> ( num_ori, __FILE__, __LINE__ );
160160

161161
setFeatureCount( num_ext );
162162
setDescriptorCount( num_ori );
@@ -248,18 +248,26 @@ show_distance( int3* match_matrix,
248248
if( threadIdx.x == 0 )
249249
{
250250
if( match_matrix[i].z )
251-
printf( "accept feat %4d [%4d] matches feat %4d [%4d] ( 2nd feat %4d [%4d] ) dist %.3f vs %.3f\n",
251+
{
252+
Feature* lx = &l_ext[l_fem[i]];
253+
Feature* rx = &r_ext[r_fem[match_matrix[i].x]];
254+
printf( "accept feat %4d [%4d] matches feat %4d [%4d] ( 2nd feat %4d [%4d] ) dist %.3f vs %.3f"
255+
" (%.1f,%.1f)-(%.1f,%.1f)\n",
252256
l_fem[i], i,
253257
r_fem[match_matrix[i].x], match_matrix[i].x,
254258
r_fem[match_matrix[i].y], match_matrix[i].y,
255-
d1, d2 );
259+
d1, d2,
260+
lx->xpos, lx->ypos, rx->xpos, rx->ypos );
261+
}
256262
else
263+
{
257264
printf( "reject feat %4d [%4d] matches feat %4d [%4d] ( 2nd feat %4d [%4d] ) dist %.3f vs %.3f\n",
258265
l_fem[i], i,
259266
r_fem[match_matrix[i].x], match_matrix[i].x,
260267
r_fem[match_matrix[i].y], match_matrix[i].y,
261268
d1, d2 );
262269
}
270+
}
263271
__syncthreads();
264272
}
265273
}
@@ -303,6 +311,54 @@ void FeaturesDev::match( FeaturesDev* other )
303311
cudaFree( match_matrix );
304312
}
305313

314+
int3* FeaturesDev::matchAndReturn( FeaturesDev* other )
315+
{
316+
int l_len = getDescriptorCount( );
317+
int r_len = other->getDescriptorCount( );
318+
319+
int3* match_matrix = popsift::cuda::malloc_mgdT<int3>( l_len, __FILE__, __LINE__ );
320+
321+
dim3 grid;
322+
grid.x = l_len;
323+
grid.y = 1;
324+
grid.z = 1;
325+
dim3 block;
326+
block.x = 32;
327+
block.y = 1;
328+
block.z = 1;
329+
330+
compute_distance
331+
<<<grid,block>>>
332+
( match_matrix, getDescriptors(), l_len, other->getDescriptors(), r_len );
333+
334+
return match_matrix;
335+
}
336+
337+
void FeaturesDev::freeMatches( int3* match_matrix )
338+
{
339+
popsift::cuda::free_mgd( match_matrix );
340+
}
341+
342+
Descriptor* FeaturesDev::getDescriptor( int descIndex )
343+
{
344+
return &_ori[descIndex];
345+
}
346+
347+
const Descriptor* FeaturesDev::getDescriptor( int descIndex ) const
348+
{
349+
return &_ori[descIndex];
350+
}
351+
352+
Feature* FeaturesDev::getFeatureForDescriptor( int descIndex )
353+
{
354+
return &_ext[_rev[descIndex]];
355+
}
356+
357+
const Feature* FeaturesDev::getFeatureForDescriptor( int descIndex ) const
358+
{
359+
return &_ext[_rev[descIndex]];
360+
}
361+
306362
/*************************************************************
307363
* Feature
308364
*************************************************************/

src/popsift/features.h

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,8 @@ std::ostream& operator<<( std::ostream& ostr, const FeaturesHost& feature );
103103

104104
class FeaturesDev : public FeaturesBase
105105
{
106-
Feature* _ext;
107-
Descriptor* _ori;
106+
Feature* _ext; // array of extrema
107+
Descriptor* _ori; // array of desciptors
108108
int* _rev; // the reverse map from descriptors to extrema
109109

110110
public:
@@ -114,11 +114,40 @@ class FeaturesDev : public FeaturesBase
114114

115115
void reset( int num_ext, int num_ori );
116116

117+
/** This function performs one-directional brute force matching on
118+
* the GPU between the Descriptors in this objects and the object
119+
* other.
120+
* The resulting matches are printed.
121+
*/
117122
void match( FeaturesDev* other );
118123

124+
/** This function performs one-directional brute force matching on
125+
* the GPU between the Descriptors in this objects and the object
126+
* other.
127+
* The resulting matches are returned in an array of int3 that must
128+
* be released with a call to cudaFree().
129+
* The length of the array is this->getDescriptorCount().
130+
* For each element at position i
131+
* i is the index of a descriptor in this->getDescriptors()
132+
* int3.x is the index of the best match in other->getDescriptors()
133+
* int3.y is the index of the second best match in other->getDescriptors()
134+
* int3.z indicates if the match is valid (non-zero) or not (zero)
135+
*/
136+
int3* matchAndReturn( FeaturesDev* other );
137+
138+
/** This function takes as parameters that matches returned by
139+
* matchAndReturn and releases that memory.
140+
*/
141+
void freeMatches( int3* match_matrix );
142+
119143
inline Feature* getFeatures() { return _ext; }
120144
inline Descriptor* getDescriptors() { return _ori; }
121145
inline int* getReverseMap() { return _rev; }
146+
147+
Descriptor* getDescriptor( int descIndex );
148+
const Descriptor* getDescriptor( int descIndex ) const;
149+
Feature* getFeatureForDescriptor( int descIndex );
150+
const Feature* getFeatureForDescriptor( int descIndex ) const;
122151
};
123152

124153
} // namespace popsift

0 commit comments

Comments
 (0)