diff --git a/src/application/match.cpp b/src/application/match.cpp index 3a1af9fa..39361d72 100755 --- a/src/application/match.cpp +++ b/src/application/match.cpp @@ -252,7 +252,24 @@ int main(int argc, char **argv) cout << "Number of features: " << rFeatures->getFeatureCount() << endl; cout << "Number of descriptors: " << rFeatures->getDescriptorCount() << endl; - lFeatures->match( rFeatures ); + int3* matches = lFeatures->matchAndReturn( rFeatures ); + // lFeatures->match( rFeatures ); + cudaDeviceSynchronize(); + + for( int i=0; igetDescriptorCount(); i++ ) + { + int3& match = matches[i]; + if( match.z ) + { + const popsift::Feature* l_f = lFeatures->getFeatureForDescriptor( i ); + const popsift::Feature* r_f = rFeatures->getFeatureForDescriptor( match.x ); + cout << setprecision(5) << showpoint + << "point (" << l_f->xpos << "," << l_f->ypos << ") in l matches " + << "point (" << r_f->xpos << "," << r_f->ypos << ") in r" << endl; + } + } + + lFeatures->freeMatches( matches ); delete lFeatures; delete rFeatures; diff --git a/src/popsift/common/debug_macros.cu b/src/popsift/common/debug_macros.cu index c9155248..06dc3233 100755 --- a/src/popsift/common/debug_macros.cu +++ b/src/popsift/common/debug_macros.cu @@ -49,6 +49,23 @@ void malloc_hst( void** ptr, int sz, } } } +namespace popsift { namespace cuda { +void malloc_mgd( void** ptr, int sz, + const char* file, int line ) +{ + cudaError_t err; + err = cudaMallocManaged( ptr, sz ); + if( err != cudaSuccess ) { + std::cerr << file << ":" << line << std::endl + << " cudaMallocManaged failed: " << cudaGetErrorString(err) << std::endl; + exit( -__LINE__ ); + } +#ifdef DEBUG_INIT_DEVICE_ALLOCATIONS + memset( *ptr, 0, sz ); +#endif // NDEBUG +} +} } + namespace popsift { namespace cuda { void memcpy_async( void* dst, const void* src, size_t sz, cudaMemcpyKind type, cudaStream_t stream, diff --git a/src/popsift/common/debug_macros.h b/src/popsift/common/debug_macros.h index a497750c..93a505f7 100755 --- a/src/popsift/common/debug_macros.h +++ b/src/popsift/common/debug_macros.h @@ -39,6 +39,9 @@ void malloc_dev( void** ptr, int sz, void malloc_hst( void** ptr, int sz, const char* file, int line ); +void malloc_mgd( void** ptr, int sz, + const char* file, int line ); + template T* malloc_devT( int num, const char* file, int line ) { void* ptr; @@ -53,6 +56,17 @@ T* malloc_hstT( int num, const char* file, int line ) { return (T*)ptr; } +template +T* malloc_mgdT( int num, const char* file, int line ) { + void* ptr; + malloc_mgd( &ptr, num*sizeof(T), file, line ); + return (T*)ptr; +} + +inline void free_mgd( void* ptr ) { + cudaFree( ptr ); +} + void memcpy_sync( void* dst, const void* src, size_t sz, cudaMemcpyKind type, const char* file, size_t line ); diff --git a/src/popsift/features.cu b/src/popsift/features.cu index 5aa706a1..2c4929af 100755 --- a/src/popsift/features.cu +++ b/src/popsift/features.cu @@ -154,9 +154,9 @@ void FeaturesDev::reset( int num_ext, int num_ori ) if( _ori != nullptr ) { cudaFree( _ori ); _ori = nullptr; } if( _rev != nullptr ) { cudaFree( _rev ); _rev = nullptr; } - _ext = popsift::cuda::malloc_devT ( num_ext, __FILE__, __LINE__ ); - _ori = popsift::cuda::malloc_devT( num_ori, __FILE__, __LINE__ ); - _rev = popsift::cuda::malloc_devT ( num_ori, __FILE__, __LINE__ ); + _ext = popsift::cuda::malloc_mgdT ( num_ext, __FILE__, __LINE__ ); + _ori = popsift::cuda::malloc_mgdT( num_ori, __FILE__, __LINE__ ); + _rev = popsift::cuda::malloc_mgdT ( num_ori, __FILE__, __LINE__ ); setFeatureCount( num_ext ); setDescriptorCount( num_ori ); @@ -248,18 +248,26 @@ show_distance( int3* match_matrix, if( threadIdx.x == 0 ) { if( match_matrix[i].z ) - printf( "accept feat %4d [%4d] matches feat %4d [%4d] ( 2nd feat %4d [%4d] ) dist %.3f vs %.3f\n", + { + Feature* lx = &l_ext[l_fem[i]]; + Feature* rx = &r_ext[r_fem[match_matrix[i].x]]; + printf( "accept feat %4d [%4d] matches feat %4d [%4d] ( 2nd feat %4d [%4d] ) dist %.3f vs %.3f" + " (%.1f,%.1f)-(%.1f,%.1f)\n", l_fem[i], i, r_fem[match_matrix[i].x], match_matrix[i].x, r_fem[match_matrix[i].y], match_matrix[i].y, - d1, d2 ); + d1, d2, + lx->xpos, lx->ypos, rx->xpos, rx->ypos ); + } else + { printf( "reject feat %4d [%4d] matches feat %4d [%4d] ( 2nd feat %4d [%4d] ) dist %.3f vs %.3f\n", l_fem[i], i, r_fem[match_matrix[i].x], match_matrix[i].x, r_fem[match_matrix[i].y], match_matrix[i].y, d1, d2 ); } + } __syncthreads(); } } @@ -303,6 +311,54 @@ void FeaturesDev::match( FeaturesDev* other ) cudaFree( match_matrix ); } +int3* FeaturesDev::matchAndReturn( FeaturesDev* other ) +{ + int l_len = getDescriptorCount( ); + int r_len = other->getDescriptorCount( ); + + int3* match_matrix = popsift::cuda::malloc_mgdT( l_len, __FILE__, __LINE__ ); + + dim3 grid; + grid.x = l_len; + grid.y = 1; + grid.z = 1; + dim3 block; + block.x = 32; + block.y = 1; + block.z = 1; + + compute_distance + <<>> + ( match_matrix, getDescriptors(), l_len, other->getDescriptors(), r_len ); + + return match_matrix; +} + +void FeaturesDev::freeMatches( int3* match_matrix ) +{ + popsift::cuda::free_mgd( match_matrix ); +} + +Descriptor* FeaturesDev::getDescriptor( int descIndex ) +{ + return &_ori[descIndex]; +} + +const Descriptor* FeaturesDev::getDescriptor( int descIndex ) const +{ + return &_ori[descIndex]; +} + +Feature* FeaturesDev::getFeatureForDescriptor( int descIndex ) +{ + return &_ext[_rev[descIndex]]; +} + +const Feature* FeaturesDev::getFeatureForDescriptor( int descIndex ) const +{ + return &_ext[_rev[descIndex]]; +} + /************************************************************* * Feature *************************************************************/ diff --git a/src/popsift/features.h b/src/popsift/features.h index 3b16f954..9cd8f2d2 100755 --- a/src/popsift/features.h +++ b/src/popsift/features.h @@ -103,8 +103,8 @@ std::ostream& operator<<( std::ostream& ostr, const FeaturesHost& feature ); class FeaturesDev : public FeaturesBase { - Feature* _ext; - Descriptor* _ori; + Feature* _ext; // array of extrema + Descriptor* _ori; // array of desciptors int* _rev; // the reverse map from descriptors to extrema public: @@ -114,11 +114,40 @@ class FeaturesDev : public FeaturesBase void reset( int num_ext, int num_ori ); + /** This function performs one-directional brute force matching on + * the GPU between the Descriptors in this objects and the object + * other. + * The resulting matches are printed. + */ void match( FeaturesDev* other ); + /** This function performs one-directional brute force matching on + * the GPU between the Descriptors in this objects and the object + * other. + * The resulting matches are returned in an array of int3 that must + * be released with a call to cudaFree(). + * The length of the array is this->getDescriptorCount(). + * For each element at position i + * i is the index of a descriptor in this->getDescriptors() + * int3.x is the index of the best match in other->getDescriptors() + * int3.y is the index of the second best match in other->getDescriptors() + * int3.z indicates if the match is valid (non-zero) or not (zero) + */ + int3* matchAndReturn( FeaturesDev* other ); + + /** This function takes as parameters that matches returned by + * matchAndReturn and releases that memory. + */ + void freeMatches( int3* match_matrix ); + inline Feature* getFeatures() { return _ext; } inline Descriptor* getDescriptors() { return _ori; } inline int* getReverseMap() { return _rev; } + + Descriptor* getDescriptor( int descIndex ); + const Descriptor* getDescriptor( int descIndex ) const; + Feature* getFeatureForDescriptor( int descIndex ); + const Feature* getFeatureForDescriptor( int descIndex ) const; }; } // namespace popsift