Skip to content

Commit 5bbd332

Browse files
authored
Merge pull request #121 from mitjap/multi_gpu
add support for device selection and multiple GPUs
2 parents fafcad9 + e9cd9ad commit 5bbd332

File tree

9 files changed

+67
-33
lines changed

9 files changed

+67
-33
lines changed

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2323
- CMake: support for cuda 11 [PR](https://github.com/alicevision/popsift/pull/103)
2424
- Support for Cuda CC 7 cards (RTX 2080) [PR](https://github.com/alicevision/popsift/pull/67)
2525
- Support for Boost 1.70 [PR](https://github.com/alicevision/popsift/pull/65)
26+
- Support for device selection and multiple GPUs [PR](https://github.com/alicevision/popsift/pull/121)
2627

2728
### Fixed
2829
- CMake: fixes to allow building on Windows using vcpkg [PR](https://github.com/alicevision/popsift/pull/92)

src/popsift/gauss_filter.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ namespace popsift {
1818
__device__ __constant__
1919
GaussInfo d_gauss;
2020

21-
__align__(128) GaussInfo h_gauss;
21+
__align__(128) thread_local GaussInfo h_gauss;
2222

2323

2424
__global__

src/popsift/gauss_filter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ struct GaussInfo
105105
};
106106

107107
extern __device__ __constant__ GaussInfo d_gauss;
108-
extern GaussInfo h_gauss;
108+
extern thread_local GaussInfo h_gauss;
109109

110110
/* init_filter must be called early to initialize the Gauss tables.
111111
*/

src/popsift/popsift.cpp

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,13 @@
1919

2020
using namespace std;
2121

22-
PopSift::PopSift( const popsift::Config& config, popsift::Config::ProcessingMode mode, ImageMode imode )
22+
PopSift::PopSift( const popsift::Config& config, popsift::Config::ProcessingMode mode, ImageMode imode, int device )
2323
: _image_mode( imode )
24+
, _device(device)
2425
{
26+
cudaSetDevice(_device);
27+
configure(config);
28+
2529
if( imode == ByteImages )
2630
{
2731
_pipe._unused.push( new popsift::Image);
@@ -33,18 +37,19 @@ PopSift::PopSift( const popsift::Config& config, popsift::Config::ProcessingMode
3337
_pipe._unused.push( new popsift::ImageFloat );
3438
}
3539

36-
configure( config, true );
37-
3840
_pipe._thread_stage1.reset( new std::thread( &PopSift::uploadImages, this ));
3941
if( mode == popsift::Config::ExtractingMode )
4042
_pipe._thread_stage2.reset( new std::thread( &PopSift::extractDownloadLoop, this ));
4143
else
4244
_pipe._thread_stage2.reset( new std::thread( &PopSift::matchPrepareLoop, this ));
4345
}
4446

45-
PopSift::PopSift( ImageMode imode )
47+
PopSift::PopSift( ImageMode imode, int device )
4648
: _image_mode( imode )
49+
, _device(device)
4750
{
51+
cudaSetDevice(_device);
52+
4853
if( imode == ByteImages )
4954
{
5055
_pipe._unused.push( new popsift::Image);
@@ -68,16 +73,20 @@ PopSift::~PopSift()
6873
}
6974
}
7075

71-
bool PopSift::configure( const popsift::Config& config, bool force )
76+
bool PopSift::configure( const popsift::Config& config, bool /*force*/ )
7277
{
7378
if( _pipe._pyramid != nullptr ) {
7479
return false;
7580
}
7681

7782
_config = config;
78-
7983
_config.levels = max( 2, config.levels );
8084

85+
return true;
86+
}
87+
88+
bool PopSift::applyConfiguration(bool force)
89+
{
8190
if( force || ( _config != _shadow_config ) )
8291
{
8392
popsift::init_filter( _config,
@@ -131,6 +140,16 @@ bool PopSift::private_init( int w, int h )
131140
return true;
132141
}
133142

143+
bool PopSift::private_uninit()
144+
{
145+
Pipe& p = _pipe;
146+
147+
delete p._pyramid;
148+
p._pyramid = nullptr;
149+
150+
return true;
151+
}
152+
134153
void PopSift::uninit( )
135154
{
136155
if(!_isInit)
@@ -273,6 +292,8 @@ SiftJob* PopSift::enqueue( int w,
273292

274293
void PopSift::uploadImages( )
275294
{
295+
cudaSetDevice(_device);
296+
276297
SiftJob* job;
277298
while( ( job = _pipe._queue_stage1.pull() ) != nullptr ) {
278299
popsift::ImageBase* img = _pipe._unused.pull();
@@ -284,10 +305,15 @@ void PopSift::uploadImages( )
284305

285306
void PopSift::extractDownloadLoop( )
286307
{
308+
cudaSetDevice(_device);
309+
applyConfiguration(true);
310+
287311
Pipe& p = _pipe;
288312

289313
SiftJob* job;
290314
while( ( job = p._queue_stage2.pull() ) != nullptr ) {
315+
applyConfiguration();
316+
291317
popsift::ImageBase* img = job->getImg();
292318

293319
private_init( img->getWidth(), img->getHeight() );
@@ -313,14 +339,21 @@ void PopSift::extractDownloadLoop( )
313339

314340
job->setFeatures( features );
315341
}
342+
343+
private_uninit();
316344
}
317345

318346
void PopSift::matchPrepareLoop( )
319347
{
348+
cudaSetDevice(_device);
349+
applyConfiguration(true);
350+
320351
Pipe& p = _pipe;
321352

322353
SiftJob* job;
323354
while( ( job = p._queue_stage2.pull() ) != nullptr ) {
355+
applyConfiguration();
356+
324357
popsift::ImageBase* img = job->getImg();
325358

326359
private_init( img->getWidth(), img->getHeight() );
@@ -336,6 +369,8 @@ void PopSift::matchPrepareLoop( )
336369

337370
job->setFeatures( features );
338371
}
372+
373+
private_uninit();
339374
}
340375

341376
SiftJob::SiftJob( int w, int h, const unsigned char* imageData )
@@ -445,8 +480,4 @@ void PopSift::Pipe::uninit()
445480
popsift::ImageBase* img = _unused.pull();
446481
delete img;
447482
}
448-
449-
delete _pyramid;
450-
_pyramid = nullptr;
451-
452483
}

src/popsift/popsift.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ class PopSift
150150
* @brief We support more than 1 streams, but we support only one sigma and one
151151
* level parameters.
152152
*/
153-
explicit PopSift( ImageMode imode = ByteImages );
153+
explicit PopSift( ImageMode imode = ByteImages, int device = 0 );
154154

155155
/**
156156
* @brief
@@ -160,7 +160,7 @@ class PopSift
160160
*/
161161
explicit PopSift(const popsift::Config& config,
162162
popsift::Config::ProcessingMode mode = popsift::Config::ExtractingMode,
163-
ImageMode imode = ByteImages);
163+
ImageMode imode = ByteImages, int device = 0);
164164

165165
/**
166166
* @brief Release all the resources.
@@ -273,7 +273,10 @@ class PopSift
273273
}
274274

275275
private:
276+
bool applyConfiguration( bool force = false );
277+
276278
bool private_init( int w, int h );
279+
bool private_uninit( );
277280
void private_apply_scale_factor( int& w, int& h );
278281
void uploadImages( );
279282

@@ -299,6 +302,7 @@ class PopSift
299302
int _last_init_w{}; /* to support deprecated interface */
300303
int _last_init_h{}; /* to support deprecated interface */
301304
ImageMode _image_mode;
305+
int _device;
302306

303307
/// whether the object is initialized
304308
bool _isInit{true};

src/popsift/sift_constants.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ using namespace std;
1616

1717
namespace popsift {
1818

19-
ConstInfo h_consts;
19+
thread_local ConstInfo h_consts;
2020
__device__ __constant__ ConstInfo d_consts;
2121

2222
void init_constants( float sigma0, int levels, float threshold, float edge_limit, int max_extrema, int normalization_multiplier )

src/popsift/sift_constants.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ struct ConstInfo
6868
float desc_tile[16];
6969
};
7070

71-
extern ConstInfo h_consts;
71+
extern thread_local ConstInfo h_consts;
7272
extern __device__ __constant__ ConstInfo d_consts;
7373

7474

src/popsift/sift_pyramid.cu

100755100644
Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,18 +38,15 @@ using namespace std;
3838

3939
namespace popsift {
4040

41-
__device__
42-
ExtremaCounters dct;
43-
ExtremaCounters hct;
41+
__device__ ExtremaCounters dct;
42+
thread_local ExtremaCounters hct;
4443

45-
__device__
46-
ExtremaBuffers dbuf;
47-
ExtremaBuffers dbuf_shadow; // just for managing memories
48-
ExtremaBuffers hbuf;
44+
__device__ ExtremaBuffers dbuf;
45+
thread_local ExtremaBuffers dbuf_shadow; // just for managing memories
46+
thread_local ExtremaBuffers hbuf;
4947

50-
__device__
51-
DevBuffers dobuf;
52-
DevBuffers dobuf_shadow; // just for managing memories
48+
__device__ DevBuffers dobuf;
49+
thread_local DevBuffers dobuf_shadow; // just for managing memories
5350

5451
__global__
5552
void py_print_corner_float(float* img, uint32_t pitch, uint32_t height, uint32_t level)
@@ -215,6 +212,7 @@ Pyramid::~Pyramid()
215212
{
216213
cudaStreamDestroy( _download_stream );
217214

215+
cudaFree( _d_extrema_num_blocks );
218216
cudaFree( dobuf_shadow.i_ext_dat[0] );
219217
cudaFree( dobuf_shadow.i_ext_off[0] );
220218
cudaFree( dobuf_shadow.features );

src/popsift/sift_pyramid.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,13 @@ struct DevBuffers
5050
Feature* features;
5151
};
5252

53-
extern ExtremaCounters hct;
54-
extern __device__ ExtremaCounters dct;
55-
extern ExtremaBuffers hbuf;
56-
extern __device__ ExtremaBuffers dbuf;
57-
extern ExtremaBuffers dbuf_shadow; // just for managing memories
58-
extern __device__ DevBuffers dobuf;
59-
extern DevBuffers dobuf_shadow; // just for managing memories
53+
extern thread_local ExtremaCounters hct;
54+
extern __device__ ExtremaCounters dct;
55+
extern thread_local ExtremaBuffers hbuf;
56+
extern __device__ ExtremaBuffers dbuf;
57+
extern thread_local ExtremaBuffers dbuf_shadow; // just for managing memories
58+
extern __device__ DevBuffers dobuf;
59+
extern thread_local DevBuffers dobuf_shadow; // just for managing memories
6060

6161
class Pyramid
6262
{

0 commit comments

Comments
 (0)