Skip to content

Commit c03d7bb

Browse files
author
bragadeesh
committed
Merge branch 'develop' of https://github.com/clMathLibraries/clFFT-private into develop
2 parents 8489d30 + 81be7cd commit c03d7bb

8 files changed

+285
-47
lines changed

src/client/client.cpp

+60-38
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,6 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
7373
std::vector< cl_device_id > device_id;
7474
cl_context context;
7575
cl_command_queue queue;
76-
cl_event outEvent = NULL;
7776
clfftPlanHandle plan_handle;
7877

7978
for (unsigned u = 0; u < max_dimensions; ++u) {
@@ -204,7 +203,7 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
204203

205204

206205
OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &input[ 0 ],
207-
0, NULL, &outEvent ),
206+
0, NULL, NULL ),
208207
"clEnqueueWriteBuffer failed" );
209208

210209
}
@@ -252,10 +251,10 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
252251

253252

254253
OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &real[ 0 ],
255-
0, NULL, &outEvent ),
254+
0, NULL, NULL ),
256255
"clEnqueueWriteBuffer failed" );
257256
OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 1 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &imag[ 0 ],
258-
0, NULL, &outEvent ),
257+
0, NULL, NULL ),
259258
"clEnqueueWriteBuffer failed" );
260259
}
261260
break;
@@ -289,7 +288,7 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
289288

290289

291290
OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &input[ 0 ],
292-
0, NULL, &outEvent ),
291+
0, NULL, NULL ),
293292
"clEnqueueWriteBuffer failed" );
294293
}
295294
break;
@@ -325,10 +324,10 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
325324

326325

327326
OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &real[ 0 ],
328-
0, NULL, &outEvent ),
327+
0, NULL, NULL ),
329328
"clEnqueueWriteBuffer failed" );
330329
OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 1 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &imag[ 0 ],
331-
0, NULL, &outEvent ),
330+
0, NULL, NULL ),
332331
"clEnqueueWriteBuffer failed" );
333332
}
334333
break;
@@ -373,7 +372,7 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
373372

374373

375374
OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &real[ 0 ],
376-
0, NULL, &outEvent ),
375+
0, NULL, NULL ),
377376
"clEnqueueWriteBuffer failed" );
378377
}
379378
break;
@@ -391,22 +390,10 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
391390
terr << _T( "Could not find the external timing library; timings disabled" ) << std::endl;
392391
}
393392

394-
395393
// Timer module discovered and loaded successfully
396394
// Initialize function pointers to call into the shared module
397395
PFGETSTATTIMER get_timer = reinterpret_cast< PFGETSTATTIMER > ( LoadFunctionAddr( timerLibHandle, "getStatTimer" ) );
398396

399-
// Create and initialize our timer class, if the external timer shared library loaded
400-
baseStatTimer* timer = NULL;
401-
size_t clFFTID = 0;
402-
if( get_timer )
403-
{
404-
timer = get_timer( CLFFT_GPU );
405-
timer->Reserve( 1, profile_count );
406-
timer->setNormalize( true );
407-
408-
clFFTID = timer->getUniqueID( "clFFT", 0 );
409-
}
410397

411398
OPENCL_V_THROW( clfftSetup( setupData.get( ) ), "clfftSetup failed" );
412399
OPENCL_V_THROW( clfftCreateDefaultPlan( &plan_handle, context, dim, lengths ), "clfftCreateDefaultPlan failed" );
@@ -511,37 +498,64 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
511498
}
512499
}
513500

514-
// Loop as many times as the user specifies to average out the timings
515-
//
501+
516502
cl_mem * BuffersOut = ( place == CLFFT_INPLACE ) ? NULL : &output_cl_mem_buffers[ 0 ];
517503

518-
Timer tr;
519-
tr.Start();
504+
// Execute once for basic functional test
505+
OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, dir, 1, &queue, 0, NULL, NULL,
506+
&input_cl_mem_buffers[ 0 ], BuffersOut, clMedBuffer ),
507+
"clfftEnqueueTransform failed" );
520508

521-
for( cl_uint i = 0; i < profile_count; ++i )
522-
{
523-
if( timer ) timer->Start( clFFTID );
509+
OPENCL_V_THROW( clFinish( queue ), "clFinish failed" );
510+
524511

525-
OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, dir, 1, &queue, 0, NULL, &outEvent,
526-
&input_cl_mem_buffers[ 0 ], BuffersOut, clMedBuffer ),
527-
"clfftEnqueueTransform failed" );
512+
// Create and initialize our timer class, if the external timer shared library loaded
513+
baseStatTimer* timer = NULL;
514+
size_t clFFTID = 0;
515+
if( get_timer )
516+
{
517+
timer = get_timer( CLFFT_GPU );
518+
timer->Reserve( 1, profile_count );
519+
timer->setNormalize( true );
528520

529-
if( timer ) timer->Stop( clFFTID );
521+
clFFTID = timer->getUniqueID( "clFFT", 0 );
530522
}
531-
OPENCL_V_THROW( clFinish( queue ), "clFinish failed" );
532-
if(clMedBuffer) clReleaseMemObject(clMedBuffer);
533523

534-
double wtime = tr.Sample()/((double)profile_count);
535-
size_t totalLen = 1;
536-
for(int i=0; i<dim; i++) totalLen *= lengths[i];
537-
double opsconst = 5.0 * (double)totalLen * log((double)totalLen) / log(2.0);
524+
cl_event *outEvent = new cl_event[profile_count];
525+
for( cl_uint i = 0; i < profile_count; ++i ) outEvent[i] = 0;
538526

539527
if(profile_count > 1)
540528
{
529+
Timer tr;
530+
tr.Start();
531+
for( cl_uint i = 0; i < profile_count; ++i )
532+
{
533+
if( timer ) timer->Start( clFFTID );
534+
535+
OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, dir, 1, &queue, 0, NULL, &outEvent[i],
536+
&input_cl_mem_buffers[ 0 ], BuffersOut, clMedBuffer ),
537+
"clfftEnqueueTransform failed" );
538+
539+
if( timer ) timer->Stop( clFFTID );
540+
}
541+
OPENCL_V_THROW( clWaitForEvents ( profile_count, outEvent ), "clWaitForEvents failed" );
542+
543+
double wtime = tr.Sample()/((double)profile_count);
544+
545+
OPENCL_V_THROW( clFinish( queue ), "clFinish failed" );
546+
547+
size_t totalLen = 1;
548+
for(int i=0; i<dim; i++) totalLen *= lengths[i];
549+
double opsconst = 5.0 * (double)totalLen * log((double)totalLen) / log(2.0);
550+
551+
541552
tout << "\nExecution wall time: " << 1000.0*wtime << " ms" << std::endl;
542553
tout << "Execution gflops: " << ((double)batch_size * opsconst)/(1000000000.0*wtime) << std::endl;
554+
543555
}
544556

557+
if(clMedBuffer) clReleaseMemObject(clMedBuffer);
558+
545559
if( timer && (command_queue_flags & CL_QUEUE_PROFILING_ENABLE) )
546560
{
547561
// Remove all timings that are outside of 2 stddev (keep 65% of samples); we ignore outliers to get a more consistent result
@@ -553,6 +567,14 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
553567
/*****************/
554568
FreeSharedLibrary( timerLibHandle );
555569

570+
for( cl_uint i = 0; i < profile_count; ++i )
571+
{
572+
if(outEvent[i])
573+
clReleaseEvent(outEvent[i]);
574+
}
575+
576+
delete[] outEvent;
577+
556578
// Read and check output data
557579
// This check is not valid if the FFT is executed multiple times inplace.
558580
//
@@ -725,7 +747,7 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
725747
OPENCL_V_THROW( clfftDestroyPlan( &plan_handle ), "clfftDestroyPlan failed" );
726748
OPENCL_V_THROW( clfftTeardown( ), "clfftTeardown failed" );
727749

728-
cleanupCL( &context, &queue, countOf( input_cl_mem_buffers ), input_cl_mem_buffers, countOf( output_cl_mem_buffers ), output_cl_mem_buffers, &outEvent );
750+
cleanupCL( &context, &queue, countOf( input_cl_mem_buffers ), input_cl_mem_buffers, countOf( output_cl_mem_buffers ), output_cl_mem_buffers, NULL );
729751
return 0;
730752
}
731753

src/client/openCL.misc.cpp

+5-2
Original file line number | Diff line number | Diff line change
@@ -477,8 +477,11 @@ std::vector< cl_device_id > initializeCL( cl_device_type deviceType,
477477
int cleanupCL( cl_context* context, cl_command_queue* commandQueue,
478478
const cl_uint numBuffersIn, cl_mem inputBuffer[], const cl_uint numBuffersOut, cl_mem outputBuffer[], cl_event* outEvent )
479479
{
480-
if( *outEvent != NULL )
481-
OPENCL_V_THROW( clReleaseEvent( *outEvent ), "Error: In clReleaseEvent\n" );
480+
if(outEvent != NULL)
481+
{
482+
if( *outEvent != NULL )
483+
OPENCL_V_THROW( clReleaseEvent( *outEvent ), "Error: In clReleaseEvent\n" );
484+
}
482485

483486
releaseOpenCLMemBuffer( numBuffersIn, inputBuffer);
484487
releaseOpenCLMemBuffer( numBuffersOut, outputBuffer);

0 commit comments

Comments
 (0)